mirror of
https://github.com/AAndyProgram/SCrawler.git
synced 2026-03-15 08:12:17 +00:00
2023.3.1.0
Add 'Path' plugin UserDataBase: changed file names for saved posts; removed 'Self' property; add 'MyFileSettings' field; added UserSiteName; changed download envir algo Twitter: added MD5 comparison; duplicate images removal option; UserSiteName parsing; download icon and banner Instagram: added a new option for token 'www_claim'; removed requirement of token 'www_claim'; UserSiteName parsing; download icon Reddit: UserSiteName parsing; download icon and banner PornHub: fixed unicode titles XHamster: added channels ffmpeg: fixed max input length error during files combining; fixed encoding issue Feed: added images centering; added BackColor and ForeColor change MainFrame: added BackColor, ForeColor, and BackgroungImage change; added 'UpdateLogButton' when load completed ListImagesLoader: fixed wrong notification when no users found SettingsCLS: updated users loading algo
This commit is contained in:
@@ -7,13 +7,16 @@
|
||||
' This program is distributed in the hope that it will be useful,
|
||||
' but WITHOUT ANY WARRANTY
|
||||
Imports System.Net
|
||||
Imports System.Drawing
|
||||
Imports System.Threading
|
||||
Imports SCrawler.API.Base
|
||||
Imports PersonalUtilities.Functions.XML
|
||||
Imports PersonalUtilities.Functions.RegularExpressions
|
||||
Imports PersonalUtilities.Tools.Web.Clients
|
||||
Imports PersonalUtilities.Tools.Web.Documents.JSON
|
||||
Imports PersonalUtilities.Tools.ImageRenderer
|
||||
Imports UStates = SCrawler.API.Base.UserMedia.States
|
||||
Imports UTypes = SCrawler.API.Base.UserMedia.Types
|
||||
Namespace API.Twitter
|
||||
Friend Class UserData : Inherits UserDataBase
|
||||
Private Const SinglePostUrl As String = "https://api.twitter.com/1.1/statuses/show.json?id={0}&tweet_mode=extended"
|
||||
@@ -21,12 +24,18 @@ Namespace API.Twitter
|
||||
Private Const Name_GifsDownload As String = "GifsDownload"
|
||||
Private Const Name_GifsSpecialFolder As String = "GifsSpecialFolder"
|
||||
Private Const Name_GifsPrefix As String = "GifsPrefix"
|
||||
Private Const Name_UseMD5Comparison As String = "UseMD5Comparison"
|
||||
Private Const Name_RemoveExistingDuplicates As String = "RemoveExistingDuplicates"
|
||||
Private Const Name_StartMD5Checked As String = "StartMD5Checked"
|
||||
#End Region
|
||||
#Region "Declarations"
|
||||
Friend Property GifsDownload As Boolean
|
||||
Friend Property GifsSpecialFolder As String
|
||||
Friend Property GifsPrefix As String
|
||||
Private ReadOnly _DataNames As List(Of String)
|
||||
Friend Property UseMD5Comparison As Boolean = False
|
||||
Private StartMD5Checked As Boolean = False
|
||||
Friend Property RemoveExistingDuplicates As Boolean = False
|
||||
#End Region
|
||||
#Region "Exchange options"
|
||||
Friend Overrides Function ExchangeOptionsGet() As Object
|
||||
@@ -38,6 +47,8 @@ Namespace API.Twitter
|
||||
GifsDownload = .GifsDownload
|
||||
GifsSpecialFolder = .GifsSpecialFolder
|
||||
GifsPrefix = .GifsPrefix
|
||||
UseMD5Comparison = .UseMD5Comparison
|
||||
RemoveExistingDuplicates = .RemoveExistingDuplicates
|
||||
End With
|
||||
End If
|
||||
End Sub
|
||||
@@ -55,10 +66,16 @@ Namespace API.Twitter
|
||||
Else
|
||||
GifsPrefix = Container.Value(Name_GifsPrefix)
|
||||
End If
|
||||
UseMD5Comparison = Container.Value(Name_UseMD5Comparison).FromXML(Of Boolean)(False)
|
||||
RemoveExistingDuplicates = Container.Value(Name_RemoveExistingDuplicates).FromXML(Of Boolean)(False)
|
||||
StartMD5Checked = Container.Value(Name_StartMD5Checked).FromXML(Of Boolean)(False)
|
||||
Else
|
||||
Container.Add(Name_GifsDownload, GifsDownload.BoolToInteger)
|
||||
Container.Add(Name_GifsSpecialFolder, GifsSpecialFolder)
|
||||
Container.Add(Name_GifsPrefix, GifsPrefix)
|
||||
Container.Add(Name_UseMD5Comparison, UseMD5Comparison.BoolToInteger)
|
||||
Container.Add(Name_RemoveExistingDuplicates, RemoveExistingDuplicates.BoolToInteger)
|
||||
Container.Add(Name_StartMD5Checked, StartMD5Checked.BoolToInteger)
|
||||
End If
|
||||
End Sub
|
||||
#End Region
|
||||
@@ -70,6 +87,7 @@ Namespace API.Twitter
|
||||
Else
|
||||
If _ContentList.Count > 0 Then _DataNames.ListAddList(_ContentList.Select(Function(c) c.File.File), LAP.ClearBeforeAdd, LAP.NotContainsOnly)
|
||||
DownloadData(String.Empty, Token)
|
||||
If UseMD5Comparison Then ValidateMD5(Token)
|
||||
End If
|
||||
End Sub
|
||||
Private Overloads Sub DownloadData(ByVal POST As String, ByVal Token As CancellationToken)
|
||||
@@ -100,6 +118,30 @@ Namespace API.Twitter
|
||||
If Not r.IsEmptyString Then
|
||||
Using w As EContainer = JsonDocument.Parse(r)
|
||||
If w.ListExists Then
|
||||
|
||||
If Not IsSavedPosts And POST.IsEmptyString And Not w.ItemF({0, "user"}) Is Nothing Then
|
||||
With w.ItemF({0, "user"})
|
||||
If .Value("screen_name").StringToLower = Name Then
|
||||
UserSiteNameUpdate(.Value("name"))
|
||||
UserDescriptionUpdate(.Value("description"))
|
||||
Dim __getImage As Action(Of String) = Sub(ByVal img As String)
|
||||
If Not img.IsEmptyString Then
|
||||
Dim __imgFile As SFile = UrlFile(img, True)
|
||||
If Not __imgFile.Name.IsEmptyString Then
|
||||
If __imgFile.Extension.IsEmptyString Then __imgFile.Extension = "jpg"
|
||||
__imgFile.Path = MyFile.CutPath.Path
|
||||
If Not __imgFile.Exists Then GetWebFile(img, __imgFile, EDP.None)
|
||||
End If
|
||||
End If
|
||||
End Sub
|
||||
Dim icon$ = .Value("profile_image_url_https")
|
||||
If Not icon.IsEmptyString Then icon = icon.Replace("_normal", String.Empty)
|
||||
__getImage.Invoke(.Value("profile_banner_url"))
|
||||
__getImage.Invoke(icon)
|
||||
End If
|
||||
End With
|
||||
End If
|
||||
|
||||
For Each nn In If(IsSavedPosts, w({"globalObjects", "tweets"}).XmlIfNothing, w)
|
||||
ThrowAny(Token)
|
||||
If nn.Count > 0 Then
|
||||
@@ -114,9 +156,6 @@ Namespace API.Twitter
|
||||
End If
|
||||
End If
|
||||
|
||||
If Not IsSavedPosts AndAlso UserDescriptionNeedToUpdate() AndAlso nn.Value({"user"}, "screen_name") = Name Then _
|
||||
UserDescriptionUpdate(nn.Value({"user"}, "description"))
|
||||
|
||||
'Date Pattern:
|
||||
'Sat Jan 01 01:10:15 +0000 2000
|
||||
If nn.Contains("created_at") Then PostDate = nn("created_at").Value Else PostDate = String.Empty
|
||||
@@ -174,7 +213,7 @@ Namespace API.Twitter
|
||||
If Not dName.IsEmptyString AndAlso Not _DataNames.Contains(dName) Then
|
||||
_DataNames.Add(dName)
|
||||
_TempMediaList.ListAddValue(MediaFromData(m("media_url").Value,
|
||||
PostID, PostDate, GetPictureOption(m), State), LNC)
|
||||
PostID, PostDate, GetPictureOption(m), State, UTypes.Picture), LNC)
|
||||
End If
|
||||
End If
|
||||
Next
|
||||
@@ -190,7 +229,7 @@ Namespace API.Twitter
|
||||
Dim f$ = UrlFile(URL)
|
||||
If Not f.IsEmptyString AndAlso Not _DataNames.Contains(f) Then
|
||||
_DataNames.Add(f)
|
||||
_TempMediaList.ListAddValue(MediaFromData(URL, PostID, PostDate,, State), LNC)
|
||||
_TempMediaList.ListAddValue(MediaFromData(URL, PostID, PostDate,, State, UTypes.Video), LNC)
|
||||
End If
|
||||
Return True
|
||||
End If
|
||||
@@ -218,7 +257,7 @@ Namespace API.Twitter
|
||||
ff = UrlFile(url)
|
||||
If Not ff.IsEmptyString Then
|
||||
If GifsDownload And Not _DataNames.Contains(ff) Then
|
||||
m = MediaFromData(url, PostID, PostDate,, State)
|
||||
m = MediaFromData(url, PostID, PostDate,, State, UTypes.Video)
|
||||
f = m.File
|
||||
If Not f.IsEmptyString And Not GifsPrefix.IsEmptyString Then f.Name = $"{GifsPrefix}{f.Name}" : m.File = f
|
||||
If Not GifsSpecialFolder.IsEmptyString Then m.SpecialFolder = $"{GifsSpecialFolder}*"
|
||||
@@ -298,6 +337,131 @@ Namespace API.Twitter
|
||||
End Try
|
||||
End Sub
|
||||
#End Region
|
||||
#Region "MD5 support"
|
||||
Private Const VALIDATE_MD5_ERROR As String = "VALIDATE_MD5_ERROR"
|
||||
Private Sub ValidateMD5(ByVal Token As CancellationToken)
|
||||
Try
|
||||
Dim missingMD5 As Predicate(Of UserMedia) = Function(d) (d.Type = UTypes.GIF Or d.Type = UTypes.Picture) And d.MD5.IsEmptyString
|
||||
If UseMD5Comparison And _TempMediaList.Exists(missingMD5) Then
|
||||
Dim i%
|
||||
Dim data As UserMedia = Nothing
|
||||
Dim hashList As New Dictionary(Of String, SFile)
|
||||
Dim f As SFile
|
||||
Dim ErrMD5 As New ErrorsDescriber(EDP.ReturnValue)
|
||||
Dim __getMD5 As Func(Of UserMedia, Boolean, String) =
|
||||
Function(ByVal __data As UserMedia, ByVal IsUrl As Boolean) As String
|
||||
Try
|
||||
Dim ImgFormat As Imaging.ImageFormat = Nothing
|
||||
Dim hash$ = String.Empty
|
||||
Dim __isGif As Boolean = False
|
||||
If __data.Type = UTypes.GIF Then
|
||||
ImgFormat = Imaging.ImageFormat.Gif
|
||||
__isGif = True
|
||||
ElseIf Not __data.File.IsEmptyString Then
|
||||
ImgFormat = GetImageFormat(__data.File)
|
||||
End If
|
||||
If ImgFormat Is Nothing Then ImgFormat = Imaging.ImageFormat.Jpeg
|
||||
If IsUrl Then
|
||||
hash = ByteArrayToString(GetMD5(SFile.GetBytesFromNet(__data.URL_BASE.IfNullOrEmpty(__data.URL), ErrMD5), ImgFormat, ErrMD5))
|
||||
Else
|
||||
hash = ByteArrayToString(GetMD5(SFile.GetBytes(__data.File, ErrMD5), ImgFormat, ErrMD5))
|
||||
End If
|
||||
If hash.IsEmptyString And Not __isGif Then
|
||||
If ImgFormat Is Imaging.ImageFormat.Jpeg Then ImgFormat = Imaging.ImageFormat.Png Else ImgFormat = Imaging.ImageFormat.Jpeg
|
||||
If IsUrl Then
|
||||
hash = ByteArrayToString(GetMD5(SFile.GetBytesFromNet(__data.URL_BASE.IfNullOrEmpty(__data.URL), ErrMD5), ImgFormat, ErrMD5))
|
||||
Else
|
||||
hash = ByteArrayToString(GetMD5(SFile.GetBytes(__data.File, ErrMD5), ImgFormat, ErrMD5))
|
||||
End If
|
||||
End If
|
||||
Return hash
|
||||
Catch
|
||||
Return String.Empty
|
||||
End Try
|
||||
End Function
|
||||
If Not StartMD5Checked Then
|
||||
StartMD5Checked = True
|
||||
If _ContentList.Exists(missingMD5) Then
|
||||
Dim existingFiles As List(Of SFile) = SFile.GetFiles(MyFileSettings.CutPath, "*.jpg|*.jpeg|*.png|*.gif",, EDP.ReturnValue).ListIfNothing
|
||||
Dim eIndx%
|
||||
Dim eFinder As Predicate(Of SFile) = Function(ff) ff.File = data.File.File
|
||||
If RemoveExistingDuplicates Then
|
||||
RemoveExistingDuplicates = False
|
||||
_ForceSaveUserInfo = True
|
||||
If existingFiles.Count > 0 Then
|
||||
Dim h$
|
||||
For i = existingFiles.Count - 1 To 0 Step -1
|
||||
h = __getMD5(New UserMedia With {.File = existingFiles(i)}, False)
|
||||
If Not h.IsEmptyString Then
|
||||
If hashList.ContainsKey(h) Then
|
||||
MyMainLOG = $"[{ToStringForLog()}]: Removed image [{existingFiles(i).File}] (duplicate of [{hashList(h).File}])"
|
||||
existingFiles(i).Delete(SFO.File, SFODelete.DeleteToRecycleBin, ErrMD5)
|
||||
existingFiles.RemoveAt(i)
|
||||
Else
|
||||
hashList.Add(h, existingFiles(i))
|
||||
End If
|
||||
End If
|
||||
Next
|
||||
End If
|
||||
End If
|
||||
For i = 0 To _ContentList.Count - 1
|
||||
data = _ContentList(i)
|
||||
If (data.Type = UTypes.GIF Or data.Type = UTypes.Picture) Then
|
||||
If data.MD5.IsEmptyString Then
|
||||
ThrowAny(Token)
|
||||
eIndx = existingFiles.FindIndex(eFinder)
|
||||
If eIndx >= 0 Then
|
||||
data.MD5 = __getMD5(New UserMedia With {.File = existingFiles(eIndx)}, False)
|
||||
If Not data.MD5.IsEmptyString Then _ContentList(i) = data : _ForceSaveUserData = True
|
||||
End If
|
||||
End If
|
||||
existingFiles.RemoveAll(eFinder)
|
||||
End If
|
||||
Next
|
||||
If existingFiles.Count > 0 Then
|
||||
For i = 0 To existingFiles.Count - 1
|
||||
f = existingFiles(i)
|
||||
data = New UserMedia(f.File) With {
|
||||
.State = UStates.Downloaded,
|
||||
.Type = IIf(f.Extension = "gif", UTypes.GIF, UTypes.Picture),
|
||||
.File = f
|
||||
}
|
||||
ThrowAny(Token)
|
||||
data.MD5 = __getMD5(data, False)
|
||||
If Not data.MD5.IsEmptyString Then _ContentList.Add(data) : _ForceSaveUserData = True
|
||||
Next
|
||||
existingFiles.Clear()
|
||||
End If
|
||||
End If
|
||||
End If
|
||||
|
||||
If _ContentList.Count > 0 Then
|
||||
With _ContentList.Select(Function(d) d.MD5)
|
||||
If .ListExists Then .ToList.ForEach(Sub(md5value) If Not hashList.ContainsKey(md5value) Then hashList.Add(md5value, New SFile))
|
||||
End With
|
||||
End If
|
||||
|
||||
For i = _TempMediaList.Count - 1 To 0 Step -1
|
||||
data = _TempMediaList(i)
|
||||
If missingMD5(data) Then
|
||||
ThrowAny(Token)
|
||||
data.MD5 = __getMD5(data, True)
|
||||
If Not data.MD5.IsEmptyString Then
|
||||
If hashList.ContainsKey(data.MD5) Then
|
||||
_TempMediaList.RemoveAt(i)
|
||||
Else
|
||||
hashList.Add(data.MD5, New SFile)
|
||||
_TempMediaList(i) = data
|
||||
End If
|
||||
End If
|
||||
End If
|
||||
Next
|
||||
End If
|
||||
Catch ex As Exception
|
||||
ProcessException(ex, Token, "ValidateMD5",, VALIDATE_MD5_ERROR)
|
||||
End Try
|
||||
End Sub
|
||||
#End Region
|
||||
#Region "Get video static"
|
||||
Friend Shared Function GetVideoInfo(ByVal URL As String, ByVal resp As Responser) As IEnumerable(Of UserMedia)
|
||||
Try
|
||||
@@ -310,7 +474,7 @@ Namespace API.Twitter
|
||||
Using j As EContainer = JsonDocument.Parse(r)
|
||||
If j.ListExists Then
|
||||
Dim u$ = GetVideoNodeURL(j)
|
||||
If Not u.IsEmptyString Then Return {MediaFromData(u, PostID, String.Empty)}
|
||||
If Not u.IsEmptyString Then Return {MediaFromData(u, PostID, String.Empty,,, UTypes.Video)}
|
||||
End If
|
||||
End Using
|
||||
End If
|
||||
@@ -359,10 +523,17 @@ Namespace API.Twitter
|
||||
End Function
|
||||
#End Region
|
||||
#Region "UrlFile"
|
||||
Private Function UrlFile(ByVal URL As String) As String
|
||||
Private Function UrlFile(ByVal URL As String, Optional ByVal GetWithoutExtension As Boolean = False) As String
|
||||
Try
|
||||
Dim f As SFile = CStr(RegexReplace(LinkFormatterSecure(RegexReplace(URL.Replace("\", String.Empty), LinkPattern)), FilesPattern))
|
||||
If Not f.IsEmptyString Then Return f.File Else Return String.Empty
|
||||
If Not URL.IsEmptyString Then
|
||||
Dim f As SFile = CStr(RegexReplace(LinkFormatterSecure(RegexReplace(URL.Replace("\", String.Empty), LinkPattern)), FilesPattern))
|
||||
If f.IsEmptyString And GetWithoutExtension Then
|
||||
URL = LinkFormatterSecure(RegexReplace(URL.Replace("\", String.Empty), LinkPattern))
|
||||
If Not URL.IsEmptyString Then f = New SFile With {.Name = URL.Split("/").LastOrDefault}
|
||||
End If
|
||||
If Not f.IsEmptyString Then Return f.File
|
||||
End If
|
||||
Return String.Empty
|
||||
Catch ex As Exception
|
||||
Return String.Empty
|
||||
End Try
|
||||
@@ -371,9 +542,10 @@ Namespace API.Twitter
|
||||
#Region "Create media"
|
||||
Private Shared Function MediaFromData(ByVal _URL As String, ByVal PostID As String, ByVal PostDate As String,
|
||||
Optional ByVal _PictureOption As String = Nothing,
|
||||
Optional ByVal State As UStates = UStates.Unknown) As UserMedia
|
||||
Optional ByVal State As UStates = UStates.Unknown,
|
||||
Optional ByVal Type As UTypes = UTypes.Undefined) As UserMedia
|
||||
_URL = LinkFormatterSecure(RegexReplace(_URL.Replace("\", String.Empty), LinkPattern))
|
||||
Dim m As New UserMedia(_URL) With {.PictureOption = _PictureOption, .Post = New UserPost With {.ID = PostID}}
|
||||
Dim m As New UserMedia(_URL) With {.PictureOption = _PictureOption, .Post = New UserPost With {.ID = PostID}, .Type = Type}
|
||||
If Not m.URL.IsEmptyString Then m.File = CStr(RegexReplace(m.URL, FilesPattern))
|
||||
If Not m.PictureOption.IsEmptyString And Not m.File.IsEmptyString And Not m.URL.IsEmptyString Then
|
||||
m.URL = $"{m.URL.Replace($".{m.File.Extension}", String.Empty)}?format={m.File.Extension}&name={m.PictureOption}"
|
||||
@@ -391,20 +563,25 @@ Namespace API.Twitter
|
||||
#Region "Exception"
|
||||
Protected Overrides Function DownloadingException(ByVal ex As Exception, ByVal Message As String, Optional ByVal FromPE As Boolean = False,
|
||||
Optional ByVal EObj As Object = Nothing) As Integer
|
||||
With Responser
|
||||
If .StatusCode = HttpStatusCode.NotFound Then
|
||||
UserExists = False
|
||||
ElseIf .StatusCode = HttpStatusCode.Unauthorized Then
|
||||
UserSuspended = True
|
||||
ElseIf .StatusCode = HttpStatusCode.BadRequest Then
|
||||
MyMainLOG = "Twitter has invalid credentials"
|
||||
ElseIf .StatusCode = HttpStatusCode.ServiceUnavailable Or .StatusCode = HttpStatusCode.InternalServerError Then
|
||||
MyMainLOG = $"[{CInt(.StatusCode)}] Twitter is currently unavailable ({ToString()})"
|
||||
Else
|
||||
If Not FromPE Then LogError(ex, Message) : HasError = True
|
||||
Return 0
|
||||
End If
|
||||
End With
|
||||
If AEquals(EObj, VALIDATE_MD5_ERROR) Then
|
||||
If Not FromPE Then LogError(ex, Message)
|
||||
Return 0
|
||||
Else
|
||||
With Responser
|
||||
If .StatusCode = HttpStatusCode.NotFound Then
|
||||
UserExists = False
|
||||
ElseIf .StatusCode = HttpStatusCode.Unauthorized Then
|
||||
UserSuspended = True
|
||||
ElseIf .StatusCode = HttpStatusCode.BadRequest Then
|
||||
MyMainLOG = "Twitter has invalid credentials"
|
||||
ElseIf .StatusCode = HttpStatusCode.ServiceUnavailable Or .StatusCode = HttpStatusCode.InternalServerError Then
|
||||
MyMainLOG = $"[{CInt(.StatusCode)}] Twitter is currently unavailable ({ToString()})"
|
||||
Else
|
||||
If Not FromPE Then LogError(ex, Message) : HasError = True
|
||||
Return 0
|
||||
End If
|
||||
End With
|
||||
End If
|
||||
Return 1
|
||||
End Function
|
||||
#End Region
|
||||
|
||||
Reference in New Issue
Block a user