diff --git a/Changelog.md b/Changelog.md index be8dd2b..579a346 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,3 +1,17 @@ +# 2025.3.17.0 + +*2025-03-17* + +- Added + - **TikTok: downloading photos** +- Updated + - gallery-dl up to version **1.29.2** +- Fixed + - Sites + - Facebook: reels aren't downloaded from noname profiles + - PornHub: newly added users aren't downloading + - Threads: users aren't updated if there is a pinned post + # 2025.2.25.0 *2025-02-25* diff --git a/README.md b/README.md index 8ca99a0..8c83205 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ A program to download photo and video from [any site](#supported-sites) (e.g. Yo - Instagram images and videos, tagged posts, stories, saved posts; - Threads images and videos, saved posts; - Facebook images and videos, stories, saved posts; - - TikTok videos; + - TikTok images and videos; - Pinterest boards, users, saved posts; - Imgur images, galleries and videos; - Gfycat videos; diff --git a/SCrawler/API/Base/SiteSettingsBase.vb b/SCrawler/API/Base/SiteSettingsBase.vb index 91f236d..56b2eaa 100644 --- a/SCrawler/API/Base/SiteSettingsBase.vb +++ b/SCrawler/API/Base/SiteSettingsBase.vb @@ -394,7 +394,7 @@ Namespace API.Base With c.GetParameters If .ListExists Then If .Count = 1 Then - Return .Self()(0).ParameterType.GetInterfaces.ListIfNothing.Where(Function(i) i Is Me.GetType).Count = 1 + Return .Self()(0).ParameterType Is Me.GetType Else Return False End If @@ -412,7 +412,8 @@ Namespace API.Base End If End With If Not constructor Is Nothing Then - If args > 0 AndAlso Not constructor.GetParameters()(0).ParameterType Is GetType(ISiteSettings) Then Throw New Exception + If args > 0 AndAlso constructor.GetParameters()(0).ParameterType.GetInterface(GetType(ISiteSettings).Name) Is Nothing Then _ + Throw New Exception("Class Interface type is incompatible") If args = 0 Then Options = constructor.Invoke(Nothing) Else Options = constructor.Invoke({Me}) End If If Options Is Nothing Then Options = 
Activator.CreateInstance(_UserOptionsType) diff --git a/SCrawler/API/Base/UserDataBase.vb b/SCrawler/API/Base/UserDataBase.vb index b6f3ab3..1c00259 100644 --- a/SCrawler/API/Base/UserDataBase.vb +++ b/SCrawler/API/Base/UserDataBase.vb @@ -1461,6 +1461,7 @@ BlockNullPicture: Data.DownloadState = UserMediaStates.Missing End If YouTube.Objects.YouTubeMediaContainerBase.Update(_ContentNew(0), Data) + If _ContentNew.Count > 1 Then Data.Files.ListAddList(_ContentNew.Select(Function(cc) cc.File), LNC) If ResetTitle And Not _ContentNew(0).File.Name.IsEmptyString Then Data.Title = _ContentNew(0).File.Name Else Data.DownloadState = UserMediaStates.Missing diff --git a/SCrawler/API/Facebook/UserData.vb b/SCrawler/API/Facebook/UserData.vb index d0a204f..3bd32a1 100644 --- a/SCrawler/API/Facebook/UserData.vb +++ b/SCrawler/API/Facebook/UserData.vb @@ -638,7 +638,7 @@ Namespace API.Facebook End If End Function Private Sub GetVideoPageID(ByVal GetReels As Boolean, ByVal Token As CancellationToken) - Dim URL$ = $"{GetProfileUrl()}\{IIf(GetReels, "reels", "videos")}" + Dim URL$ = $"{GetProfileUrl()}{IIf(IsNoNameProfile, "&sk=", "/")}{IIf(GetReels, IIf(IsNoNameProfile, "reels_tab", "reels"), "videos")}" Dim resp As Responser = HtmlResponserCreate() Try WaitTimer() diff --git a/SCrawler/API/Pinterest/SiteSettings.vb b/SCrawler/API/Pinterest/SiteSettings.vb index c743c4b..f2d8a67 100644 --- a/SCrawler/API/Pinterest/SiteSettings.vb +++ b/SCrawler/API/Pinterest/SiteSettings.vb @@ -31,6 +31,7 @@ Namespace API.Pinterest CheckNetscapeCookiesOnEndInit = True UseNetscapeCookies = True UserRegex = RParams.DMS("https?://w{0,3}.?[^/]*?.?pinterest.com/([^/]+)/?(?(_)|([^/]*))/?([^/\?]*)", 0, RegexReturn.ListByMatch, EDP.ReturnValue) + UserOptionsType = GetType(EditorExchangeOptions) End Sub #End Region #Region "GetInstance, Available" @@ -72,12 +73,6 @@ Namespace API.Pinterest Return String.Empty End If End Function - Friend Overrides Sub UserOptions(ByRef Options As Object, ByVal OpenForm As 
Boolean) - If Options Is Nothing Then Options = New EditorExchangeOptions - If OpenForm Then - Using f As New InternalSettingsForm(Options, Me, False) : f.ShowDialog() : End Using - End If - End Sub #End Region End Class End Namespace \ No newline at end of file diff --git a/SCrawler/API/Pinterest/UserData.vb b/SCrawler/API/Pinterest/UserData.vb index 7077be4..e3e0c5a 100644 --- a/SCrawler/API/Pinterest/UserData.vb +++ b/SCrawler/API/Pinterest/UserData.vb @@ -170,6 +170,7 @@ Namespace API.Pinterest urls.ListAddList(GetDataFromGalleryDL(URL, True, Token), LNC) If urls.ListExists Then urls.RemoveAll(Function(__url) Not __url.Contains("BoardsResource/get/")) If urls.ListExists Then + Responser.Headers.Add(PwsHeader) ProgressPre.ChangeMax(urls.Count) For Each URL In urls ProgressPre.Perform() @@ -193,6 +194,8 @@ Namespace API.Pinterest Catch ex As Exception ProcessException(ex, Token, $"data (gallery-dl boards) downloading error [{URL}]") Return Nothing + Finally + Responser.Headers.Remove(PwsHeader) End Try End Function Private Sub DownloadBoardImages(ByRef Board As BoardInfo, ByVal Token As CancellationToken) diff --git a/SCrawler/API/PornHub/UserData.vb b/SCrawler/API/PornHub/UserData.vb index 3ef84b0..82d4ee2 100644 --- a/SCrawler/API/PornHub/UserData.vb +++ b/SCrawler/API/PornHub/UserData.vb @@ -195,7 +195,7 @@ Namespace API.PornHub If Not Force OrElse (Not IsUser AndAlso Not SiteMode = SiteModes.Playlists AndAlso Not NewUrl.IsEmptyString AndAlso MyFileSettings.Exists) Then Dim eObj As Plugin.ExchangeOptions = Nothing If Force Then eObj = MySettings.IsMyUser(NewUrl) - If (Force And Not eObj.UserName.IsEmptyString) Or (Not Force And Not Name.IsEmptyString And NameTrue.IsEmptyString) Then + If (Force And Not eObj.UserName.IsEmptyString) Or (Not Force And Not Name.IsEmptyString And NameTrue(True).IsEmptyString) Then If Not If(Force, eObj.Options, Options).IsEmptyString Then If (IsUser Or SiteMode = SiteModes.Playlists) And Force Then Return False @@ -241,7 +241,7 @@ 
Namespace API.PornHub SiteMode = .Value(Name_SiteMode).FromXML(Of Integer)(SiteModes.User) UpdateUserOptions() Else - If UpdateUserOptions() Then .Value(Name_LabelsName) = LabelsString + If UpdateUserOptions() Then .Value(Name_LabelsName) = LabelsString : .Value(Name_TrueName) = NameTrue(True) .Add(Name_PersonType, PersonType) .Add(Name_DownloadUHD, DownloadUHD.BoolToInteger) .Add(Name_DownloadUploaded, DownloadUploaded.BoolToInteger) diff --git a/SCrawler/API/ThreadsNet/UserData.vb b/SCrawler/API/ThreadsNet/UserData.vb index 466cf2d..a0328c0 100644 --- a/SCrawler/API/ThreadsNet/UserData.vb +++ b/SCrawler/API/ThreadsNet/UserData.vb @@ -128,10 +128,13 @@ Namespace API.ThreadsNet If IsSavedPosts Then Return False Else - If MaxLastDownDate.HasValue Then - Dim d As Date? = AConvert(Of Date)(Items(Index).ItemF(DefaultParser_ElemNode_Default).Value("taken_at"), UnixDate32Provider, Nothing) - If d.HasValue Then Return d.Value < MaxLastDownDate.Value - End If + With Items(Index).ItemF(DefaultParser_ElemNode) + If .Value({"text_post_app_info", "pinned_post_info"}, "is_pinned_to_profile").FromXML(Of Boolean)(False) Then Return True + If MaxLastDownDate.HasValue Then + Dim d As Date? 
= AConvert(Of Date)(.Value("taken_at"), UnixDate32Provider, Nothing) + If d.HasValue Then Return d.Value <= MaxLastDownDate.Value + End If + End With Return Not FirstLoadingDone End If Catch ex As Exception diff --git a/SCrawler/API/TikTok/Declarations.vb b/SCrawler/API/TikTok/Declarations.vb index de31241..e8860e6 100644 --- a/SCrawler/API/TikTok/Declarations.vb +++ b/SCrawler/API/TikTok/Declarations.vb @@ -6,11 +6,14 @@ ' ' This program is distributed in the hope that it will be useful, ' but WITHOUT ANY WARRANTY +Imports System.Text.RegularExpressions Imports PersonalUtilities.Functions.RegularExpressions Namespace API.TikTok Friend Module Declarations Friend ReadOnly SimpleDateConverter As New ADateTime("yyyyMMdd") Friend ReadOnly RegexTagsReplacer As RParams = RParams.DM("#\w+\s?", -1, RegexReturn.Replace, CType(Function(input$) String.Empty, Func(Of String, String)), EDP.ReturnValue) + Friend ReadOnly RegexPhotoJson As RParams = RParams.DMS("UNIVERSAL_DATA_FOR_REHYDRATION__"" type=""application/json""\>([^\<]+)\<", 1, + RegexOptions.IgnoreCase, EDP.ReturnValue) End Module End Namespace \ No newline at end of file diff --git a/SCrawler/API/TikTok/SiteSettings.vb b/SCrawler/API/TikTok/SiteSettings.vb index 5d1349a..29fef64 100644 --- a/SCrawler/API/TikTok/SiteSettings.vb +++ b/SCrawler/API/TikTok/SiteSettings.vb @@ -13,6 +13,15 @@ Imports PersonalUtilities.Functions.RegularExpressions Namespace API.TikTok Friend Class SiteSettings : Inherits SiteSettingsBase +#Region "Categories" + Private Const CAT_DOWN As String = "Download" +#End Region +#Region "Download" + + Friend ReadOnly Property DownloadTTVideos As PropertyValue + + Friend ReadOnly Property DownloadTTPhotos As PropertyValue +#End Region Friend ReadOnly Property RemoveTagsFromTitle As PropertyValue @@ -36,6 +45,10 @@ Namespace API.TikTok Friend ReadOnly Property UseParsedVideoDateSTD As PropertyValue Friend Sub New(ByVal AccName As String, ByVal Temp As Boolean) MyBase.New("TikTok", "www.tiktok.com", 
AccName, Temp, My.Resources.SiteResources.TikTokIcon_32, My.Resources.SiteResources.TikTokPic_192) + + DownloadTTVideos = New PropertyValue(True) + DownloadTTPhotos = New PropertyValue(True) + RemoveTagsFromTitle = New PropertyValue(False) TitleUseNative = New PropertyValue(True) TitleUseNativeSTD = New PropertyValue(True) @@ -45,6 +58,7 @@ Namespace API.TikTok TitleUseRegexForTitle_Value = New PropertyValue(String.Empty, GetType(String)) UseParsedVideoDate = New PropertyValue(True) UseParsedVideoDateSTD = New PropertyValue(False) + UseNetscapeCookies = True UrlPatternUser = "https://www.tiktok.com/@{0}/" UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "tiktok.com/@"), 1) diff --git a/SCrawler/API/TikTok/UserData.vb b/SCrawler/API/TikTok/UserData.vb index 52dde0e..2308d6f 100644 --- a/SCrawler/API/TikTok/UserData.vb +++ b/SCrawler/API/TikTok/UserData.vb @@ -13,6 +13,7 @@ Imports PersonalUtilities.Functions.XML Imports PersonalUtilities.Functions.RegularExpressions Imports PersonalUtilities.Tools Imports PersonalUtilities.Tools.Web.Documents.JSON +Imports UTypes = SCrawler.API.Base.UserMedia.Types Namespace API.TikTok Friend Class UserData : Inherits UserDataBase #Region "XML names" @@ -23,6 +24,7 @@ Namespace API.TikTok Private Const Name_TitleUseRegexForTitle As String = "TitleUseRegexForTitle" Private Const Name_TitleUseRegexForTitle_Value As String = "TitleUseRegexForTitle_Value" Private Const Name_TitleUseGlobalRegexOptions As String = "TitleUseGlobalRegexOptions" + Private Const Name_PhotosDownloaded As String = "PhotosDownloaded" #End Region #Region "Declarations" Private ReadOnly Property MySettings As SiteSettings @@ -62,6 +64,7 @@ Namespace API.TikTok Friend Property TitleUseRegexForTitle_Value As String = String.Empty Friend Property TitleUseGlobalRegexOptions As Boolean = True Private Property LastDownloadDate As Date? 
= Nothing + Private Property PhotosDownloaded As Boolean = False #End Region #Region "Exchange" Friend Overrides Function ExchangeOptionsGet() As Object @@ -92,6 +95,7 @@ Namespace API.TikTok TitleUseRegexForTitle = .Value(Name_TitleUseRegexForTitle).FromXML(Of Boolean)(False) TitleUseRegexForTitle_Value = .Value(Name_TitleUseRegexForTitle_Value) TitleUseGlobalRegexOptions = .Value(Name_TitleUseGlobalRegexOptions).FromXML(Of Boolean)(True) + PhotosDownloaded = .Value(Name_PhotosDownloaded).FromXML(Of Boolean)(False) Else .Add(Name_RemoveTagsFromTitle, RemoveTagsFromTitle.BoolToInteger) .Add(Name_TitleUseNative, TitleUseNative.BoolToInteger) @@ -100,6 +104,7 @@ Namespace API.TikTok .Add(Name_TitleUseRegexForTitle, TitleUseRegexForTitle.BoolToInteger) .Add(Name_TitleUseRegexForTitle_Value, TitleUseRegexForTitle_Value) .Add(Name_TitleUseGlobalRegexOptions, TitleUseGlobalRegexOptions.BoolToInteger) + .Add(Name_PhotosDownloaded, PhotosDownloaded.BoolToInteger) End If End With End Sub @@ -142,7 +147,7 @@ Namespace API.TikTok End Function Private Function GetNewFileName(ByVal Title As String, ByVal Native As Boolean, ByVal RemoveTags As Boolean, ByVal AddVideoID As Boolean, ByVal PostID As String, ByVal TitleRegex As RParams) As String - If Not Title.IsEmptyString Then Title = Left(Title, 150).StringTrim + If Not Title.IsEmptyString Then Title = TitleHtmlConverter(Left(Title, 150)).StringTrim If Title.IsEmptyString Or Not Native Then Title = PostID Else @@ -157,6 +162,9 @@ Namespace API.TikTok End If Return Title End Function + Private Function GetPhotoNode() As Object() + Return {"imageURL", "urlList", 0, 0} + End Function Friend Overrides Sub DownloadData(ByVal Token As CancellationToken) MyBase.DownloadData(Token) UserCache.DisposeIfReady(False) @@ -166,13 +174,20 @@ Namespace API.TikTok Dim URL$ = $"https://www.tiktok.com/@{NameTrue}" UserCache = CreateCache() Try - Dim postID$, title$, postUrl$, newName$ + Const photoPrefix$ = "photo_" + Dim postID$, title$, 
postUrl$, newName$, t$, postID2$, imgUrl$ Dim postDate As Date? Dim dateAfterC As Date? = Nothing Dim dateBefore As Date? = DownloadDateTo Dim dateAfter As Date? = DownloadDateFrom Dim baseDataObtained As Boolean = False Dim titleRegex As RParams = GetTitleRegex() + Dim vPath As SFile = Nothing, pPath As SFile = Nothing + Dim file As SFile + Dim j As EContainer, photo As EContainer + Dim photoNode As Object() = GetPhotoNode() + Dim c%, cc%, i% + Dim errDef As New ErrorsDescriber(EDP.ReturnValue) If _ContentList.Count > 0 Then With (From d In _ContentList Where d.Post.Date.HasValue Select d.Post.Date.Value) @@ -198,57 +213,131 @@ Namespace API.TikTok End If End If - Using b As New YTDLP.YTDLPBatch(Token) With {.TempPostsList = _TempPostsList} - b.Commands.Clear() - b.ChangeDirectory(UserCache) - b.Encoding = BatchExecutor.UnicodeEncoding - b.Execute(CreateYTCommand(UserCache.RootDirectory, URL, False, dateBefore, dateAfter)) - End Using + If DownloadVideos And Settings.YtdlpFile.Exists And CBool(MySettings.DownloadTTVideos.Value) Then + With UserCache.NewInstance : .Validate() : vPath = .RootDirectory : End With + Using b As New YTDLP.YTDLPBatch(Token) With {.TempPostsList = _TempPostsList} + b.Commands.Clear() + b.ChangeDirectory(vPath) + b.Encoding = BatchExecutor.UnicodeEncoding + b.Execute(CreateYTCommand(vPath, URL, False, dateBefore, dateAfter)) + End Using + End If + + If DownloadImages And Settings.GalleryDLFile.Exists And CBool(MySettings.DownloadTTPhotos.Value) Then + With UserCache.NewInstance : .Validate() : pPath = .RootDirectory : End With + Using b As New GDL.GDLBatch(Token) + With b + If PhotosDownloaded And _TempPostsList.Count > 0 Then + .TempPostsList = (From p As String In _TempPostsList + Where Not p.IsEmptyString AndAlso p.StartsWith(photoPrefix) + Select p.Replace(photoPrefix, String.Empty)).ListIfNothing + Else + .TempPostsList = New List(Of String) + End If + .ChangeDirectory(pPath) + .Encoding = BatchExecutor.UnicodeEncoding + 
.Execute(CreateGDLCommand(URL)) + If Not PhotosDownloaded Then _ForceSaveUserInfo = True : _ForceSaveUserInfoOnException = True + PhotosDownloaded = True + End With + End Using + End If ThrowAny(Token) - Dim files As List(Of SFile) = SFile.GetFiles(UserCache, "*.json",, EDP.ReturnValue) - If files.ListExists Then - Dim j As EContainer - For Each file As SFile In files - j = JsonDocument.Parse(file.GetText, EDP.ReturnValue) - If j.ListExists Then - If j.Value("_type").StringToLower = "video" Then - If Not baseDataObtained Then - baseDataObtained = True - If ID.IsEmptyString Then - ID = j.Value("uploader_id") - If Not ID.IsEmptyString Then _ForceSaveUserInfo = True + Dim files As List(Of SFile) + If Not vPath.IsEmptyString AndAlso vPath.Exists(SFO.Path, False) Then + files = SFile.GetFiles(vPath, "*.json",, errDef) + If files.ListExists Then + For Each file In files + j = JsonDocument.Parse(file.GetText, errDef) + If j.ListExists Then + If j.Value("_type").StringToLower = "video" Then + If Not baseDataObtained Then + baseDataObtained = True + If ID.IsEmptyString Then + ID = j.Value("uploader_id") + If Not ID.IsEmptyString Then _ForceSaveUserInfo = True + End If + newName = j.Value("uploader") + If Not newName.IsEmptyString Then NameTrue = newName + newName = j.Value("creator") + If Not newName.IsEmptyString Then UserSiteName = newName End If - newName = j.Value("uploader") - If Not newName.IsEmptyString Then NameTrue = newName - newName = j.Value("creator") - If Not newName.IsEmptyString Then UserSiteName = newName - End If - postID = j.Value("id") - If Not _TempPostsList.Contains(postID) Then - _TempPostsList.Add(postID) - Else - Exit Sub - End If - title = GetNewFileName(j.Value("title").StringRemoveWinForbiddenSymbols, - TitleUseNative, RemoveTagsFromTitle, TitleAddVideoID, postID, titleRegex) - postDate = AConvert(Of Date)(j.Value("timestamp"), UnixDate32Provider, Nothing) - If Not postDate.HasValue Then postDate = AConvert(Of Date)(j.Value("upload_date"), 
SimpleDateConverter, Nothing) - Select Case CheckDatesLimit(postDate, SimpleDateConverter) - Case DateResult.Skip : Continue For - Case DateResult.Exit : Exit Sub - End Select + postID = j.Value("id") + If Not _TempPostsList.Contains(postID) Then + _TempPostsList.ListAddValue(postID, LNC) + Else + Exit For 'Exit Sub + End If + title = GetNewFileName(j.Value("title").StringRemoveWinForbiddenSymbols, + TitleUseNative, RemoveTagsFromTitle, TitleAddVideoID, postID, titleRegex) + postDate = AConvert(Of Date)(j.Value("timestamp"), UnixDate32Provider, Nothing) + If Not postDate.HasValue Then postDate = AConvert(Of Date)(j.Value("upload_date"), SimpleDateConverter, Nothing) + Select Case CheckDatesLimit(postDate, SimpleDateConverter) + Case DateResult.Skip : Continue For + Case DateResult.Exit : Exit For 'Exit Sub + End Select - postUrl = j.Value("webpage_url") - If postUrl.IsEmptyString Then postUrl = $"https://www.tiktok.com/@{Name}/video/{postID}" - _TempMediaList.Add(New UserMedia(postUrl, UserMedia.Types.Video) With { - .File = $"{title}.mp4", .Post = New UserPost(postID, postDate)}) + postUrl = j.Value("webpage_url") + If postUrl.IsEmptyString Then postUrl = $"https://www.tiktok.com/@{Name}/video/{postID}" + _TempMediaList.Add(New UserMedia(postUrl, UTypes.Video) With { + .File = $"{title}.mp4", .Post = New UserPost(postID, postDate)}) + End If + j.Dispose() End If - j.Dispose() - End If - Next + Next + End If End If + + If Not pPath.IsEmptyString AndAlso pPath.Exists(SFO.Path, False) Then + files = SFile.GetFiles(pPath, "*.txt",, errDef) + If files.ListExists Then + For Each file In files + t = file.GetText(errDef) + If Not t.IsEmptyString Then t = RegexReplace(t, RegexPhotoJson) + If Not t.IsEmptyString Then + j = JsonDocument.Parse(t, errDef) + If j.ListExists Then + With j.ItemF({0, "webapp.video-detail", "itemInfo", "itemStruct"}) + If .ListExists Then + postID = .Value("id") + postID2 = $"{photoPrefix}{postID}" + If Not _TempPostsList.Contains(postID2) Then 
_TempPostsList.ListAddValue(postID2, LNC) Else Exit For 'Exit Sub + postDate = AConvert(Of Date)(.Value("createTime"), UnixDate32Provider, Nothing) + Select Case CheckDatesLimit(postDate, SimpleDateConverter) + Case DateResult.Skip : Continue For + Case DateResult.Exit : Exit For 'Exit Sub + End Select + title = GetNewFileName(.Value({"imagePost"}, "title").StringRemoveWinForbiddenSymbols, + TitleUseNative, RemoveTagsFromTitle, TitleAddVideoID, postID, titleRegex) + postUrl = $"https://www.tiktok.com/@{Name}/photo/{postID}" + With .Item({"imagePost", "images"}) + If .ListExists Then + i = 0 + c = .Count + cc = Math.Max(c.ToString.Length, 3) + For Each photo In .Self + i += 1 + imgUrl = photo.ItemF(photoNode).XmlIfNothingValue + If Not imgUrl.IsEmptyString Then _ + _TempMediaList.Add(New UserMedia(imgUrl, UTypes.Picture) With { + .URL_BASE = postUrl, + .SpecialFolder = "Photo", + .File = $"{title}{IIf(c > 1, $"_{i.NumToString(ANumbers.Formats.NumberGroup, cc)}", String.Empty)}.jpg", + .Post = New UserPost(postID, postDate)}) + Next + End If + End With + End If + End With + j.Dispose() + End If + End If + Next + End If + End If + If _TempMediaList.Count > 0 Then LastDownloadDate = Now Catch ex As Exception ProcessException(ex, Token, $"data downloading error [{URL}]") @@ -259,16 +348,41 @@ Namespace API.TikTok Protected Overrides Sub ReparseMissing(ByVal Token As CancellationToken) If ContentMissingExists Then Dim m As UserMedia + Dim d As IYouTubeMediaContainer = Nothing Dim i% Dim rList As New List(Of Integer) + Dim picIDs As New List(Of String) + Dim defDir As SFile = SFile.GetPath(DownloadContentDefault_GetRootDir()) + Dim result As Boolean For i = 0 To _ContentList.Count - 1 If _ContentList(i).State = UserMedia.States.Missing Then m = _ContentList(i) - m.URL = m.URL_BASE - _TempMediaList.Add(m) - rList.Add(i) + result = False + Try + If m.Type = UTypes.Video Then + d = MySettings.GetSingleMediaInstance(m.URL_BASE, defDir) + result = False + If 
If(UserCache?.Disposed, True) Then UserCache = CreateCache() + DownloadSingleObject_GetPosts(d, Token, UserCache, result) + ElseIf m.Type = UTypes.Picture Then + If picIDs.Contains(m.Post.ID) Then + rList.Add(i) + Else + d = MySettings.GetSingleMediaInstance(m.URL_BASE, defDir) + If If(UserCache?.Disposed, True) Then UserCache = CreateCache() + DownloadSingleObject_GetPosts(d, Token, UserCache, result) + picIDs.Add(m.Post.ID) + End If + End If + Catch ex As Exception + result = False + ProcessException(ex, Token, "ReparseMissing") + End Try + If result Then rList.Add(i) + d.DisposeIfReady(False) End If Next + picIDs.Clear() If rList.Count > 0 Then For i% = rList.Count - 1 To 0 Step -1 : _ContentList.RemoveAt(rList(i)) : Next End If @@ -303,10 +417,18 @@ Namespace API.TikTok Return command End Function #End Region +#Region "GDL Support" + Private Function CreateGDLCommand(ByVal URL As String) As String + Return $"""{Settings.GalleryDLFile}"" --verbose --no-download --no-skip --write-pages {URL}" + End Function +#End Region #Region "DownloadContent, DownloadFile" Protected Overrides Sub DownloadContent(ByVal Token As CancellationToken) DownloadContentDefault(Token) End Sub + Protected Overrides Function ValidateDownloadFile(ByVal URL As String, ByVal Media As UserMedia, ByRef Interrupt As Boolean) As Boolean + Return Not Media.Type = UTypes.Picture + End Function Protected Overrides Function DownloadFile(ByVal URL As String, ByVal Media As UserMedia, ByVal DestinationFile As SFile, ByVal Token As CancellationToken) As SFile Using b As New TokenBatch(Token) With {.FileExchanger = RootCacheTikTok} b.Encoding = BatchExecutor.UnicodeEncoding @@ -316,33 +438,94 @@ Namespace API.TikTok End Function #End Region #Region "DownloadSingleObject" - Protected Overrides Sub DownloadSingleObject_GetPosts(ByVal Data As IYouTubeMediaContainer, ByVal Token As CancellationToken) + Protected Overloads Overrides Sub DownloadSingleObject_GetPosts(ByVal Data As IYouTubeMediaContainer, 
ByVal Token As CancellationToken) + DownloadSingleObject_GetPosts(Data, Token, Nothing, Nothing) + End Sub + Private Overloads Sub DownloadSingleObject_GetPosts(ByVal Data As IYouTubeMediaContainer, ByVal Token As CancellationToken, + ByRef Cache As CacheKeeper, ByRef Result As Boolean) Dim f$ = String.Empty - If CBool(MySettings.TitleUseNativeSTD.Value) Then - Using b As New BatchExecutor(True) With { - .Encoding = BatchExecutor.UnicodeEncoding, - .CleanAutomaticallyViaRegEx = True, - .CleanAutomaticallyViaRegExRemoveAllCommands = True - } - b.Execute(CreateYTCommand(Nothing, Data.URL, True,,, True, False)) - b.Clean() - With b.OutputData - If .Count > 0 Then - For Each vData$ In .Self - If Not vData.Contains($": {BatchExecutor.UnicodeEncoding}") Then f = vData : Exit For - Next - End If - End With + Dim urlsList As New List(Of String) + Dim t As UTypes + Dim defName$ = New SFile(Data.URL).Name + If Data.URL.ToLower.Contains("/video/") Then + urlsList.Add(Data.URL) + t = UTypes.Video + If CBool(MySettings.TitleUseNativeSTD.Value) Then + Using b As New BatchExecutor(True) With { + .Encoding = BatchExecutor.UnicodeEncoding, + .CleanAutomaticallyViaRegEx = True, + .CleanAutomaticallyViaRegExRemoveAllCommands = True + } + b.Execute(CreateYTCommand(Nothing, Data.URL, True,,, True, False)) + b.Clean() + With b.OutputData + If .Count > 0 Then + For Each vData$ In .Self + If Not vData.Contains($": {BatchExecutor.UnicodeEncoding}") Then f = vData : Exit For + Next + End If + End With + End Using + End If + Else + t = UTypes.Picture + Data.ContentType = Plugin.UserMediaTypes.Picture + Data.Title = defName + Dim dir As SFile + With If(Cache, Settings.Cache).NewInstance() : .Validate() : dir = .RootDirectory : End With + Using b As New GDL.GDLBatch(Token) + b.ChangeDirectory(dir) + b.Encoding = BatchExecutor.UnicodeEncoding + b.Execute(CreateGDLCommand(Data.URL)) End Using + Dim file As SFile = SFile.GetFiles(dir, "*.txt",, EDP.ReturnValue).FirstOrDefault + If file.Exists 
Then + Dim r$ = file.GetText(EDP.ReturnValue) + If Not r.IsEmptyString Then r = RegexReplace(r, RegexPhotoJson) + If Not r.IsEmptyString Then + Using j As EContainer = JsonDocument.Parse(r, EDP.ReturnValue) + If j.ListExists Then + With j.ItemF({0, "webapp.video-detail", "itemInfo", "itemStruct"}) + If CBool(MySettings.TitleUseNativeSTD.Value) Then f = .Value({"imagePost"}, "title").StringRemoveWinForbiddenSymbols + With .Item({"imagePost", "images"}) + If .ListExists Then + For Each photo As EContainer In .Self : urlsList.Add(photo.ItemF(GetPhotoNode()).XmlIfNothingValue) : Next + End If + End With + End With + End If + End Using + End If + End If End If - Dim m As New UserMedia(Data.URL, UserMedia.Types.Video) - If Not f.IsEmptyString Then f = TitleHtmlConverter(f) - If Not f.IsEmptyString Then - f = GetNewFileName(f, MySettings.TitleUseNativeSTD.Value, MySettings.RemoveTagsFromTitle.Value, MySettings.TitleAddVideoIDSTD.Value, - m.File.Name, GetTitleRegex) - If Not f.IsEmptyString Then m.File.Name = f.StringTrim + + Dim m As UserMedia + Dim i% = 0, c%, cc% + Dim ff As Boolean = False + If urlsList.Count > 0 Then + c = urlsList.Count + cc = Math.Max(c.ToString.Length, 3) + For Each url$ In urlsList + i += 1 + m = New UserMedia(url, t) With {.URL_BASE = Data.URL} + If Not f.IsEmptyString Then f = TitleHtmlConverter(f) + If Not f.IsEmptyString Or t = UTypes.Picture Then + If Not ff Then f = GetNewFileName(f, MySettings.TitleUseNativeSTD.Value, MySettings.RemoveTagsFromTitle.Value, MySettings.TitleAddVideoIDSTD.Value, + defName, GetTitleRegex) + ff = True + If Not f.IsEmptyString Then + m.File.Name = $"{f.StringTrim}{IIf(c > 1, $"_{i.NumToString(ANumbers.Formats.NumberGroup, cc)}", String.Empty)}" + If t = UTypes.Picture Then m.File.Extension = "jpg" + End If + End If + + _TempMediaList.Add(m) + Result = True + Next End If - _TempMediaList.Add(m) + End Sub + Protected Overrides Sub DownloadSingleObject_PostProcessing(ByVal Data As IYouTubeMediaContainer, Optional 
ByVal ResetTitle As Boolean = True) + MyBase.DownloadSingleObject_PostProcessing(Data, Not Data.ContentType = Plugin.UserMediaTypes.Picture) End Sub #End Region #Region "EraseData" diff --git a/SCrawler/My Project/AssemblyInfo.vb b/SCrawler/My Project/AssemblyInfo.vb index 189981f..b9f5a37 100644 --- a/SCrawler/My Project/AssemblyInfo.vb +++ b/SCrawler/My Project/AssemblyInfo.vb @@ -32,6 +32,6 @@ Imports System.Runtime.InteropServices ' by using the '*' as shown below: ' - - + +