From 10516f229b9b065c227a95302663d5cf2b7f4f42 Mon Sep 17 00:00:00 2001 From: Andy <88590076+AAndyProgram@users.noreply.github.com> Date: Sat, 10 Feb 2024 08:19:08 +0300 Subject: [PATCH] 2024.2.10.0 Plugins: added `ReplaceInternalPluginAttribute` attribute SCrawler API: update user regex for some sites API.Instagram: simplify 5xx errors; hide JSON deserialization error API.TikTok: files with long names aren't downloaded (PathTooLongException) --- .../Attributes/Attributes.vb | 9 ++++++++ SCrawler/API/Instagram/SiteSettings.vb | 2 +- SCrawler/API/Instagram/UserData.vb | 23 +++++++++++++++---- SCrawler/API/JustForFans/SiteSettings.vb | 2 +- SCrawler/API/OnlyFans/SiteSettings.vb | 2 +- SCrawler/API/Reddit/SiteSettings.vb | 2 +- SCrawler/API/Redgifs/SiteSettings.vb | 2 +- SCrawler/API/ThreadsNet/SiteSettings.vb | 2 +- SCrawler/API/TikTok/SiteSettings.vb | 2 +- SCrawler/API/TikTok/UserData.vb | 3 ++- SCrawler/API/Twitter/SiteSettings.vb | 2 +- SCrawler/MainFrameObjects.vb | 2 +- SCrawler/MainMod.vb | 1 + .../PluginsEnvironment/Hosts/PluginHost.vb | 10 ++++++++ .../PluginsEnvironment/Hosts/SettingsHost.vb | 8 +++++++ SCrawler/SettingsCLS.vb | 16 ++++++++++++- 16 files changed, 73 insertions(+), 15 deletions(-) diff --git a/SCrawler.PluginProvider/Attributes/Attributes.vb b/SCrawler.PluginProvider/Attributes/Attributes.vb index 96e8028..0a4a279 100644 --- a/SCrawler.PluginProvider/Attributes/Attributes.vb +++ b/SCrawler.PluginProvider/Attributes/Attributes.vb @@ -188,4 +188,13 @@ Namespace Plugin.Attributes Repository = RepoName End Sub End Class + ''' Replace internal plugin with the current one + Public NotInheritable Class ReplaceInternalPluginAttribute : Inherits Attribute + Public ReadOnly SiteName As String + Public ReadOnly PluginKey As String + Public Sub New(ByVal PluginKey As String, Optional ByVal SiteName As String = Nothing) + Me.PluginKey = PluginKey + Me.SiteName = SiteName + End Sub + End Class End Namespace \ No newline at end of file diff --git a/SCrawler/API/Instagram/SiteSettings.vb b/SCrawler/API/Instagram/SiteSettings.vb index 0aba8c6..08380cb 100644 --- a/SCrawler/API/Instagram/SiteSettings.vb +++ b/SCrawler/API/Instagram/SiteSettings.vb @@ -276,7 +276,7 @@ Namespace API.Instagram _AllowUserAgentUpdate = False UrlPatternUser = "https://www.instagram.com/{0}/" - UserRegex = RParams.DMS("[htps:/]{7,8}.*?instagram.com/([^/]+)", 1) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "instagram.com/"), 1) ImageVideoContains = "instagram.com" End Sub #End Region diff --git a/SCrawler/API/Instagram/UserData.vb b/SCrawler/API/Instagram/UserData.vb index 8903041..5a98a87 100644 --- a/SCrawler/API/Instagram/UserData.vb +++ b/SCrawler/API/Instagram/UserData.vb @@ -16,6 +16,7 @@ Imports PersonalUtilities.Functions.Messaging Imports PersonalUtilities.Functions.RegularExpressions Imports PersonalUtilities.Tools.Web.Clients Imports PersonalUtilities.Tools.Web.Clients.Base +Imports PersonalUtilities.Tools.Web.Documents Imports PersonalUtilities.Tools.Web.Documents.JSON Imports UTypes = SCrawler.API.Base.UserMedia.Types Imports UStates = SCrawler.API.Base.UserMedia.States @@ -142,13 +143,15 @@ Namespace API.Instagram #End Region #Region "Download data" Private E560Thrown As Boolean = False + Friend Err5xx As Integer = -1 Private Class ExitException : Inherits Exception + Friend Property Is560 As Boolean = False Friend Shared Sub Throw560(ByRef Source As UserData) If Not Source.E560Thrown Then - MyMainLOG = $"{Source.ToStringForLog}: (560) Download skipped until next session" + MyMainLOG = $"{Source.ToStringForLog}: ({IIf(Source.Err5xx > 0, Source.Err5xx, 560)}) Download skipped until next session" Source.E560Thrown = True End If - Throw New ExitException + Throw New ExitException With {.Is560 = True} End Sub End Class Private ReadOnly Property MyFilePostsKV As SFile @@ -236,6 +239,7 @@ Namespace API.Instagram Dim s As Sections = Sections.Timeline Dim errorFound As Boolean = False Try + Err5xx = -1 _Limit = If(DownloadTopCount, -1) _TotalPostsParsed = 0 LoadSavePostsKV(True) @@ -450,6 +454,7 @@ Namespace API.Instagram Dim StoriesList As List(Of String) = Nothing Dim StoriesRequested As Boolean = False Dim dValue% = 1 + Dim jsonArgs As New WebDocumentEventArgs With {.DeclaredError = EDP.ThrowException} LastCursor = Cursor Try Do While dValue = 1 @@ -524,7 +529,7 @@ Namespace API.Instagram 'Parsing If Not r.IsEmptyString Then - Using j As EContainer = JsonDocument.Parse(r).XmlIfNothing + Using j As EContainer = JsonDocument.Parse(r, jsonArgs).XmlIfNothing n = If(ENode Is Nothing, j, j.ItemF(ENode)).XmlIfNothing If n.Count > 0 Then Select Case Section @@ -605,18 +610,27 @@ Namespace API.Instagram End If dValue = 0 If HasNextPage And Not EndCursor.IsEmptyString Then DownloadData(EndCursor, Section, Token) + Catch jsonNull As ArgumentNullException When jsonArgs.State = WebDocumentEventArgs.States.Error And Section = Sections.Reels + Throw jsonNull Catch eex As ExitException Throw eex Catch ex As Exception dValue = ProcessException(ex, Token, $"data downloading error [{URL}]",, Section, False) End Try Loop + Catch jsonNull2 As ArgumentNullException When jsonArgs.State = WebDocumentEventArgs.States.Error And Section = Sections.Reels Catch eex2 As ExitException - If Not Section = Sections.Reels And (Section = Sections.Timeline Or Section = Sections.Tagged) And Not Cursor.IsEmptyString Then Throw eex2 + If eex2.Is560 Then + Throw New Plugin.ExitException With {.Silent = True} + Else + If Not Section = Sections.Reels And (Section = Sections.Timeline Or Section = Sections.Tagged) And Not Cursor.IsEmptyString Then Throw eex2 + End If Catch oex2 As OperationCanceledException When Token.IsCancellationRequested Or oex2.HelpLink = InstAborted If oex2.HelpLink = InstAborted Then HasError = True Catch DoEx As Exception ProcessException(DoEx, Token, $"data downloading error [{URL}]",, Section) + Finally + jsonArgs.DisposeIfReady End Try End Sub Private Sub DownloadPosts(ByVal Token As CancellationToken, Optional ByVal IsTagged As Boolean = False) @@ -1191,6 +1205,7 @@ Namespace API.Instagram Return 1 ElseIf Responser.StatusCode = 560 Or Responser.StatusCode = HttpStatusCode.InternalServerError Then '560, 500 MySiteSettings.SkipUntilNextSession = True + Err5xx = Responser.StatusCode Else MyMainLOG = $"Something is wrong. Your credentials may have expired [{CInt(Responser.StatusCode)}/{CInt(Responser.Status)}]: {ToString()} [{s}]" DisableSection(s) diff --git a/SCrawler/API/JustForFans/SiteSettings.vb b/SCrawler/API/JustForFans/SiteSettings.vb index db01a1d..d1a167b 100644 --- a/SCrawler/API/JustForFans/SiteSettings.vb +++ b/SCrawler/API/JustForFans/SiteSettings.vb @@ -46,7 +46,7 @@ Namespace API.JustForFans UserAgent = New PropertyValue(If(Responser.UserAgentExists, Responser.UserAgent, String.Empty), GetType(String), Sub(v) UpdateHeader(NameOf(UserAgent), v)) _AllowUserAgentUpdate = False - UserRegex = RParams.DMS("https://justfor.fans/([^/\?]+)", 1, EDP.ReturnValue) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "justfor.fans/"), 1, EDP.ReturnValue) UrlPatternUser = "https://justfor.fans/{0}" ImageVideoContains = "justfor.fans" End Sub diff --git a/SCrawler/API/OnlyFans/SiteSettings.vb b/SCrawler/API/OnlyFans/SiteSettings.vb index 69301a9..faffb46 100644 --- a/SCrawler/API/OnlyFans/SiteSettings.vb +++ b/SCrawler/API/OnlyFans/SiteSettings.vb @@ -169,7 +169,7 @@ Namespace API.OnlyFans OFScraperMP4decrypt_XML = New PropertyValue(String.Empty, GetType(String)) KeyModeDefault_XML = New PropertyValue(KeyModeDefault_Default) - UserRegex = RParams.DMS("onlyfans.com/([\w\._]+)", 1, EDP.ReturnValue) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "onlyfans.com/"), 1, EDP.ReturnValue) UrlPatternUser = "https://onlyfans.com/{0}" ImageVideoContains = "onlyfans.com" End Sub diff --git a/SCrawler/API/Reddit/SiteSettings.vb b/SCrawler/API/Reddit/SiteSettings.vb index ee54a5e..5780751 100644 --- a/SCrawler/API/Reddit/SiteSettings.vb +++ b/SCrawler/API/Reddit/SiteSettings.vb @@ -90,7 +90,7 @@ Namespace API.Reddit UrlPatternUser = "https://www.reddit.com/{0}/{1}/" ImageVideoContains = "reddit.com" - UserRegex = RParams.DM("[htps:/]{7,8}.*?reddit.com/([user]{1,4})/([^/]+)", 0, RegexReturn.ListByMatch, EDP.ReturnValue) + UserRegex = RParams.DM("[htps:/]{7,8}.*?reddit.com/([user]{1,4})/([^/\?&]+)", 0, RegexReturn.ListByMatch, EDP.ReturnValue) End Sub #End Region #Region "GetInstance" diff --git a/SCrawler/API/Redgifs/SiteSettings.vb b/SCrawler/API/Redgifs/SiteSettings.vb index 08ea703..5f4fe7f 100644 --- a/SCrawler/API/Redgifs/SiteSettings.vb +++ b/SCrawler/API/Redgifs/SiteSettings.vb @@ -49,7 +49,7 @@ Namespace API.RedGifs TokenUpdateIntervalProvider = New TokenRefreshIntervalProvider _AllowUserAgentUpdate = False UrlPatternUser = "https://www.redgifs.com/users/{0}/" - UserRegex = RParams.DMS("[htps:/]{7,8}.*?redgifs.com/users/([^/]+)", 1) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "redgifs.com/users/"), 1) ImageVideoContains = "redgifs" End Sub #End Region diff --git a/SCrawler/API/ThreadsNet/SiteSettings.vb b/SCrawler/API/ThreadsNet/SiteSettings.vb index b46d974..8393147 100644 --- a/SCrawler/API/ThreadsNet/SiteSettings.vb +++ b/SCrawler/API/ThreadsNet/SiteSettings.vb @@ -126,7 +126,7 @@ Namespace API.ThreadsNet HH_USER_AGENT = New PropertyValue(useragent, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_USER_AGENT), v)) UrlPatternUser = "https://www.threads.net/@{0}" - UserRegex = RParams.DMS("threads.net/@([^/\?&]+)", 1) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "threads.net/@"), 1) ImageVideoContains = "threads.net" End Sub #End Region diff --git a/SCrawler/API/TikTok/SiteSettings.vb b/SCrawler/API/TikTok/SiteSettings.vb index 260dc62..aab1139 100644 --- a/SCrawler/API/TikTok/SiteSettings.vb +++ b/SCrawler/API/TikTok/SiteSettings.vb @@ -40,7 +40,7 @@ Namespace API.TikTok UseParsedVideoDate = New PropertyValue(True) UseNetscapeCookies = True UrlPatternUser = "https://www.tiktok.com/@{0}/" - UserRegex = RParams.DMS("[htps:/]{7,8}.*?tiktok.com/@([^/]+)", 1) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "tiktok.com/@"), 1) ImageVideoContains = "tiktok.com" End Sub Friend Overrides Function GetInstance(ByVal What As ISiteSettings.Download) As IPluginContentProvider diff --git a/SCrawler/API/TikTok/UserData.vb b/SCrawler/API/TikTok/UserData.vb index 4d2bd0c..b275119 100644 --- a/SCrawler/API/TikTok/UserData.vb +++ b/SCrawler/API/TikTok/UserData.vb @@ -229,6 +229,7 @@ Namespace API.TikTok Exit Sub End If title = j.Value("title").StringRemoveWinForbiddenSymbols + If Not title.IsEmptyString Then title = Left(title, 150) If title.IsEmptyString Or Not TitleUseNative Then title = postID Else @@ -320,7 +321,7 @@ Namespace API.TikTok b.Encoding = BatchExecutor.UnicodeEncoding b.Execute(CreateYTCommand(DestinationFile, URL, True)) End Using - Return DestinationFile + If DestinationFile.Exists Then Return DestinationFile Else Return Nothing End Function #End Region #Region "DownloadSingleObject" diff --git a/SCrawler/API/Twitter/SiteSettings.vb b/SCrawler/API/Twitter/SiteSettings.vb index aca626e..c559af8 100644 --- a/SCrawler/API/Twitter/SiteSettings.vb +++ b/SCrawler/API/Twitter/SiteSettings.vb @@ -96,7 +96,7 @@ Namespace API.Twitter ConcurrentDownloads = New PropertyValue(1) MyConcurrentDownloadsProvider = New ConcurrentDownloadsProvider - UserRegex = RParams.DMS("[htps:/]{7,8}.*?twitter.com/([^/]+)", 1) + UserRegex = RParams.DMS(String.Format(UserRegexDefaultPattern, "/(twitter|x).com/"), 2) UrlPatternUser = "https://twitter.com/{0}" ImageVideoContains = "twitter" CheckNetscapeCookiesOnEndInit = True diff --git a/SCrawler/MainFrameObjects.vb b/SCrawler/MainFrameObjects.vb index 6a7a0c0..39e19ac 100644 --- a/SCrawler/MainFrameObjects.vb +++ b/SCrawler/MainFrameObjects.vb @@ -88,7 +88,7 @@ Friend Class MainFrameObjects : Implements INotificator If Settings.ProcessNotification(Sender) Then Dim b As List(Of IButton) = Nothing If Sender = NotifyObj.Profiles Or Sender = NotifyObj.AutoDownloader Or Sender = NotifyObj.SavedPosts Then _ - b = New List(Of IButton) From {New ToastButton("DEF_BTT_FEED", "Feed"), New ToastButton(DEF_BTT_DISABLE, "Disable")} + b = New List(Of IButton) From {New ToastButton(DEF_BTT_FEED, "Feed"), New ToastButton(DEF_BTT_DISABLE, "Disable")} Using n As New Notification(Message) With {.Key = $"{NotificationInternalKey}_{Sender}", .Buttons = b} : n.Show() : End Using End If End Sub diff --git a/SCrawler/MainMod.vb b/SCrawler/MainMod.vb index e4835ba..101c9ef 100644 --- a/SCrawler/MainMod.vb +++ b/SCrawler/MainMod.vb @@ -16,6 +16,7 @@ Imports SCrawler.DownloadObjects Friend Module MainMod Friend Settings As SettingsCLS Friend Const SettingsFolderName As String = XML.XmlFile.SettingsFolder + Friend Const UserRegexDefaultPattern As String = "{0}([^/\?&]+)" Friend ReadOnly LinkPattern As RParams = RParams.DMS("[htps:]{0,6}[/]{0,2}(.+)", 1) Friend ReadOnly FilesPattern As RParams = RParams.DM("[^\./]+?\.\w+", 1, EDP.ReturnValue) Friend Delegate Sub NotificationEventHandler(ByVal Sender As SettingsCLS.NotificationObjects, ByVal Message As String) diff --git a/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb b/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb index 4acf622..011a8b1 100644 --- a/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb +++ b/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb @@ -25,6 +25,16 @@ Namespace Plugin.Hosts Return Settings.Key End Get End Property + Friend ReadOnly Property Replacer As ReplaceInternalPluginAttribute + Get + Return Settings.Default.Replacer + End Get + End Property + Friend ReadOnly Property IsReplacer As Boolean + Get + Return Settings.Default.IsReplacer + End Get + End Property Friend ReadOnly Property Exists As Boolean Get Return Not Settings Is Nothing diff --git a/SCrawler/PluginsEnvironment/Hosts/SettingsHost.vb b/SCrawler/PluginsEnvironment/Hosts/SettingsHost.vb index d0a99f5..8615eaa 100644 --- a/SCrawler/PluginsEnvironment/Hosts/SettingsHost.vb +++ b/SCrawler/PluginsEnvironment/Hosts/SettingsHost.vb @@ -107,6 +107,12 @@ Namespace Plugin.Hosts #End Region #Region "Host declarations" Friend ReadOnly Property Source As ISiteSettings + Friend Property Replacer As ReplaceInternalPluginAttribute = Nothing + Friend ReadOnly Property IsReplacer As Boolean + Get + Return Not Replacer Is Nothing AndAlso (Not Replacer.PluginKey.IsEmptyString Or Not Replacer.SiteName.IsEmptyString) + End Get + End Property Friend ReadOnly Property PropList As List(Of PropertyValueHost) Friend ReadOnly Property Name As String Get @@ -251,6 +257,8 @@ Namespace Plugin.Hosts HasSpecialOptions = True End If End With + ElseIf TypeOf a Is ReplaceInternalPluginAttribute Then + Replacer = a End If Next End If diff --git a/SCrawler/SettingsCLS.vb b/SCrawler/SettingsCLS.vb index 6018550..c1fe436 100644 --- a/SCrawler/SettingsCLS.vb +++ b/SCrawler/SettingsCLS.vb @@ -210,7 +210,21 @@ Friend Class SettingsCLS : Implements IDownloaderSettings, IDisposable Plugins.AddRange(PluginHost.GetMyHosts(MyXML, GlobalPath.Value, DefaultTemporary, DefaultDownloadImages, DefaultDownloadVideos)) Dim tmpPluginList As IEnumerable(Of PluginHost) = PluginHost.GetPluginsHosts(MyXML, GlobalPath.Value, DefaultTemporary, DefaultDownloadImages, DefaultDownloadVideos) - If tmpPluginList.ListExists Then Plugins.AddRange(tmpPluginList) + If tmpPluginList.ListExists Then + Dim tplIndx% = -1 + For Each tpl As PluginHost In tmpPluginList + If tpl.IsReplacer Then + tplIndx = Plugins.FindIndex(Function(pl) pl.Key.StringToLower = tpl.Replacer.PluginKey.StringToLower Or + pl.Name.StringToLower = tpl.Replacer.SiteName.StringToLower) + If tplIndx >= 0 Then + Plugins(tplIndx).Settings.ListClearDispose + Plugins.RemoveAt(tplIndx) + If Plugins.Count = 0 Then Exit For + End If + End If + Next + Plugins.AddRange(tmpPluginList) + End If MainFrameUsersShowDefaults = New XMLValue(Of Boolean)("UsersShowDefaults", True, MyXML) MainFrameUsersShowSubscriptions = New XMLValue(Of Boolean)("UsersShowSubscriptions", True, MyXML)