diff --git a/SCrawler/API/Base/UserDataBase.vb b/SCrawler/API/Base/UserDataBase.vb index a775605..445ae86 100644 --- a/SCrawler/API/Base/UserDataBase.vb +++ b/SCrawler/API/Base/UserDataBase.vb @@ -1829,6 +1829,7 @@ BlockNullPicture: If m.Contains(IUserData.EraseMode.History) Then If MyFilePosts.Delete(SFO.File, SFODelete.DeleteToRecycleBin, e) Then result = True If MyFileData.Delete(SFO.File, SFODelete.DeleteToRecycleBin, e) Then result = True + EraseData_AdditionalDataFiles() End If If m.Contains(IUserData.EraseMode.Data) Then Dim files As List(Of SFile) = SFile.GetFiles(DownloadContentDefault_GetRootDir.CSFileP,, SearchOption.AllDirectories, e) @@ -1850,6 +1851,8 @@ BlockNullPicture: Return ErrorsDescriber.Execute(EDP.SendToLog + EDP.ReturnValue, ex, $"EraseData({CInt(Mode)}): {ToStringForLog()}", False) End Try End Function + Protected Overridable Sub EraseData_AdditionalDataFiles() + End Sub Friend Overridable Function Delete(Optional ByVal Multiple As Boolean = False, Optional ByVal CollectionValue As Integer = -1) As Integer Implements IUserData.Delete Dim f As SFile = SFile.GetPath(MyFile.CutPath.Path) If f.Exists(SFO.Path, False) AndAlso (User.Merged OrElse f.Delete(SFO.Path, Settings.DeleteMode)) Then @@ -2026,8 +2029,8 @@ BlockNullPicture: End Function #End Region #Region "Errors functions" - Protected Sub LogError(ByVal ex As Exception, ByVal Message As String) - ErrorsDescriber.Execute(EDP.SendToLog, ex, $"{ToStringForLog()}: {Message}") + Protected Sub LogError(ByVal ex As Exception, ByVal Message As String, Optional ByVal e As ErrorsDescriber = Nothing) + ErrorsDescriber.Execute(If(e.Exists, e, New ErrorsDescriber(EDP.SendToLog)), ex, $"{ToStringForLog()}: {Message}") End Sub Protected Sub ErrorDownloading(ByVal f As SFile, ByVal URL As String) If Not f.Exists Then MyMainLOG = $"Error downloading from [{URL}] to [{f}]" @@ -2040,9 +2043,13 @@ BlockNullPicture: Private Overloads Sub ThrowAny() Implements IThrower.ThrowAny ThrowAny(TokenQueue) End Sub + ''' ThrowAnyImpl(Token) ''' ''' Friend Overridable Overloads Sub ThrowAny(ByVal Token As CancellationToken) + ThrowAnyImpl(Token) + End Sub + Protected Sub ThrowAnyImpl(ByVal Token As CancellationToken) Token.ThrowIfCancellationRequested() TokenQueue.ThrowIfCancellationRequested() TokenPersonal.ThrowIfCancellationRequested() diff --git a/SCrawler/API/Instagram/Declarations.vb b/SCrawler/API/Instagram/Declarations.vb index e8bed4c..ae1e125 100644 --- a/SCrawler/API/Instagram/Declarations.vb +++ b/SCrawler/API/Instagram/Declarations.vb @@ -17,7 +17,7 @@ Namespace API.Instagram Friend ReadOnly FilesPattern As RParams = RParams.DMS(".+?([^/\?]+?\.[\w\d]{3,4})(?=(\?|\Z))", 1, EDP.ReturnValue) Friend Sub UpdateResponser(ByVal Source As IResponse, ByRef Destination As Responser) Const r_wwwClaimName$ = "x-ig-set-www-claim" - Const r_tokenName$ = "csrftoken" + Const r_tokenName$ = SiteSettings.Header_CSRF_TOKEN_COOKIE If Not Source Is Nothing Then Dim isInternal As Boolean = TypeOf Source Is WebDataResponse Dim wwwClaimName$, tokenName$ diff --git a/SCrawler/API/Instagram/SiteSettings.vb b/SCrawler/API/Instagram/SiteSettings.vb index d73a00a..888d8ab 100644 --- a/SCrawler/API/Instagram/SiteSettings.vb +++ b/SCrawler/API/Instagram/SiteSettings.vb @@ -70,13 +70,14 @@ Namespace API.Instagram End Class #End Region #Region "Authorization properties" - Private Const Header_IG_APP_ID As String = "x-ig-app-id" + Friend Const Header_IG_APP_ID As String = "x-ig-app-id" Friend Const Header_IG_WWW_CLAIM As String = "x-ig-www-claim" Friend Const Header_CSRF_TOKEN As String = "x-csrftoken" - Private Const Header_ASBD_ID As String = "X-Asbd-Id" - Private Const Header_Browser As String = "Sec-Ch-Ua" - Private Const Header_BrowserExt As String = "Sec-Ch-Ua-Full-Version-List" - Private Const Header_Platform As String = "Sec-Ch-Ua-Platform-Version" + Friend Const Header_CSRF_TOKEN_COOKIE As String = "csrftoken" + Friend Const Header_ASBD_ID As String = "X-Asbd-Id" + Friend Const Header_Browser As String = "Sec-Ch-Ua" + Friend Const Header_BrowserExt As String = "Sec-Ch-Ua-Full-Version-List" + Friend Const Header_Platform As String = "Sec-Ch-Ua-Platform-Version" Friend ReadOnly Property HashTagged As PropertyValue @@ -365,13 +366,16 @@ Namespace API.Instagram SkipUntilNextSession = False End Sub #End Region -#Region "UserOptions, GetUserPostUrl" +#Region "UserOptions, GetUserUrl, GetUserPostUrl" Friend Overrides Sub UserOptions(ByRef Options As Object, ByVal OpenForm As Boolean) If Options Is Nothing OrElse Not TypeOf Options Is EditorExchangeOptions Then Options = New EditorExchangeOptions(Me) If OpenForm Then Using f As New InternalSettingsForm(Options, Me, False) : f.ShowDialog() : End Using End If End Sub + Friend Overrides Function GetUserUrl(ByVal User As IPluginContentProvider) As String + Return String.Format(UrlPatternUser, DirectCast(User, UserData).NameTrue) + End Function Friend Overrides Function GetUserPostUrl(ByVal User As UserDataBase, ByVal Media As UserMedia) As String Try Dim code$ = DirectCast(User, UserData).GetPostCodeById(Media.Post.ID) diff --git a/SCrawler/API/Instagram/UserData.vb b/SCrawler/API/Instagram/UserData.vb index 47026d0..e46f08a 100644 --- a/SCrawler/API/Instagram/UserData.vb +++ b/SCrawler/API/Instagram/UserData.vb @@ -30,7 +30,7 @@ Namespace API.Instagram Private Const Name_NameTrue As String = "NameTrue" #End Region #Region "Declarations" - Private Structure PostKV : Implements IEContainerProvider + Protected Structure PostKV : Implements IEContainerProvider Private Const Name_Code As String = "Code" Private Const Name_Section As String = "Section" Friend Code As String @@ -78,8 +78,8 @@ Namespace API.Instagram Friend Property GetStories As Boolean Friend Property GetStoriesUser As Boolean Friend Property GetTaggedData As Boolean - Private _NameTrue As String = String.Empty - Private ReadOnly Property NameTrue As String + Protected _NameTrue As String = String.Empty + Friend ReadOnly Property NameTrue As String Get Return _NameTrue.IfNullOrEmpty(Name) End Get @@ -143,12 +143,22 @@ Namespace API.Instagram Throw New ExitException End Sub End Class - Private Sub LoadSavePostsKV(ByVal Load As Boolean) + Private ReadOnly Property MyFilePostsKV As SFile + Get + Dim f As SFile = MyFilePosts + If Not f.IsEmptyString Then + f.Name &= "_KV" + f.Extension = "xml" + Return f + Else + Return Nothing + End If + End Get + End Property + Protected Sub LoadSavePostsKV(ByVal Load As Boolean) Dim x As XmlFile - Dim f As SFile = MyFilePosts + Dim f As SFile = MyFilePostsKV If Not f.IsEmptyString Then - f.Name &= "_KV" - f.Extension = "xml" If Load Then PostsKVIDs.Clear() x = New XmlFile(f, Protector.Modes.All, False) With {.AllowSameNames = True, .XmlReadOnly = True} @@ -182,10 +192,8 @@ Namespace API.Instagram Friend Function GetPostCodeById(ByVal PostID As String) As String Try If Not PostID.IsEmptyString Then - Dim f As SFile = MyFilePosts + Dim f As SFile = MyFilePostsKV If Not f.IsEmptyString Then - f.Name &= "_KV" - f.Extension = "xml" Dim l As List(Of PostKV) = Nothing Using x As New XmlFile(f, Protector.Modes.All, False) With {.AllowSameNames = True, .XmlReadOnly = True} x.LoadData() @@ -213,11 +221,15 @@ Namespace API.Instagram End If End Function Private _DownloadingInProgress As Boolean = False + Private _Limit As Integer = -1 + Private _TotalPostsParsed As Integer = 0 Protected Overrides Sub DownloadDataF(ByVal Token As CancellationToken) UserNameRequested = False Dim s As Sections = Sections.Timeline Dim errorFound As Boolean = False Try + _Limit = If(DownloadTopCount, -1) + _TotalPostsParsed = 0 LoadSavePostsKV(True) _DownloadingInProgress = True AddHandler Responser.ResponseReceived, AddressOf Responser_ResponseReceived @@ -270,7 +282,7 @@ Namespace API.Instagram Catch ex As Exception End Try End Sub - Private Sub UpdateResponser() + Protected Overridable Sub UpdateResponser() Try If _DownloadingInProgress AndAlso Not Responser Is Nothing AndAlso Not Responser.Disposed Then _DownloadingInProgress = False @@ -280,10 +292,10 @@ Namespace API.Instagram Catch End Try End Sub - Private Sub Responser_ResponseReceived(ByVal Sender As Object, ByVal e As EventArguments.WebDataResponse) + Protected Overridable Sub Responser_ResponseReceived(ByVal Sender As Object, ByVal e As EventArguments.WebDataResponse) Declarations.UpdateResponser(e, Responser) End Sub - Private Enum Sections : Timeline : Tagged : Stories : UserStories : SavedPosts : End Enum + Protected Enum Sections : Timeline : Tagged : Stories : UserStories : SavedPosts : End Enum Private Const StoriesFolder As String = "Stories" Private Const TaggedFolder As String = "Tagged" #Region "429 bypass" @@ -572,6 +584,7 @@ Namespace API.Instagram Dim URL$ = String.Empty Dim dValue% = 1 Dim _Index% = 0 + Dim before% If PostsToReparse.Count > 0 Then ProgressPre.ChangeMax(PostsToReparse.Count) Try Do While dValue = 1 @@ -600,7 +613,12 @@ Namespace API.Instagram If Not j Is Nothing Then If If(j("items")?.Count, 0) > 0 Then With j("items") - For Each jj In .Self : ObtainMedia(jj, PostsToReparse(i).ID) : Next + For Each jj In .Self + before = _TempMediaList.Count + ObtainMedia(jj, PostsToReparse(i).ID) + If Not before = _TempMediaList.Count Then _TotalPostsParsed += 1 + If _Limit > 0 And _TotalPostsParsed >= _Limit Then Throw New ExitException + Next End With End If j.Dispose() @@ -643,13 +661,17 @@ Namespace API.Instagram End Using End If End Sub - Private Function DefaultParser(ByVal Items As IEnumerable(Of EContainer), ByVal Section As Sections, ByVal Token As CancellationToken, - Optional ByVal SpecFolder As String = Nothing) As Boolean + Protected DefaultParser_ElemNode() As Object = Nothing + Protected DefaultParser_IgnorePass As Boolean = False + Protected DefaultParser_PostUrlCreator As Func(Of PostKV, String) = Function(post) $"https://www.instagram.com/p/{post.Code}/" + Protected Function DefaultParser(ByVal Items As IEnumerable(Of EContainer), ByVal Section As Sections, ByVal Token As CancellationToken, + Optional ByVal SpecFolder As String = Nothing) As Boolean ThrowAny(Token) If Items.Count > 0 Then Dim PostIDKV As PostKV Dim Pinned As Boolean - Dim PostDate$ + Dim PostDate$, PostOriginUrl$ + Dim before% If SpecFolder.IsEmptyString Then Select Case Section Case Sections.Tagged : SpecFolder = TaggedFolder @@ -660,22 +682,26 @@ Namespace API.Instagram ProgressPre.ChangeMax(Items.Count) For Each nn In Items ProgressPre.Perform() - With nn + With If(Not DefaultParser_ElemNode Is Nothing, nn.ItemF(DefaultParser_ElemNode), nn) PostIDKV = New PostKV(.Value("code"), .Value("id"), Section) + PostOriginUrl = DefaultParser_PostUrlCreator(PostIDKV) Pinned = .Contains("timeline_pinned_user_ids") - If PostKvExists(PostIDKV) Then + If Not DefaultParser_IgnorePass AndAlso PostKvExists(PostIDKV) Then If Not Pinned Then Return False Else _TempPostsList.Add(PostIDKV.ID) PostsKVIDs.ListAddValue(PostIDKV, LNC) PostDate = .Value("taken_at") - If Not IsSavedPosts Then + If Not DefaultParser_IgnorePass And Not IsSavedPosts Then Select Case CheckDatesLimit(PostDate, UnixDate32Provider) Case DateResult.Skip : Continue For Case DateResult.Exit : If Not Pinned Then Return False End Select End If - ObtainMedia(.Self, PostIDKV.ID, SpecFolder, PostDate) + before = _TempMediaList.Count + ObtainMedia(.Self, PostIDKV.ID, SpecFolder, PostDate,, PostOriginUrl) + If Not before = _TempMediaList.Count Then _TotalPostsParsed += 1 + If _Limit > 0 And _TotalPostsParsed >= _Limit Then Return False End If End With Next @@ -686,7 +712,7 @@ Namespace API.Instagram End Function #End Region #Region "Code ID converters" - Private Function CodeToID(ByVal Code As String) As String + Protected Function CodeToID(ByVal Code As String) As String Const CodeSymbols$ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" Try If Not Code.IsEmptyString Then @@ -706,12 +732,19 @@ Namespace API.Instagram End Function #End Region #Region "Obtain Media" - Private Sub ObtainMedia(ByVal n As EContainer, ByVal PostID As String, Optional ByVal SpecialFolder As String = Nothing, - Optional ByVal DateObj As String = Nothing) + Protected ObtainMedia_SizeFuncVid As Func(Of EContainer, Sizes) = Nothing + Protected ObtainMedia_SizeFuncPic As Func(Of EContainer, Sizes) = Nothing + Protected ObtainMedia_AllowAbstract As Boolean = False + Protected Sub ObtainMedia(ByVal n As EContainer, ByVal PostID As String, Optional ByVal SpecialFolder As String = Nothing, + Optional ByVal DateObj As String = Nothing, Optional ByVal InitialType As Integer = -1, + Optional ByVal PostOriginUrl As String = Nothing) Try + Dim wrongData As Predicate(Of Sizes) = Function(_ss) _ss.HasError Or _ss.Data.IsEmptyString Dim img As Predicate(Of EContainer) = Function(_img) Not _img.Name.IsEmptyString AndAlso _img.Name.StartsWith("image_versions") AndAlso _img.Count > 0 Dim vid As Predicate(Of EContainer) = Function(_vid) Not _vid.Name.IsEmptyString AndAlso _vid.Name.StartsWith("video_versions") AndAlso _vid.Count > 0 Dim ss As Func(Of EContainer, Sizes) = Function(_ss) New Sizes(_ss.Value("width"), _ss.Value("url")) + Dim ssVid As Func(Of EContainer, Sizes) = ss + Dim ssPic As Func(Of EContainer, Sizes) = ss Dim mDate As Func(Of EContainer, String) = Function(ByVal elem As EContainer) As String If Not DateObj.IsEmptyString Then Return DateObj If elem.Contains("taken_at") Then @@ -731,28 +764,41 @@ Namespace API.Instagram End If End If End Function + If Not ObtainMedia_SizeFuncVid Is Nothing Then ssVid = ObtainMedia_SizeFuncVid + If Not ObtainMedia_SizeFuncPic Is Nothing Then ssPic = ObtainMedia_SizeFuncPic If n.Count > 0 Then Dim l As New List(Of Sizes) Dim d As EContainer Dim t% + Dim abstractDecision As Boolean = False '8 - gallery '2 - one video '1 - one picture t = n.Value("media_type").FromXML(Of Integer)(-1) + If t = -1 And InitialType = 8 And ObtainMedia_AllowAbstract Then + If n.Contains(vid) Then + t = 2 + abstractDecision = True + ElseIf n.Contains(img) Then + t = 1 + abstractDecision = True + End If + End If If t >= 0 Then Select Case t Case 1 If n.Contains(img) Then - t = n.Value("media_type").FromXML(Of Integer)(-1) + If Not abstractDecision Then t = n.Value("media_type").FromXML(Of Integer)(-1) DateObj = mDate(n) If t >= 0 Then With n.ItemF({img, "candidates"}).XmlIfNothing If .Count > 0 Then l.Clear() - l.ListAddList(.Select(ss), LNC) + l.ListAddList(.Select(ssPic), LNC) + If l.Count > 0 Then l.RemoveAll(wrongData) If l.Count > 0 Then l.Sort() - _TempMediaList.ListAddValue(MediaFromData(UTypes.Picture, l.First.Data, PostID, DateObj, SpecialFolder), LNC) + _TempMediaList.ListAddValue(MediaFromData(UTypes.Picture, l.First.Data, PostID, DateObj, SpecialFolder, PostOriginUrl), LNC) l.Clear() End If End If @@ -765,10 +811,11 @@ Namespace API.Instagram With n.ItemF({vid}).XmlIfNothing If .Count > 0 Then l.Clear() - l.ListAddList(.Select(ss), LNC) + l.ListAddList(.Select(ssVid), LNC) + If l.Count > 0 Then l.RemoveAll(wrongData) If l.Count > 0 Then l.Sort() - _TempMediaList.ListAddValue(MediaFromData(UTypes.Video, l.First.Data, PostID, DateObj, SpecialFolder), LNC) + _TempMediaList.ListAddValue(MediaFromData(UTypes.Video, l.First.Data, PostID, DateObj, SpecialFolder, PostOriginUrl), LNC) l.Clear() End If End If @@ -778,7 +825,7 @@ Namespace API.Instagram DateObj = mDate(n) With n("carousel_media").XmlIfNothing If .Count > 0 Then - For Each d In .Self : ObtainMedia(d, PostID, SpecialFolder, DateObj) : Next + For Each d In .Self : ObtainMedia(d, PostID, SpecialFolder, DateObj, 8, PostOriginUrl) : Next End If End With End Select @@ -939,6 +986,12 @@ Namespace API.Instagram DownloadContentDefault(Token) End Sub #End Region +#Region "Erase" + Protected Overrides Sub EraseData_AdditionalDataFiles() + Dim f As SFile = MyFilePostsKV + If f.Exists Then f.Delete(SFO.File, SFODelete.DeleteToRecycleBin, EDP.ReturnValue) + End Sub +#End Region #Region "Exceptions" ''' ''' @@ -996,9 +1049,9 @@ Namespace API.Instagram #End Region #Region "Create media" Private Function MediaFromData(ByVal t As UTypes, ByVal _URL As String, ByVal PostID As String, ByVal PostDate As String, - Optional ByVal SpecialFolder As String = Nothing) As UserMedia + Optional ByVal SpecialFolder As String = Nothing, Optional ByVal PostOriginUrl As String = Nothing) As UserMedia _URL = LinkFormatterSecure(RegexReplace(_URL.Replace("\", String.Empty), LinkPattern)) - Dim m As New UserMedia(_URL, t) With {.Post = New UserPost With {.ID = PostID}} + Dim m As New UserMedia(_URL, t) With {.URL_BASE = PostOriginUrl.IfNullOrEmpty(_URL), .Post = New UserPost With {.ID = PostID}} If Not m.URL.IsEmptyString Then m.File = CStr(RegexReplace(m.URL, FilesPattern)) If Not PostDate.IsEmptyString Then m.Post.Date = AConvert(Of Date)(PostDate, UnixDate32Provider, Nothing) Else m.Post.Date = Nothing m.SpecialFolder = SpecialFolder diff --git a/SCrawler/API/ThreadsNet/SiteSettings.vb b/SCrawler/API/ThreadsNet/SiteSettings.vb new file mode 100644 index 0000000..cd04063 --- /dev/null +++ b/SCrawler/API/ThreadsNet/SiteSettings.vb @@ -0,0 +1,166 @@ +' Copyright (C) 2023 Andy https://github.com/AAndyProgram +' This program is free software: you can redistribute it and/or modify +' it under the terms of the GNU General Public License as published by +' the Free Software Foundation, either version 3 of the License, or +' (at your option) any later version. +' +' This program is distributed in the hope that it will be useful, +' but WITHOUT ANY WARRANTY +Imports SCrawler.API.Base +Imports SCrawler.Plugin +Imports SCrawler.Plugin.Attributes +Imports PersonalUtilities.Tools.Web.Clients +Imports PersonalUtilities.Tools.Web.Cookies +Imports PersonalUtilities.Functions.RegularExpressions +Imports IG = SCrawler.API.Instagram.SiteSettings +Namespace API.ThreadsNet + + Friend Class SiteSettings : Inherits SiteSettingsBase +#Region "Declarations" + Friend Overrides ReadOnly Property Icon As Icon + Get + Return My.Resources.SiteResources.ThreadsIcon_192 + End Get + End Property + Private ReadOnly _Image As Image + Friend Overrides ReadOnly Property Image As Image + Get + Return _Image + End Get + End Property +#Region "Authorization" + + Friend ReadOnly Property HH_CSRF_TOKEN As PropertyValue + + Friend Property HH_IG_APP_ID As PropertyValue + + Friend Property HH_ASBD_ID As PropertyValue + + Private Property HH_BROWSER As PropertyValue + + Private Property HH_BROWSER_EXT As PropertyValue + + Private Property HH_PLATFORM As PropertyValue + + Private ReadOnly Property HH_USER_AGENT As PropertyValue + Private Sub ChangeResponserFields(ByVal PropName As String, ByVal Value As Object) + If Not PropName.IsEmptyString Then + Dim f$ = String.Empty + Dim isUserAgent As Boolean = False + Select Case PropName + Case NameOf(HH_IG_APP_ID) : f = IG.Header_IG_APP_ID + Case NameOf(HH_ASBD_ID) : f = IG.Header_ASBD_ID + Case NameOf(HH_CSRF_TOKEN) : f = IG.Header_CSRF_TOKEN + Case NameOf(HH_BROWSER) : f = IG.Header_Browser + Case NameOf(HH_BROWSER_EXT) : f = IG.Header_BrowserExt + Case NameOf(HH_PLATFORM) : f = IG.Header_Platform + Case NameOf(HH_USER_AGENT) : isUserAgent = True + End Select + If Not f.IsEmptyString Then + Responser.Headers.Remove(f) + If Not CStr(Value).IsEmptyString Then Responser.Headers.Add(f, CStr(Value)) + ElseIf isUserAgent Then + Responser.UserAgent = CStr(Value) + End If + End If + End Sub +#End Region +#End Region +#Region "Initializer" + Friend Sub New() + MyBase.New("Threads", "threads.net") + _AllowUserAgentUpdate = False + _Image = My.Resources.SiteResources.ThreadsIcon_192.ToBitmap + + Dim app_id$ = String.Empty + Dim token$ = String.Empty + Dim asbd$ = String.Empty + Dim browser$ = String.Empty + Dim browserExt$ = String.Empty + Dim platform$ = String.Empty + Dim useragent$ = String.Empty + + With Responser + .Accept = "*/*" + 'URGENT: remove after debug + .DeclaredError = EDP.SendToLog + EDP.ThrowException + If .UserAgentExists Then useragent = .UserAgent + With .Headers + If .Count > 0 Then + token = .Value(IG.Header_CSRF_TOKEN) + app_id = .Value(IG.Header_IG_APP_ID) + asbd = .Value(IG.Header_ASBD_ID) + browser = .Value(IG.Header_Browser) + browserExt = .Value(IG.Header_BrowserExt) + platform = .Value(IG.Header_Platform) + End If + .Add("Authority", "www.threads.net") + .Add("Origin", "https://www.threads.net") + .Add("Upgrade-Insecure-Requests", 1) + .Add("Sec-Ch-Ua-Model", "") + .Add("Sec-Ch-Ua-Mobile", "?0") + .Add("Sec-Ch-Ua-Platform", """Windows""") + .Add("Sec-Fetch-Dest", "empty") + .Add("Sec-Fetch-Mode", "cors") + .Add("Sec-Fetch-Site", "same-origin") + .Add("Sec-Fetch-User", "?1") + .Add("x-fb-friendly-name", "BarcelonaProfileThreadsTabRefetchableQuery") + End With + .CookiesExtractMode = Responser.CookiesExtractModes.Any + .CookiesUpdateMode = CookieKeeper.UpdateModes.ReplaceByNameAll + .CookiesExtractedAutoSave = False + .Cookies.ChangedAllowInternalDrop = False + .Cookies.Changed = False + End With + + HH_CSRF_TOKEN = New PropertyValue(token, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_CSRF_TOKEN), v)) + HH_IG_APP_ID = New PropertyValue(app_id, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_IG_APP_ID), v)) + HH_ASBD_ID = New PropertyValue(asbd, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_ASBD_ID), v)) + HH_BROWSER = New PropertyValue(browser, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_BROWSER), v)) + HH_BROWSER_EXT = New PropertyValue(browserExt, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_BROWSER_EXT), v)) + HH_PLATFORM = New PropertyValue(platform, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_PLATFORM), v)) + HH_USER_AGENT = New PropertyValue(useragent, GetType(String), Sub(v) ChangeResponserFields(NameOf(HH_USER_AGENT), v)) + + UrlPatternUser = "https://www.threads.net/@{0}" + UserRegex = RParams.DMS("threads.net/@([^/\?&]+)", 1) + ImageVideoContains = "threads.net" + End Sub +#End Region +#Region "UpdateResponserData" + Friend Sub UpdateResponserData(ByVal Resp As Responser) + With Responser.Cookies + Dim csrf$ = String.Empty + .Update(Resp.Cookies) + If .Changed Then + Responser.SaveCookies() + .Changed = False + csrf = If(.FirstOrDefault(Function(c) c.Name.StringToLower = IG.Header_CSRF_TOKEN_COOKIE)?.Value, String.Empty) + End If + If Not csrf.IsEmptyString AndAlso Not AEquals(Of String)(csrf, HH_CSRF_TOKEN.Value) Then HH_CSRF_TOKEN.Value = csrf + End With + End Sub +#End Region +#Region "GetInstance" + Friend Overrides Function GetInstance(ByVal What As ISiteSettings.Download) As IPluginContentProvider + Return New UserData + End Function +#End Region +#Region "BaseAuthExists, GetUserUrl, GetUserPostUrl" + Friend Overrides Function BaseAuthExists() As Boolean + Return Responser.CookiesExists And {HH_CSRF_TOKEN, HH_IG_APP_ID}.All(Function(v) ACheck(Of String)(v.Value)) + End Function + Friend Overrides Function GetUserUrl(ByVal User As IPluginContentProvider) As String + Return String.Format(UrlPatternUser, DirectCast(User, UserData).NameTrue) + End Function + Friend Overrides Function GetUserPostUrl(ByVal User As UserDataBase, ByVal Media As UserMedia) As String + Try + Dim code$ = DirectCast(User, UserData).GetPostCodeById(Media.Post.ID) + Dim name$ = DirectCast(User, UserData).NameTrue + If Not code.IsEmptyString Then Return $"https://www.threads.net/@{name}/post/{code}/" Else Return String.Empty + Catch ex As Exception + Return ErrorsDescriber.Execute(EDP.SendToLog, ex, "Can't open user's post", String.Empty) + End Try + End Function +#End Region + End Class +End Namespace \ No newline at end of file diff --git a/SCrawler/API/ThreadsNet/UserData.vb b/SCrawler/API/ThreadsNet/UserData.vb new file mode 100644 index 0000000..8562aa6 --- /dev/null +++ b/SCrawler/API/ThreadsNet/UserData.vb @@ -0,0 +1,290 @@ +' Copyright (C) 2023 Andy https://github.com/AAndyProgram +' This program is free software: you can redistribute it and/or modify +' it under the terms of the GNU General Public License as published by +' the Free Software Foundation, either version 3 of the License, or +' (at your option) any later version. +' +' This program is distributed in the hope that it will be useful, +' but WITHOUT ANY WARRANTY +Imports System.Threading +Imports SCrawler.API.Base +Imports SCrawler.API.YouTube.Objects +Imports PersonalUtilities.Functions.XML +Imports PersonalUtilities.Functions.RegularExpressions +Imports PersonalUtilities.Tools.Web.Documents.JSON +Imports PersonalUtilities.Tools.Web.Clients +Imports PersonalUtilities.Tools.Web.Clients.EventArguments +Imports IGS = SCrawler.API.Instagram.SiteSettings +Namespace API.ThreadsNet + Friend Class UserData : Inherits Instagram.UserData +#Region "Declarations" + Private Const Header_FB_LSD As String = "x-fb-lsd" + Private ReadOnly Property MySettings As SiteSettings + Get + Return HOST.Source + End Get + End Property + Private ReadOnly ObtainMedia_SizeFuncPic_RegexP As RParams = RParams.DMS("_p(\d+)x(\d+)", 1, EDP.ReturnValue) + Private ReadOnly ObtainMedia_SizeFuncPic_RegexS As RParams = RParams.DMS("_s(\d+)x(\d+)", 1, EDP.ReturnValue) + Private ReadOnly DefaultParser_ElemNode_Default() As Object = {"node", "thread_items", 0, "post"} + Private OPT_LSD As String = String.Empty + Private OPT_FB_DTSG As String = String.Empty + Private ReadOnly Property Valid As Boolean + Get + Return Not OPT_LSD.IsEmptyString And Not OPT_FB_DTSG.IsEmptyString And Not ID.IsEmptyString + End Get + End Property +#End Region +#Region "Loader" + Protected Overrides Sub LoadUserInformation_OptionalFields(ByRef Container As XmlFile, ByVal Loading As Boolean) + End Sub +#End Region +#Region "Exchange" + Friend Overrides Function ExchangeOptionsGet() As Object + Return Nothing + End Function + Friend Overrides Sub ExchangeOptionsSet(ByVal Obj As Object) + End Sub +#End Region +#Region "Initializer" + Friend Sub New() + ObtainMedia_SizeFuncPic = Function(ByVal ss As EContainer) As Sizes + If ss.Value("url").IsEmptyString Then + Return New Sizes("----", "") + ElseIf Not ss.Value("width").IsEmptyString Then + Return New Sizes(ss.Value("height").IfNullOrEmpty(ss.Value("width")), ss.Value("url")) + Else + Dim rval$ = RegexReplace(ss.Value("url"), ObtainMedia_SizeFuncPic_RegexP) + If Not rval.IsEmptyString Then Return New Sizes(rval, ss.Value("url")) + rval = RegexReplace(ss.Value("url"), ObtainMedia_SizeFuncPic_RegexS) + If Not rval.IsEmptyString Then Return New Sizes(AConvert(Of Integer)(rval, 1) * -1, ss.Value("url")) + Return New Sizes(10000, ss.Value("url")) + End If + End Function + ObtainMedia_SizeFuncVid = Function(ss) If(ss.Value("url").IsEmptyString, New Sizes("----", ""), New Sizes(10000, ss.Value("url"))) + ObtainMedia_AllowAbstract = True + DefaultParser_ElemNode = DefaultParser_ElemNode_Default + DefaultParser_PostUrlCreator = Function(post) $"https://www.threads.net/@{NameTrue}/post/{post.Code}" + End Sub +#End Region +#Region "Download functions" + Protected Overrides Sub DownloadDataF(ByVal Token As CancellationToken) + Dim errorFound As Boolean = False + Try + Responser.Method = "POST" + AddHandler Responser.ResponseReceived, AddressOf Responser_ResponseReceived + LoadSavePostsKV(True) + OPT_LSD = String.Empty + OPT_FB_DTSG = String.Empty + DownloadData(String.Empty, Token) + Catch ex As Exception + errorFound = True + Throw ex + Finally + Responser.Method = "POST" + UpdateResponser() + MySettings.UpdateResponserData(Responser) + If Not errorFound Then LoadSavePostsKV(False) + End Try + End Sub + Protected Overrides Sub UpdateResponser() + If Not Responser Is Nothing AndAlso Not Responser.Disposed Then + RemoveHandler Responser.ResponseReceived, AddressOf Responser_ResponseReceived + End If + End Sub + Protected Overrides Sub Responser_ResponseReceived(ByVal Sender As Object, ByVal e As WebDataResponse) + If e.CookiesExists Then + Dim csrf$ = If(e.Cookies.FirstOrDefault(Function(v) v.Name.StringToLower = IGS.Header_CSRF_TOKEN_COOKIE)?.Value, String.Empty) + If Not csrf.IsEmptyString AndAlso Not AEquals(Of String)(csrf, Responser.Headers.Value(IGS.Header_CSRF_TOKEN)) Then _ + Responser.Headers.Add(IGS.Header_CSRF_TOKEN, csrf) + End If + End Sub + Private Overloads Sub DownloadData(ByVal Cursor As String, ByVal Token As CancellationToken) + Const urlPattern$ = "https://www.threads.net/api/graphql?lsd={0}&variables={1}&doc_id=6371597506283707&fb_api_req_friendly_name=BarcelonaProfileThreadsTabRefetchableQuery&server_timestamps=true&fb_dtsg={2}" + Const var_init$ = """userID"":""{0}""" + Const var_cursor$ = """after"":""{1}"",""before"":null,""first"":25,""last"":null,""userID"":""{0}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false" + Dim URL$ = String.Empty + Try + If Not Valid Then + Dim idIsNull As Boolean = ID.IsEmptyString + UpdateCredentials() + If idIsNull And Not ID.IsEmptyString Then _ForceSaveUserInfo = True + End If + If Not Valid Then Throw New Plugin.ExitException("Some credentials are missing") + + Responser.Method = "POST" + Responser.Referer = $"https://www.threads.net/@{NameTrue}" + Responser.Headers.Add(Header_FB_LSD, OPT_LSD) + + Dim nextCursor$ = String.Empty + Dim dataFound As Boolean = False + + Dim vars$ + If Cursor.IsEmptyString Then + vars = String.Format(var_init, ID) + Else + vars = String.Format(var_cursor, ID, Cursor) + End If + vars = SymbolsConverter.ASCII.EncodeSymbolsOnly("{" & vars & "}") + + URL = String.Format(urlPattern, OPT_LSD, vars, SymbolsConverter.ASCII.EncodeSymbolsOnly(OPT_FB_DTSG)) + + Using j As EContainer = GetDocument(URL, Token) + If j.ListExists Then + With j({"data", "mediaData"}) + If .ListExists Then + nextCursor = .Value({"page_info"}, "end_cursor") + With .Item({"edges"}) + If .ListExists Then dataFound = DefaultParser(.Self, Sections.Timeline, Token) + End With + End If + End With + End If + End Using + + If dataFound And Not nextCursor.IsEmptyString Then DownloadData(nextCursor, Token) + Catch ex As Exception + ProcessException(ex, Token, $"data downloading error [{URL}]") + End Try + End Sub + Private Function GetDocument(ByVal URL As String, ByVal Token As CancellationToken, Optional ByVal Round As Integer = 0) As EContainer + Try + ThrowAny(Token) + If Round > 0 AndAlso Not UpdateCredentials() Then Throw New Exception("Failed to update credentials") + ThrowAny(Token) + Dim r$ = Responser.GetResponse(URL) + If Not r.IsEmptyString Then Return JsonDocument.Parse(r) Else Throw New Exception("Failed to get a response") + Catch ex As Exception + If Round = 0 Then + Return GetDocument(URL, Token, Round + 1) + Else + Throw ex + End If + End Try + End Function + Private Function UpdateCredentials(Optional ByVal e As ErrorsDescriber = Nothing) As Boolean + Dim URL$ = $"https://www.threads.net/@{NameTrue}" + OPT_LSD = String.Empty + OPT_FB_DTSG = String.Empty + Try + Responser.Method = "GET" + Responser.Referer = URL + Responser.Headers.Remove(Header_FB_LSD) + Dim r$ = Responser.GetResponse(URL,, EDP.SendToLog + EDP.ThrowException) + Dim rr As RParams + Dim tt$, ttVal$ + If Not r.IsEmptyString Then + rr = RParams.DM("\[\],{""token"":""(.*?)""},\d+\]", 0, RegexReturn.List, EDP.ReturnValue) + Dim tokens As List(Of String) = RegexReplace(r, rr) + If tokens.ListExists Then + With rr + .Match = Nothing + .MatchSub = 1 + .WhatGet = RegexReturn.Value + End With + For Each tt In tokens + If Not OPT_FB_DTSG.IsEmptyString And Not OPT_LSD.IsEmptyString Then + Exit For + Else + ttVal = RegexReplace(tt, rr) + If Not ttVal.IsEmptyString Then + If ttVal.Contains(":") Then + If OPT_FB_DTSG.IsEmptyString Then OPT_FB_DTSG = ttVal + Else + If OPT_LSD.IsEmptyString Then OPT_LSD = ttVal + End If + End If + End If + Next + End If + If ID.IsEmptyString Then ID = RegexReplace(r, RParams.DMS("""props"":\{""user_id"":""(\d+)""\},", 1, EDP.ReturnValue)) + End If + Return Valid + Catch ex As Exception + Dim notFound$ = String.Empty + If OPT_FB_DTSG.IsEmptyString Then notFound.StringAppend(Header_FB_LSD) + If OPT_LSD.IsEmptyString Then notFound.StringAppend("lsd") + If ID.IsEmptyString Then notFound.StringAppend("User ID") + LogError(ex, $"failed to update some{IIf(notFound.IsEmptyString, String.Empty, $" ({notFound})")} credentials", e) + Return False + End Try + End Function +#End Region +#Region "ReparseMissing" + Protected Overrides Sub ReparseMissing(ByVal Token As CancellationToken) + Const varsPattern$ = """postID"":""{0}"",""userID"":""{1}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false" + 'Const varsPattern$ = "{""postID"":""{0}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false}" + Const urlPattern$ = "https://www.threads.net/api/graphql?lsd={0}&variables={1}&fb_api_req_friendly_name=BarcelonaPostPageQuery&server_timestamps=true&fb_dtsg={2}&doc_id=25460088156920903" + Dim rList As New List(Of Integer) + Dim URL$ = String.Empty + DefaultParser_ElemNode = Nothing + DefaultParser_IgnorePass = True + Try + If ContentMissingExists Then + Responser.Method = "POST" + Responser.Referer = $"https://www.threads.net/@{NameTrue}" + If Not IsSingleObjectDownload AndAlso Not UpdateCredentials() Then Throw New Exception("Failed to update credentials") + Dim m As UserMedia + Dim vars$ + Dim j As EContainer + ProgressPre.ChangeMax(_ContentList.Count) + For i% = 0 To _ContentList.Count - 1 + ProgressPre.Perform() + m = _ContentList(i) + If m.State = UserMedia.States.Missing And Not m.Post.ID.IsEmptyString Then + ThrowAny(Token) + vars = SymbolsConverter.ASCII.EncodeSymbolsOnly("{" & String.Format(varsPattern, m.Post.ID.Split("_").FirstOrDefault, ID) & "}") + URL = String.Format(urlPattern, OPT_LSD, vars, SymbolsConverter.ASCII.EncodeSymbolsOnly(OPT_FB_DTSG)) + + j = GetDocument(URL, Token) + If j.ListExists Then + With j.ItemF({"data", "data", "edges", 0, "node", "thread_items", 0, "post"}) + If .ListExists AndAlso DefaultParser({ .Self}, Sections.Timeline, Token) Then rList.Add(i) + End With + j.Dispose() + End If + End If + Next + End If + Catch ex As Exception + ProcessException(ex, Token, $"ReparseMissing error [{URL}]") + Finally + DefaultParser_ElemNode = DefaultParser_ElemNode_Default + DefaultParser_IgnorePass = False + If rList.Count > 0 Then + For i% = rList.Count - 1 To 0 Step -1 : _ContentList.RemoveAt(rList(i)) : Next + rList.Clear() + End If + End Try + End Sub +#End Region +#Region "DownloadSingleObject" + Protected Overrides Sub DownloadSingleObject_GetPosts(ByVal Data As IYouTubeMediaContainer, ByVal Token As CancellationToken) + Dim url$ = Data.URL_BASE.IfNullOrEmpty(Data.URL) + Dim postCode$ = RegexReplace(url, RParams.DMS("post/([^/\?&]+)", 1, EDP.ReturnValue)) + If Not postCode.IsEmptyString Then + Dim postId$ = CodeToID(postCode) + If Not postId.IsEmptyString Then + _NameTrue = MySettings.IsMyUser(url).UserName + DefaultParser_PostUrlCreator = Function(post) url + If Not _NameTrue.IsEmptyString AndAlso UpdateCredentials(EDP.ReturnValue) Then + _ContentList.Add(New UserMedia(url) With {.State = UserMedia.States.Missing, .Post = postId}) + ReparseMissing(Token) + End If + End If + End If + End Sub +#End Region +#Region "ThrowAny" + Friend Overrides Sub ThrowAny(ByVal Token As CancellationToken) + ThrowAnyImpl(Token) + End Sub +#End Region +#Region "DownloadingException" + Protected Overrides Function DownloadingException(ByVal ex As Exception, ByVal Message As String, Optional ByVal FromPE As Boolean = False, + Optional ByVal EObj As Object = Nothing) As Integer + Return 0 + End Function +#End Region + End Class +End Namespace \ No newline at end of file diff --git a/SCrawler/Content/Icons/SiteIcons/ThreadsIcon_192.ico b/SCrawler/Content/Icons/SiteIcons/ThreadsIcon_192.ico new file mode 100644 index 0000000..bc454b4 Binary files /dev/null and b/SCrawler/Content/Icons/SiteIcons/ThreadsIcon_192.ico differ diff --git a/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb b/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb index d52c1b2..0328d30 100644 --- a/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb +++ b/SCrawler/PluginsEnvironment/Hosts/PluginHost.vb @@ -79,6 +79,7 @@ Namespace Plugin.Hosts New PluginHost(New API.Twitter.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), New PluginHost(New API.Mastodon.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), New PluginHost(New API.Instagram.SiteSettings(_XML, GlobalPath), _XML, GlobalPath, _Temp, _Imgs, _Vids), + New PluginHost(New API.ThreadsNet.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), New PluginHost(New API.RedGifs.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), New PluginHost(New API.YouTube.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), New PluginHost(New API.Pinterest.SiteSettings, _XML, GlobalPath, _Temp, _Imgs, _Vids), diff --git a/SCrawler/SCrawler.vbproj b/SCrawler/SCrawler.vbproj index b96a3e0..9b7abb6 100644 --- a/SCrawler/SCrawler.vbproj +++ b/SCrawler/SCrawler.vbproj @@ -223,6 +223,8 @@ + + @@ -709,6 +711,9 @@ + + + diff --git a/SCrawler/SiteResources.Designer.vb b/SCrawler/SiteResources.Designer.vb index 4f30e04..70442f3 100644 --- a/SCrawler/SiteResources.Designer.vb +++ b/SCrawler/SiteResources.Designer.vb @@ -264,6 +264,16 @@ Namespace My.Resources End Get End Property + ''' + ''' Looks up a localized resource of type System.Drawing.Icon similar to (Icon). + ''' + Friend Shared ReadOnly Property ThreadsIcon_192() As System.Drawing.Icon + Get + Dim obj As Object = ResourceManager.GetObject("ThreadsIcon_192", resourceCulture) + Return CType(obj,System.Drawing.Icon) + End Get + End Property + ''' ''' Looks up a localized resource of type System.Drawing.Icon similar to (Icon). ''' diff --git a/SCrawler/SiteResources.resx b/SCrawler/SiteResources.resx index 73de61b..b86f2da 100644 --- a/SCrawler/SiteResources.resx +++ b/SCrawler/SiteResources.resx @@ -178,6 +178,9 @@ Content\Pictures\SitePictures\ThisVidPic_16.png;System.Drawing.Bitmap, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a + + Content\Icons\SiteIcons\ThreadsIcon_192.ico;System.Drawing.Icon, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a + Content\Icons\SiteIcons\TikTokIcon_32.ico;System.Drawing.Icon, System.Drawing, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a