Files
SCrawler/SCrawler/API/ThreadsNet/UserData.vb
Andy ec2266f1bf 2024.5.4.0
YT
remove dots from the end of the file name; add a setting to remove specific characters

SCrawler
API.Instagram: simplify the 'Connection closed' error
API.Reddit: update token refresh request; add 'BearerTokenUseCurl' hidden property
API.Threads: fix wrong header name ('dnt'); update 'UpdateCredentials' function
AutoDownloader: change 'IndexOutOfRangeException' to 'Exception' in the 'Download' function
TDownloader: fix 'FilesUpdatePendingUsers' function (freeze)
UserSearchForm: add 'FriendlyName' to search results
2024-05-04 07:04:26 +03:00

289 lines
15 KiB
VB.net

' Copyright (C) 2023 Andy https://github.com/AAndyProgram
' This program is free software: you can redistribute it and/or modify
' it under the terms of the GNU General Public License as published by
' the Free Software Foundation, either version 3 of the License, or
' (at your option) any later version.
'
' This program is distributed in the hope that it will be useful,
' but WITHOUT ANY WARRANTY
Imports System.Threading
Imports SCrawler.API.Base
Imports SCrawler.API.YouTube.Objects
Imports PersonalUtilities.Functions.XML
Imports PersonalUtilities.Functions.RegularExpressions
Imports PersonalUtilities.Tools.Web.Documents.JSON
Imports PersonalUtilities.Tools.Web.Clients
Imports PersonalUtilities.Tools.Web.Clients.EventArguments
Imports IGS = SCrawler.API.Instagram.SiteSettings
Namespace API.ThreadsNet
Friend Class UserData : Inherits Instagram.UserData
#Region "Declarations"
''' <summary>Returns <c>HOST.Source</c> as this site's <c>SiteSettings</c>.</summary>
Private ReadOnly Property MySettings As SiteSettings
Get
Return HOST.Source
End Get
End Property
' Default node path (keys and an index) assigned to DefaultParser_ElemNode by the constructor
' and restored in the Finally block of ReparseMissing.
Private ReadOnly DefaultParser_ElemNode_Default() As Object = {"node", "thread_items", 0, "post"}
''' <summary>
''' True when <c>ValidateBaseTokens()</c> succeeds and <c>ID</c> is not empty.
''' Both checks are always evaluated.
''' </summary>
Private ReadOnly Property Valid As Boolean
    Get
        Dim tokensAreValid As Boolean = ValidateBaseTokens()
        Dim idIsPresent As Boolean = Not ID.IsEmptyString
        Return tokensAreValid And idIsPresent
    End Get
End Property
#End Region
#Region "Loader"
''' <summary>Empty override: nothing is read from or written to <paramref name="Container"/> here.</summary>
Protected Overrides Sub LoadUserInformation_OptionalFields(ByRef Container As XmlFile, ByVal Loading As Boolean)
End Sub
#End Region
#Region "Exchange"
''' <summary>Override that always returns <c>Nothing</c>.</summary>
Friend Overrides Function ExchangeOptionsGet() As Object
Return Nothing
End Function
''' <summary>Empty override: <paramref name="Obj"/> is ignored.</summary>
Friend Overrides Sub ExchangeOptionsSet(ByVal Obj As Object)
End Sub
#End Region
#Region "Initializer"
''' <summary>
''' Configures the inherited parser for Threads: sets the reels function, allows abstract media,
''' installs the default element node path and the post URL builder, and turns on the two
''' responser flags (cookie auto-update and response-received handler).
''' </summary>
Friend Sub New()
    ObtainMedia_SetReelsFunc()
    ObtainMedia_AllowAbstract = True
    DefaultParser_ElemNode = DefaultParser_ElemNode_Default
    ' The post URL is built from NameTrue (read when the lambda runs) and the post's code.
    DefaultParser_PostUrlCreator = Function(post)
                                       Return $"https://www.threads.net/@{NameTrue}/post/{post.Code}"
                                   End Function
    _ResponserAutoUpdateCookies = True
    _ResponserAddResponseReceivedHandler = True
End Sub
#End Region
#Region "Download functions"
''' <summary>
''' Sleeps the current thread for the number of milliseconds configured in
''' <c>MySettings.RequestsWaitTimer_Any</c>; does nothing when the value is zero or negative.
''' </summary>
Private Sub WaitTimer()
    ' Read and convert the setting once so the value checked is the value slept on.
    Dim delay As Integer = CInt(MySettings.RequestsWaitTimer_Any.Value)
    If delay > 0 Then Thread.Sleep(delay)
End Sub
''' <summary>
''' Sets <c>MySettings.DownloadData_Impl</c> to False and writes a notice to <c>MyMainLOG</c>.
''' </summary>
Private Sub DisableDownload()
    MySettings.DownloadData_Impl.Value = False
    Dim notice As String = $"{Site} downloading is disabled until you update your credentials"
    MyMainLOG = notice
End Sub
''' <summary>
''' Entry point of the download: when <c>MySettings.DownloadData_Impl</c> is enabled, loads the saved
''' posts, resets the base tokens and downloads the timeline starting from an empty cursor.
''' </summary>
''' <param name="Token">Cancellation token passed through to the download.</param>
''' <remarks>
''' Any exception is rethrown unchanged. The Finally block always resets the request method,
''' calls <c>UpdateResponser</c> and pushes the responser data back to the settings; the posts are
''' saved only when no exception occurred.
''' </remarks>
Protected Overrides Sub DownloadDataF(ByVal Token As CancellationToken)
    If Not CBool(MySettings.DownloadData_Impl.Value) Then Exit Sub

    Dim errorFound As Boolean = False
    Try
        Responser.Method = "POST"
        LoadSavePostsKV(True)
        ResetBaseTokens()
        DownloadData(String.Empty, Token)
    Catch
        errorFound = True
        ' Bare 'Throw' keeps the original stack trace ('Throw ex' would reset it).
        Throw
    Finally
        Responser.Method = "POST"
        UpdateResponser()
        MySettings.UpdateResponserData(Responser)
        If Not errorFound Then LoadSavePostsKV(False)
    End Try
End Sub
''' <summary>
''' Detaches <c>Responser_ResponseReceived</c> from the responser's <c>ResponseReceived</c> event,
''' provided the responser exists and is not disposed.
''' </summary>
Protected Overrides Sub UpdateResponser()
    If Responser Is Nothing OrElse Responser.Disposed Then Exit Sub
    RemoveHandler Responser.ResponseReceived, AddressOf Responser_ResponseReceived
End Sub
''' <summary>
''' Response handler: when the response carries a CSRF cookie whose value differs from the
''' current CSRF header value, adds the cookie value to the responser headers.
''' </summary>
''' <param name="Sender">Event sender (not used).</param>
''' <param name="e">Response data; only its cookies are inspected.</param>
Protected Overrides Sub Responser_ResponseReceived(ByVal Sender As Object, ByVal e As WebDataResponse)
    If Not e.CookiesExists Then Exit Sub

    ' Cookie names are compared in lower case against the CSRF cookie name constant.
    Dim csrf As String = If(e.Cookies.FirstOrDefault(Function(cookie) cookie.Name.StringToLower = IGS.Header_CSRF_TOKEN_COOKIE)?.Value, String.Empty)
    If csrf.IsEmptyString Then Exit Sub

    If Not AEquals(Of String)(csrf, Responser.Headers.Value(IGS.Header_CSRF_TOKEN)) Then
        Responser.Headers.Add(IGS.Header_CSRF_TOKEN, csrf)
    End If
End Sub
''' <summary>
''' Downloads one page of the user's threads timeline and, while data is found and an end cursor
''' is returned, calls itself for the next page.
''' </summary>
''' <param name="Cursor">Pagination cursor; empty for the first page.</param>
''' <param name="Token">Cancellation token passed to the request and the parser.</param>
''' <remarks>
''' Credentials are refreshed first when <c>Valid</c> is False; if they are still invalid the
''' download is disabled and an <c>ExitException</c> is raised. Every exception raised in the
''' body is handed to <c>ProcessException</c>.
''' </remarks>
Private Overloads Sub DownloadData(ByVal Cursor As String, ByVal Token As CancellationToken)
    Const urlPattern$ = "https://www.threads.net/api/graphql?lsd={0}&variables={1}&doc_id=6371597506283707&fb_api_req_friendly_name=BarcelonaProfileThreadsTabRefetchableQuery&server_timestamps=true&fb_dtsg={2}"
    Const var_init$ = """userID"":""{0}"""
    Const var_cursor$ = """after"":""{1}"",""before"":null,""first"":25,""last"":null,""userID"":""{0}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false"
    Dim URL$ = String.Empty
    Try
        If Not Valid Then
            Dim idWasEmpty As Boolean = ID.IsEmptyString
            UpdateCredentials()
            ' An ID obtained during the credentials update must be persisted.
            If idWasEmpty And Not ID.IsEmptyString Then _ForceSaveUserInfo = True
        End If
        If Not Valid Then
            DisableDownload()
            Throw New Plugin.ExitException("Some credentials are missing")
        End If

        With Responser
            .Method = "POST"
            .Referer = $"https://www.threads.net/@{NameTrue}"
            .Headers.Add(GQL_HEADER_FB_LSD, Token_lsd)
        End With

        Dim nextCursor$ = String.Empty
        Dim dataFound As Boolean = False

        ' First page: only the user ID; following pages: ID plus the cursor.
        Dim vars$ = If(Cursor.IsEmptyString,
                       String.Format(var_init, ID),
                       String.Format(var_cursor, ID, Cursor))
        vars = SymbolsConverter.ASCII.EncodeSymbolsOnly("{" & vars & "}")
        URL = String.Format(urlPattern, Token_lsd, vars, Token_dtsg_Var)

        Using document As EContainer = GetDocument(URL, Token)
            If document.ListExists Then
                With document({"data", "mediaData"})
                    If .ListExists Then
                        nextCursor = .Value({"page_info"}, "end_cursor")
                        With .Item({"edges"})
                            If .ListExists Then dataFound = DefaultParser(.Self, Sections.Timeline, Token)
                        End With
                    End If
                End With
            End If
        End Using

        If dataFound And Not nextCursor.IsEmptyString Then DownloadData(nextCursor, Token)
    Catch ex As Exception
        ProcessException(ex, Token, $"data downloading error [{URL}]")
    End Try
End Sub
''' <summary>
''' Requests <paramref name="URL"/> through the responser and parses the response as JSON.
''' On the first failure (<paramref name="Round"/> = 0) the call is repeated once with
''' <paramref name="Round"/> + 1, which refreshes the credentials before requesting again.
''' </summary>
''' <param name="URL">Request address.</param>
''' <param name="Token">Cancellation token checked via <c>ThrowAny</c> before the request.</param>
''' <param name="Round">Attempt number; 0 for the initial call.</param>
''' <returns>The parsed document.</returns>
''' <exception cref="Exception">
''' The response is empty or the credentials update failed; on the second attempt any
''' exception is rethrown to the caller.
''' </exception>
Private Function GetDocument(ByVal URL As String, ByVal Token As CancellationToken, Optional ByVal Round As Integer = 0) As EContainer
    Try
        ThrowAny(Token)
        If Round > 0 AndAlso Not UpdateCredentials() Then
            DisableDownload()
            Throw New Exception("Failed to update credentials")
        End If
        ThrowAny(Token)
        WaitTimer()
        Dim r$ = Responser.GetResponse(URL)
        If r.IsEmptyString Then Throw New Exception("Failed to get a response")
        Return JsonDocument.Parse(r)
    Catch
        If Round = 0 Then Return GetDocument(URL, Token, Round + 1)
        ' Bare 'Throw' keeps the original stack trace ('Throw ex' would reset it).
        Throw
    End Try
End Function
''' <summary>
''' Requests the user's profile page with a temporary set of headers, parses the base tokens
''' from the response and, when <c>ID</c> is still empty, extracts the user ID from the page.
''' </summary>
''' <param name="e">Error handling options passed to <c>LogError</c> when the update fails.</param>
''' <returns>
''' The value of <c>Valid</c> after parsing; False when an exception occurred (in which case the
''' download is also disabled and the error is logged).
''' </returns>
''' <remarks>
''' The responser's request method and headers are restored to their previous values before
''' returning, so the temporary GET configuration does not leak into the caller's requests.
''' </remarks>
Private Function UpdateCredentials(Optional ByVal e As ErrorsDescriber = Nothing) As Boolean
    Dim URL$ = $"https://www.threads.net/@{NameTrue}"
    ResetBaseTokens()
    ' Snapshot of the caller's request state, restored in Finally.
    Dim previousMethod As String = Responser.Method
    Dim headers As New HttpHeaderCollection
    headers.AddRange(Responser.Headers)
    Try
        With Responser
            .Method = "GET"
            .Referer = URL
            With .Headers
                .Clear()
                .Add("dnt", 1)
                .Add("drp", 1)
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.Authority, "www.threads.net"))
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.Origin, "https://www.threads.net"))
                .Add("Sec-Ch-Ua-Model", "")
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.SecChUaMobile, "?0"))
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.SecChUaPlatform, """Windows"""))
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.SecFetchDest, "document"))
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.SecFetchMode, "navigate"))
                .Add(HttpHeaderCollection.GetSpecialHeader(MyHeaderTypes.SecFetchSite, "none"))
                .Add("Upgrade-Insecure-Requests", 1)
                .Add("Sec-Fetch-User", "?1")
                .Add(IGS.Header_Browser, MySettings.HH_BROWSER.Value)
                .Add(IGS.Header_BrowserExt, MySettings.HH_BROWSER_EXT.Value)
            End With
        End With
        WaitTimer()
        Dim r$ = Responser.GetResponse(URL,, EDP.ThrowException)
        If Not r.IsEmptyString Then
            ParseTokens(r, 0)
            If ID.IsEmptyString Then ID = RegexReplace(r, RParams.DMS("""props"":\{""user_id"":""(\d+)""", 1, EDP.ReturnValue))
        End If
        Return Valid
    Catch ex As Exception
        ' Collect the names of the credentials that are still missing for the log message.
        Dim notFound$ = String.Empty
        ValidateBaseTokens(notFound)
        If ID.IsEmptyString Then notFound.StringAppend("User ID")
        DisableDownload()
        Dim eex As New ErrorsDescriberException($"{ToStringForLog()}: failed to update some{If(notFound.IsEmptyString, String.Empty, $" ({notFound})")} credentials",,, ex) With {
            .ReplaceMainMessage = True,
            .SendToLogOnlyMessage = Responser.StatusCode = Net.HttpStatusCode.InternalServerError And Responser.Status = Net.WebExceptionStatus.ProtocolError
        }
        LogError(eex, String.Empty, e)
        Return False
    Finally
        ' Always drop the temporary page-request headers, even when the snapshot is empty.
        Responser.Headers.Clear()
        If headers.ListExists Then Responser.Headers.AddRange(headers)
        headers.Dispose()
        ' Put back the caller's request method (callers issue POST requests after this call).
        If Not previousMethod.IsEmptyString Then Responser.Method = previousMethod
    End Try
End Function
#End Region
#Region "ReparseMissing"
''' <summary>
''' Re-requests every item of <c>_ContentList</c> that is in the Missing state and has a post ID,
''' parses the returned post and removes the items that were parsed successfully.
''' </summary>
''' <param name="Token">Cancellation token checked before each request.</param>
''' <remarks>
''' While running, <c>DefaultParser_ElemNode</c> is cleared and <c>DefaultParser_IgnorePass</c> is set;
''' both are put back in the Finally block. Exceptions are handed to <c>ProcessException</c>.
''' </remarks>
Protected Overrides Sub ReparseMissing(ByVal Token As CancellationToken)
    Const varsPattern$ = """postID"":""{0}"",""userID"":""{1}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false"
    ' Alternative variables pattern without userID, kept for reference:
    'Const varsPattern$ = "{""postID"":""{0}"",""__relay_internal__pv__BarcelonaIsLoggedInrelayprovider"":true,""__relay_internal__pv__BarcelonaIsFeedbackHubEnabledrelayprovider"":false}"
    Const urlPattern$ = "https://www.threads.net/api/graphql?lsd={0}&variables={1}&fb_api_req_friendly_name=BarcelonaPostPageQuery&server_timestamps=true&fb_dtsg={2}&doc_id=25460088156920903"
    ' Indexes (ascending) of the items that were successfully re-parsed.
    Dim rList As New List(Of Integer)
    Dim URL$ = String.Empty
    DefaultParser_ElemNode = Nothing
    DefaultParser_IgnorePass = True
    Try
        If ContentMissingExists Then
            Responser.Method = "POST"
            Responser.Referer = $"https://www.threads.net/@{NameTrue}"
            If Not IsSingleObjectDownload AndAlso Not UpdateCredentials() Then Throw New Exception("Failed to update credentials")
            Dim m As UserMedia
            Dim vars$
            ProgressPre.ChangeMax(_ContentList.Count)
            For i% = 0 To _ContentList.Count - 1
                ProgressPre.Perform()
                m = _ContentList(i)
                If m.State = UserMedia.States.Missing And Not m.Post.ID.IsEmptyString Then
                    ThrowAny(Token)
                    ' Only the part of the post ID before the first underscore is sent.
                    vars = SymbolsConverter.ASCII.EncodeSymbolsOnly("{" & String.Format(varsPattern, m.Post.ID.Split("_").FirstOrDefault, ID) & "}")
                    URL = String.Format(urlPattern, Token_lsd, vars, Token_dtsg_Var)
                    ' 'Using' disposes the document on every path, including empty documents and parser errors.
                    Using j As EContainer = GetDocument(URL, Token)
                        If j.ListExists Then
                            With j.ItemF({"data", "data", "edges", 0, "node", "thread_items", 0, "post"})
                                If .ListExists AndAlso DefaultParser({ .Self}, Sections.Timeline, Token) Then rList.Add(i)
                            End With
                        End If
                    End Using
                End If
            Next
        End If
    Catch ex As Exception
        ProcessException(ex, Token, $"ReparseMissing error [{URL}]")
    Finally
        DefaultParser_ElemNode = DefaultParser_ElemNode_Default
        DefaultParser_IgnorePass = False
        If rList.Count > 0 Then
            ' Remove from the end so the remaining indexes stay valid.
            For i% = rList.Count - 1 To 0 Step -1 : _ContentList.RemoveAt(rList(i)) : Next
            rList.Clear()
        End If
    End Try
End Sub
#End Region
#Region "DownloadSingleObject"
''' <summary>
''' Single-post download: extracts the post code from the container's URL, converts it to a post
''' ID, resolves the user name from the URL and, once the credentials are updated, queues the post
''' as a Missing item and runs <c>ReparseMissing</c>.
''' </summary>
''' <param name="Data">Container whose <c>URL_BASE</c> (or <c>URL</c> when that is empty) is used.</param>
''' <param name="Token">Cancellation token passed to <c>ReparseMissing</c>.</param>
Protected Overrides Sub DownloadSingleObject_GetPosts(ByVal Data As IYouTubeMediaContainer, ByVal Token As CancellationToken)
    Dim sourceUrl$ = Data.URL_BASE.IfNullOrEmpty(Data.URL)

    Dim postCode$ = RegexReplace(sourceUrl, RParams.DMS("post/([^/\?&]+)", 1, EDP.ReturnValue))
    If postCode.IsEmptyString Then Exit Sub

    Dim postId$ = CodeToID(postCode)
    If postId.IsEmptyString Then Exit Sub

    _NameTrue = MySettings.IsMyUser(sourceUrl).UserName
    ' Every parsed post of this download reports the requested URL as its address.
    DefaultParser_PostUrlCreator = Function(post) sourceUrl

    If _NameTrue.IsEmptyString Then Exit Sub
    If Not UpdateCredentials(EDP.ReturnValue) Then Exit Sub

    _ContentList.Add(New UserMedia(sourceUrl) With {.State = UserMedia.States.Missing, .Post = postId})
    ReparseMissing(Token)
End Sub
#End Region
#Region "ThrowAny"
''' <summary>Forwards <paramref name="Token"/> to <c>ThrowAnyImpl</c>.</summary>
Friend Overrides Sub ThrowAny(ByVal Token As CancellationToken)
ThrowAnyImpl(Token)
End Sub
#End Region
#Region "DownloadingException"
''' <summary>Override that always returns 0; none of the arguments are inspected.</summary>
Protected Overrides Function DownloadingException(ByVal ex As Exception, ByVal Message As String, Optional ByVal FromPE As Boolean = False,
Optional ByVal EObj As Object = Nothing) As Integer
Return 0
End Function
#End Region
End Class
End Namespace