Site settings have been expanded, some functions and dependencies have been changed.
Removed unused elements in UserDataBase, added additional xml fields, added error executor.
Created a basic download function.
Added Instagram saved posts and 429 bypass.
Added channel statistics.
Added site redgifs.
Updated sites algorithms.
Other improvements.
Updated downloader algorithm.
This commit is contained in:
Andy
2022-01-23 05:34:09 +03:00
parent 7da1ccf1ae
commit f1ba2ecd77
56 changed files with 5056 additions and 1293 deletions

View File

@@ -19,6 +19,8 @@ Namespace API.Reddit
Private Const Name_ID As String = "ID"
Private Const Name_Date As String = "Date"
Private Const Name_PostsNode As String = "Posts"
Private Const Name_UsersAdded As String = "UsersAdded"
Private Const Name_PostsDownloaded As String = "PostsDownloaded"
#End Region
Friend Const DefaultDownloadLimitCount As Integer = 1000
#Region "IUserData Support"
@@ -311,6 +313,31 @@ Namespace API.Reddit
End Get
End Property
Private ReadOnly Property Range As RangeSwitcher(Of UserPost)
Friend ReadOnly Property CountOfAddedUsers As List(Of Integer)
Friend ReadOnly Property CountOfLoadedPostsPerSession As List(Of Integer)
Private _FirstUserAdded As Boolean = False
Friend Sub UserAdded(Optional ByVal IsAdded As Boolean = True)
If Not _FirstUserAdded Then CountOfAddedUsers.Add(0) : _FirstUserAdded = True
Dim v% = CountOfAddedUsers.Last
v += IIf(IsAdded, 1, -1)
If v < 0 Then v = 0
CountOfAddedUsers(CountOfAddedUsers.Count - 1) = v
End Sub
Friend Function GetChannelStats(ByVal Extended As Boolean) As String
Dim s$ = String.Empty
Dim p As New ANumbers With {.FormatOptions = ANumbers.Options.GroupIntegral}
If Extended Then
s.StringAppendLine($"Users added from this channel: {CountOfAddedUsers.Sum.NumToString(p)}")
s.StringAppendLine($"Users added from this channel (avg): {CountOfAddedUsers.DefaultIfEmpty(0).Average.RoundDown.NumToString(p)}")
s.StringAppendLine($"Users added from this channel (session): {CountOfAddedUsers.LastOrDefault.NumToString(p)}")
s.StringAppendLine($"Posts downloaded (avg): {CountOfLoadedPostsPerSession.DefaultIfEmpty(0).Average.RoundUp.NumToString(p)}")
s.StringAppendLine($"Posts downloaded (session): {CountOfLoadedPostsPerSession.LastOrDefault.NumToString(p)}")
Else
s.StringAppend($"Users: {CountOfAddedUsers.Sum.NumToString(p)} (avg: {CountOfAddedUsers.DefaultIfEmpty(0).Average.RoundDown.NumToString(p)}; s: {CountOfAddedUsers.LastOrDefault.NumToString(p)})")
s.StringAppend($"Posts: {CountOfLoadedPostsPerSession.DefaultIfEmpty(0).Average.RoundUp.NumToString(p)} (s: {CountOfLoadedPostsPerSession.LastOrDefault.NumToString(p)})", "; ")
End If
Return s
End Function
#Region "Limits Support"
Private _DownloadLimitCount As Integer? = Nothing
Friend Property DownloadLimitCount As Integer? Implements IChannelLimits.DownloadLimitCount
@@ -379,6 +406,8 @@ Namespace API.Reddit
Posts = New List(Of UserPost)
PostsLatest = New List(Of UserPost)
Range = New RangeSwitcher(Of UserPost)(Me)
CountOfAddedUsers = New List(Of Integer)
CountOfLoadedPostsPerSession = New List(Of Integer)
End Sub
Friend Sub New(ByVal f As SFile)
Me.New
@@ -422,7 +451,9 @@ Namespace API.Reddit
}
d.SetLimit(Me)
d.DownloadData(Token)
Dim b% = Posts.Count
Posts.ListAddList(d.GetNewChannelPosts(), LAP.NotContainsOnly)
If Posts.Count - b > 0 Then CountOfLoadedPostsPerSession.Add(Posts.Count - b)
Posts.Sort()
LatestParsedDate = If(Posts.FirstOrDefault(Function(pp) pp.Date.HasValue).Date, LatestParsedDate)
End Using
@@ -525,6 +556,8 @@ Namespace API.Reddit
Name = x.Value(Name_Name)
ID = x.Value(Name_ID)
LatestParsedDate = AConvert(Of Date)(x.Value(Name_Date), XMLDateProvider, Nothing)
CountOfAddedUsers.ListAddList(x.Value(Name_UsersAdded).StringToList(Of Integer)("|"), LAP.ClearBeforeAdd)
CountOfLoadedPostsPerSession.ListAddList(x.Value(Name_PostsDownloaded).StringToList(Of Integer)("|"), LAP.ClearBeforeAdd)
If Not PartialLoad Then
With x(Name_PostsNode).XmlIfNothing
If .Count > 0 Then .ForEach(Sub(ee) PostsLatest.Add(New UserPost With {
@@ -549,6 +582,8 @@ Namespace API.Reddit
LatestParsedDate = tmpPostList.FirstOrDefault(Function(pd) pd.Date.HasValue).Date
x.Add(Name_Date, AConvert(Of String)(LatestParsedDate, XMLDateProvider, String.Empty))
x.Add(Name_PostsNode, String.Empty)
x.Add(Name_UsersAdded, CountOfAddedUsers.ListToString(, "|"))
x.Add(Name_PostsDownloaded, CountOfLoadedPostsPerSession.ListToString(, "|"))
With x(Name_PostsNode)
tmpPostList.Take(200).ToList.ForEach(Sub(p) .Add(New EContainer("Post",
String.Empty,
@@ -578,6 +613,8 @@ Namespace API.Reddit
If disposing Then
Posts.Clear()
PostsLatest.Clear()
CountOfAddedUsers.Clear()
CountOfLoadedPostsPerSession.Clear()
Range.Dispose()
If Not Instance Is Nothing Then Instance.Dispose()
If CachePath.Exists(SFO.Path, False) Then CachePath.Delete(SFO.Path, False, False, EDP.SendInLog)

View File

@@ -74,7 +74,6 @@ Namespace API.Reddit
Next
End If
Throw New ArgumentException($"Channel ID [{ChannelID}] does not found in channels collection", "ChannelID") With {.HelpLink = 1}
'Return Nothing
End Get
End Property
Friend Sub DownloadData(ByVal Token As CancellationToken, Optional ByVal SkipExists As Boolean = True,
@@ -111,7 +110,7 @@ Namespace API.Reddit
Return Count > 0 AndAlso Channels.Contains(_Item)
End Function
Friend Function Remove(ByVal _Item As Channel) As Boolean Implements ICollection(Of Channel).Remove
If Count > 0 Then
If Count > 0 And Not _Item Is Nothing Then
Dim i% = Channels.IndexOf(_Item)
If i >= 0 Then
With Channels(i) : .Delete() : .Dispose() : End With

View File

@@ -16,9 +16,8 @@ Namespace API.Reddit
Friend ReadOnly VideoRegEx As New RegexStructure("http.{0,1}://[^" & Chr(34) & "]+?mp4", True, False)
Friend ReadOnly DateProvider As New JsonDate
Friend ReadOnly DateProviderChannel As New JsonDateChannel
Private ReadOnly EUR_PROVIDER As New ANumbers(ANumbers.Modes.EUR)
Private ReadOnly EUR_PROVIDER As New ANumbers(ANumbers.Cultures.EUR)
Friend Class JsonDate : Implements ICustomProvider
''' <inheritdoc cref="ADateTime.ParseUnicodeJS(Object, Object, ErrorsDescriber)"/>
Friend Function Convert(ByVal Value As Object, ByVal DestinationType As Type, ByVal Provider As IFormatProvider,
Optional ByVal NothingArg As Object = Nothing, Optional ByVal e As ErrorsDescriber = Nothing) As Object Implements ICustomProvider.Convert
Return ADateTime.ParseUnicodeJS(Value, NothingArg, e)
@@ -28,7 +27,6 @@ Namespace API.Reddit
End Function
End Class
Friend Class JsonDateChannel : Implements ICustomProvider
''' <inheritdoc cref="ADateTime.ParseUnicodeJS(Object, Object, ErrorsDescriber)"/>
Friend Function Convert(ByVal Value As Object, ByVal DestinationType As Type, ByVal Provider As IFormatProvider,
Optional ByVal NothingArg As Object = Nothing, Optional ByVal e As ErrorsDescriber = Nothing) As Object Implements ICustomProvider.Convert
Return ADateTime.ParseUnicode(AConvert(Of Integer)(Value, EUR_PROVIDER, Value), NothingArg, e)

View File

@@ -74,7 +74,7 @@ Namespace API.Reddit
ConcatFile.Extension = "mp4"
CachePath = $"{f.PathWithSeparator}_Cache\{SFile.GetDirectories($"{f.PathWithSeparator}_Cache\",,, EDP.ReturnValue).ListIfNothing.Count + 1}\"
If CachePath.Exists(SFO.Path) Then
Dim p As New SFileNumbers(ConcatFile.Name,,, New ANumbers(ANumbers.Modes.USA) With {.GroupSeparator = String.Empty, .FormatMode = ANumbers.Formats.General})
Dim p As New SFileNumbers(ConcatFile.Name,,, New ANumbers With {.Format = ANumbers.Formats.General})
ConcatFile = SFile.Indexed_IndexFile(ConcatFile,, p, EDP.ReturnValue)
Dim i%
Dim eFiles As New List(Of SFile)

View File

@@ -7,29 +7,23 @@
' This program is distributed in the hope that it will be useful,
' but WITHOUT ANY WARRANTY
Imports SCrawler.API.Base
Imports System.Threading
Imports PersonalUtilities.Forms.Toolbars
Namespace API.Reddit
Friend NotInheritable Class ProfileSaved
Friend Shared ReadOnly Property DataPath As SFile = $"{Settings(Sites.Reddit).Path.PathNoSeparator}\!Saved\"
Private Sub New()
End Sub
Friend Shared Sub Download(ByRef Toolbar As StatusStrip, ByRef PR As ToolStripProgressBar)
Friend Shared Sub Download(ByRef Bar As MyProgress, ByVal Token As CancellationToken)
Try
Dim Bar = New PersonalUtilities.Forms.Toolbars.MyProgress(Toolbar, PR, Nothing)
Dim u As New UserInfo(Settings(Sites.Reddit).SavedPostsUserName.Value, Sites.Reddit) With {
.IsChannel = True,
.SpecialPath = $"{Settings(Sites.Reddit).Path.PathWithSeparator}\!Saved\"
}
Dim u As New UserInfo(Settings(Sites.Reddit).SavedPostsUserName.Value, Sites.Reddit) With {.IsChannel = True, .SpecialPath = DataPath}
u.UpdateUserFile()
Using user As IUserData = UserDataBase.GetInstance(u)
Using user As New UserData(u,, False)
DirectCast(user.Self, UserDataBase).IsSavedPosts = True
Bar.Enabled = True
DirectCast(user.Self, UserData).Progress = Bar
user.Progress = Bar
If Not user.FileExists Then user.UpdateUserInformation()
user.DownloadData(Nothing)
Dim m As New MMessage("Reddit saved posts download complete", "Saved posts downloading", {"OK", "Open folder"})
m.Text.StringAppendLine($"Downloaded images: {user.DownloadedPictures}")
m.Text.StringAppendLine($"Downloaded videos: {user.DownloadedVideos}")
If MsgBoxE(m) = 1 Then u.File.CutPath.Open(SFO.Path)
Bar.Enabled = False
user.DownloadData(Token)
Bar.InformationTemporary = $"Images: {user.DownloadedPictures}; Videos: {user.DownloadedVideos}"
End Using
Catch ex As Exception
ErrorsDescriber.Execute(EDP.SendInLog, ex, "[API.Reddit.ProfileSaved.Download]")

View File

@@ -7,7 +7,6 @@
' This program is distributed in the hope that it will be useful,
' but WITHOUT ANY WARRANTY
Imports PersonalUtilities.Functions.XML
Imports PersonalUtilities.Forms.Toolbars
Imports PersonalUtilities.Tools.ImageRenderer
Imports PersonalUtilities.Tools.WebDocuments.JSON
Imports System.Net
@@ -50,15 +49,6 @@ Namespace API.Reddit
Select c.Post) Else Return Nothing
End Function
#End Region
Private _Progress As MyProgress
Friend Property Progress As MyProgress
Get
If _Progress Is Nothing Then Return MainProgress Else Return _Progress
End Get
Set(ByVal p As MyProgress)
_Progress = p
End Set
End Property
#Region "Initializers"
''' <summary>Video downloader initializer</summary>
Private Sub New()
@@ -74,9 +64,13 @@ Namespace API.Reddit
If _LoadUserInformation Then LoadUserInformation()
End Sub
#End Region
#Region "Load and Update user info"
Protected Overrides Sub LoadUserInformation_OptionalFields(ByRef Container As XmlFile, ByVal Loading As Boolean)
End Sub
#End Region
#Region "Download Overrides"
Friend Overrides Sub DownloadData(ByVal Token As CancellationToken)
If IsChannel AndAlso Not ChannelInfo.IsRegularChannel Then
If Not IsSavedPosts AndAlso (IsChannel AndAlso Not ChannelInfo.IsRegularChannel) Then
If Not Responser Is Nothing Then Responser.Dispose()
Responser = New PersonalUtilities.Tools.WEB.Response
Responser.Copy(Settings(Sites.Reddit).Responser)
@@ -92,7 +86,9 @@ Namespace API.Reddit
End Sub
Protected Overrides Sub DownloadDataF(ByVal Token As CancellationToken)
_TotalPostsDownloaded = 0
If IsChannel Then
If IsSavedPosts Then
DownloadDataChannel(String.Empty, Token)
ElseIf IsChannel Then
If ChannelInfo.IsRegularChannel Then
ChannelPostsNames.ListAddList(_TempPostsList, LNC)
If ChannelPostsNames.Count > 0 Then
@@ -129,7 +125,7 @@ Namespace API.Reddit
URL = $"https://gateway.reddit.com/desktopapi/v1/user/{Name}/posts?rtj=only&allow_quarantined=true&allow_over18=1&include=identity&after={POST}&dist=25&sort=new&t=all&layout=classic"
ThrowAny(Token)
Dim r$ = GetSiteResponse(URL)
Dim r$ = Responser.GetResponse(URL,, EDP.ThrowException)
If Not r.IsEmptyString Then
Using w As EContainer = JsonDocument.Parse(r).XmlIfNothing
If w.Count > 0 Then
@@ -203,17 +199,8 @@ Namespace API.Reddit
If POST.IsEmptyString And ExistsDetected Then Exit Sub
If Not PostID.IsEmptyString And NewPostDetected Then DownloadDataUser(PostID, Token)
End If
Catch oex As OperationCanceledException When Token.IsCancellationRequested
Catch dex As ObjectDisposedException When Disposed
Catch ex As Exception
If ex.HelpLink = NonExistendUserHelp Then
UserExists = False
ElseIf ex.HelpLink = SuspendedUserHelp Then
UserSuspended = True
Else
LogError(ex, $"data downloading error [{URL}]")
HasError = True
End If
ProcessException(ex, Token, $"data downloading error [{URL}]")
End Try
End Sub
Private Sub DownloadDataChannel(ByVal POST As String, ByVal Token As CancellationToken)
@@ -234,7 +221,7 @@ Namespace API.Reddit
End If
ThrowAny(Token)
Dim r$ = GetSiteResponse(URL)
Dim r$ = Responser.GetResponse(URL,, EDP.ThrowException)
If Not r.IsEmptyString Then
Using w As EContainer = JsonDocument.Parse(r).XmlIfNothing
If w.Count > 0 Then
@@ -296,17 +283,8 @@ Namespace API.Reddit
If POST.IsEmptyString And ExistsDetected Then Exit Sub
If Not PostID.IsEmptyString And NewPostDetected Then DownloadDataChannel(PostID, Token)
End If
Catch oex As OperationCanceledException When Token.IsCancellationRequested
Catch dex As ObjectDisposedException When Disposed
Catch ex As Exception
If ex.HelpLink = NonExistendUserHelp Then
UserExists = False
ElseIf ex.HelpLink = SuspendedUserHelp Then
UserSuspended = True
Else
LogError(ex, $"channel data downloading error [{URL}]")
HasError = True
End If
ProcessException(ex, Token, $"channel data downloading error [{URL}]")
End Try
End Sub
#End Region
@@ -366,8 +344,7 @@ Namespace API.Reddit
End If
Return added
Catch ex As Exception
LogError(ex, "gallery parsing error")
HasError = True
ProcessException(ex, Nothing, "gallery parsing error", False)
Return False
End Try
End Function
@@ -382,7 +359,7 @@ Namespace API.Reddit
ThrowAny(Token)
If _TempMediaList(i).Type = UTypes.VideoPre Then
m = _TempMediaList(i)
r = GetSiteResponse(m.URL, e)
r = Responser.GetResponse(m.URL,, e)
_TempMediaList(i) = New UserMedia
If Not r.IsEmptyString Then
v = RegexReplace(r, VideoRegEx)
@@ -395,10 +372,8 @@ Namespace API.Reddit
End If
Next
End If
Catch oex As OperationCanceledException When Token.IsCancellationRequested
Catch dex As ObjectDisposedException When Disposed
Catch ex As Exception
LogError(ex, "video reparsing error")
ProcessException(ex, Token, "video reparsing error", False)
End Try
End Sub
Friend Shared Function GetVideoInfo(ByVal URL As String) As IEnumerable(Of UserMedia)
@@ -433,7 +408,7 @@ Namespace API.Reddit
Try
If Not URL.IsEmptyString AndAlso URL.StringContains({".jpg", ".png", ".jpeg"}) Then
Dim f As SFile = CStr(RegexReplace(URL, FilesPattern))
Return Not f.IsEmptyString And Not f.File.IsEmptyString
Return Not f.File.IsEmptyString
End If
Return False
Catch ex As Exception
@@ -454,7 +429,7 @@ Namespace API.Reddit
If _ContentNew.Count > 0 Then
MyFile.Exists(SFO.Path)
Dim MyDir$
If IsChannel And SaveToCache Then
If Not IsSavedPosts AndAlso (IsChannel And SaveToCache) Then
MyDir = ChannelInfo.CachePath.PathNoSeparator
Else
MyDir = MyFile.CutPath.PathNoSeparator
@@ -605,29 +580,20 @@ Namespace API.Reddit
HasError = True
End Try
End Sub
Protected Function GetSiteResponse(ByVal URL As String, Optional ByVal e As ErrorsDescriber = Nothing) As String
Try
Return Responser.GetResponse(URL,, EDP.ThrowException)
Catch ex As Exception
HasError = True
Dim OptText$ = String.Empty
If Not e.Exists Then
Dim ee As EDP = EDP.SendInLog
If Responser.StatusCode = HttpStatusCode.NotFound Then
ee = EDP.ThrowException
OptText = ": USER NOT FOUND"
ex.HelpLink = NonExistendUserHelp
ElseIf Responser.StatusCode = HttpStatusCode.Forbidden Then
ee = EDP.ThrowException
OptText = ": USER PROFILE SUSPENDED"
ex.HelpLink = SuspendedUserHelp
Else
ee += EDP.ReturnValue
End If
e = New ErrorsDescriber(ee)
End If
Return ErrorsDescriber.Execute(e, ex, $"[{Site} - {Name}: GetSiteResponse([{URL}])]{OptText}", String.Empty)
End Try
Protected Overrides Function DownloadingException(ByVal ex As Exception, ByVal Message As String, Optional ByVal FromPE As Boolean = False) As Integer
If Responser.StatusCode = HttpStatusCode.NotFound Then
UserExists = False
ElseIf Responser.StatusCode = HttpStatusCode.Forbidden Then
UserSuspended = True
ElseIf Responser.StatusCode = HttpStatusCode.BadGateway Or
Responser.StatusCode = HttpStatusCode.ServiceUnavailable Or
Responser.StatusCode = HttpStatusCode.GatewayTimeout Then
MyMainLOG = "Reddit is currently unavailable"
Else
If Not FromPE Then LogError(ex, Message) : HasError = True
Return 0
End If
Return 1
End Function
Protected Overrides Sub Dispose(ByVal disposing As Boolean)
If Not disposedValue And disposing Then ChannelPostsNames.Clear() : _ExistsUsersNames.Clear()