mirror of
https://github.com/AAndyProgram/SCrawler.git
synced 2026-03-14 15:52:18 +00:00
2023.4.28.0
Plugins IPluginContentProvider: added DownloadSingleObject function; added tokens to GetMedia and Download functions; removed GetSpecialData function Add IDownloadableMedia interface Removed 'Channel' option from all functions and enums ISiteSettings: added GetSingleMediaInstance function ExchangeOptions: removed 'IsChannel' UserMediaTypes: added Audio and AudioPre enums IUserMedia, PluginUserMedia: changed ContentType and DownloadState from integers to their enums SCrawler Add YouTube standalone downloader Add gallery-dl & yt-dlp support Remove 'UserInfo' requirement from 'ProfilesSaved' Update 'SiteSettingsBase' to use domains and Netscape cookies UserDataBase: remove channels; remove old 'Merge' const; standardize SavedPosts file naming; move 'ValidateMD5' function from Twitter to UserDataBase to use it in other UserData classes; add 'DownloadSingleObject' environment for single posts; add validating file extension for m3u8 during download; add reindex of video file during download Rewritten DomainsContainer Create a universal settings form and PSettingsArttribute Gfycat, Imgur: turn these classes into IUserData to download a single object All plugins: update 'GetInstance' function for saved posts; update domains where implemented; remove 'OptionForm' where it exists; update options where they exist; update unix date providers; reconfigure channels where they exist LPSG: fix attachments; update converters and regex Add sites: ThisVid, Mastodon, Pinterest, YouTube, YouTube music Reddit: standardize container parsing for all data types; new channel environment; fix 'ReparseMissing' function; redirect data downloading to the base download function, saved crossposts support Twitter: fixed gif path bug; fixed downloading saved posts PornHub: hide unnecessary errors; photo galleries bug RedGifs: add 'UserAgent' option Added icons to download progress Rename some objects Completely redesigned standalone downloader form and rewritten its environment WebClient2: update 
to use tokens Labels: update label form (save labels to file only when OK button is clicked); change removing labels.txt from recycle bin to permanent; disable storing label 'NoParsedUser' UserCreatorForm: remove the 'Channel' checkbox and related functions; ability to extract the user's URL from the buffer and apply parameters if found Remove temporary 'EncryptCookies' module MainFrame: added simplified way to create new users (Ctrl+Insert to create a new user with default parameters from clipboard URL); removed SCrawler command line argument "-v" (remove the ability to run SCrawler as video downloader) PropertyValueHost: update for option forms compatibility SettingsHost: removed 'GetSpecialData' fork; added 'GetSingleMediaInstance' fork UserDataHost: update functions with tokens; update events; add 'DownloadSingleObject' function Settings: add the ability to get environment from 4 destinations; add the ability to set the program environment manually; add CMDEncoding; add cache; remove the old function 'RemoveUnusedPlugins'; add 'STDownloader' properties; add YT compatibility; add new notification options; add deleting user settings file when 'SettingsCLS.Dispose()' if there are no users in SCrawler UserFinder: remove old 'Merge' const; remove channel option UserInfo: remove channel option
This commit is contained in:
165
SCrawler.YouTube/Base/YouTubeFunctions.vb
Normal file
165
SCrawler.YouTube/Base/YouTubeFunctions.vb
Normal file
@@ -0,0 +1,165 @@
|
||||
' Copyright (C) 2023 Andy https://github.com/AAndyProgram
|
||||
' This program is free software: you can redistribute it and/or modify
|
||||
' it under the terms of the GNU General Public License as published by
|
||||
' the Free Software Foundation, either version 3 of the License, or
|
||||
' (at your option) any later version.
|
||||
'
|
||||
' This program is distributed in the hope that it will be useful,
|
||||
' but WITHOUT ANY WARRANTY
|
||||
Imports PersonalUtilities.Tools
|
||||
Imports PersonalUtilities.Forms.Toolbars
|
||||
Imports PersonalUtilities.Functions.XML
|
||||
Imports PersonalUtilities.Functions.RegularExpressions
|
||||
Imports SCrawler.API.YouTube.Objects
|
||||
Namespace API.YouTube.Base
|
||||
Public NotInheritable Class YouTubeFunctions
|
||||
''' <summary>Relative root folder of the YouTube cache; used to build a fallback cache path when no other cache path is available.</summary>
Public Const YouTubeCachePathRoot As String = "_CacheYouTube\"
''' <summary>URL path segment ('channel') that identifies a channel addressed by its channel path rather than by an '@' handle.</summary>
Public Const UserChannelOption As String = "channel"
''' <summary>
''' Pattern of the significant part of a YouTube URL: scheme, host, the first path segment and one following query or path segment.
''' The dot of 'youtube.com' is escaped so that only a literal dot is accepted.
''' </summary>
Public Const TrueUrlPattern As String = "https?://[^/]*?youtube\.com/[^\?/&]+((\??[^\?/&]+|/[^\?/&]+))"
''' <summary>
''' Pattern that extracts the URL type and the object identifier from a YouTube URL.
''' Groups: 2 - type; 5 - id.
''' The dot of 'youtube.com' is escaped so that only a literal dot is accepted.
''' </summary>
Public Const UrlTypePattern As String = "(?<=https?://[^/]*?youtube\.com/)((@|[^\?/&]+))([/\?]{0,1}(list=|v=|)([^\?/&]*))(?=(\S+|\Z|))"
|
||||
''' <summary>Private constructor: the class cannot be instantiated from outside; its visible members are constants and Shared functions.</summary>
Private Sub New()
|
||||
End Sub
|
||||
''' <summary>Checks whether the URL is recognized as a YouTube URL of a known type.</summary>
''' <param name="URL">URL to check</param>
''' <returns>True if <see cref="Info_GetUrlType"/> returns anything other than 'Undefined'</returns>
Public Shared Function IsMyUrl(ByVal URL As String) As Boolean
    Dim detectedType As YouTubeMediaType = Info_GetUrlType(URL)
    Return detectedType <> YouTubeMediaType.Undefined
End Function
|
||||
''' <summary>Detects the type of a YouTube URL (single media, playlist or channel) using <see cref="UrlTypePattern"/>.</summary>
''' <param name="URL">URL to analyze</param>
''' <param name="IsMusic">Set to True when the URL contains 'music.youtube.com'. Not modified if the URL is empty.</param>
''' <param name="IsChannelUser">
''' Set to True only when the type segment is 'channel'; False for an '@' handle and for any other type.
''' Not modified if the URL is empty.
''' </param>
''' <param name="Id">Receives the identifier (match item 5) when the match list contains at least 6 items; otherwise not modified.</param>
''' <returns>The detected type, or 'Undefined' if the URL is empty, is not matched or has an unknown type segment</returns>
Public Shared Function Info_GetUrlType(ByVal URL As String, Optional ByRef IsMusic As Boolean = False,
                                       Optional ByRef IsChannelUser As Boolean = False, Optional ByRef Id As String = Nothing) As YouTubeMediaType
    If URL.IsEmptyString Then Return YouTubeMediaType.Undefined

    IsMusic = URL.Contains("music.youtube.com")
    IsChannelUser = False

    'Match items: 2 - type; 5 - id
    Dim data As List(Of String) = RegexReplace(URL, RParams.DMS(UrlTypePattern, 0, RegexReturn.ListByMatch, EDP.ReturnValue))
    If Not data.ListExists Then Return YouTubeMediaType.Undefined

    If data.Count >= 6 Then Id = data(5)

    'AndAlso: data(2) must not be accessed when the list has fewer than 3 items
    If data.Count >= 3 AndAlso Not data(2).IsEmptyString Then
        Dim urlType$ = data(2).ToLower
        Select Case urlType
            Case "watch" : Return YouTubeMediaType.Single
            Case "playlist" : Return YouTubeMediaType.PlayList
            Case UserChannelOption, "@"
                IsChannelUser = urlType = UserChannelOption
                Return YouTubeMediaType.Channel
        End Select
    End If

    Return YouTubeMediaType.Undefined
End Function
|
||||
''' <summary>Builds the cookies arguments of the command line: '--no-cookies-from-browser --cookies "CookiesFile"'</summary>
''' <param name="UseCookies">Whether cookies are requested</param>
''' <param name="CookiesFile">Cookies file to pass to the command line</param>
''' <returns>The cookies arguments if cookies are requested and the file exists; otherwise an empty string</returns>
Public Shared Function GetCookiesCommand(ByVal UseCookies As Boolean, ByVal CookiesFile As SFile) As String
    'Non-short-circuit 'And': the file check is evaluated regardless of the 'UseCookies' value
    Dim cookiesAvailable As Boolean = UseCookies And CookiesFile.Exists
    If Not cookiesAvailable Then Return String.Empty
    Return $"--no-cookies-from-browser --cookies ""{CookiesFile}"""
End Function
|
||||
''' <summary>
''' Resolves a YouTube URL into a media container (video, track, playlist or channel).
''' The data is requested by <see cref="Parse_Internal"/> into a new cache folder and then parsed by the container from that folder.
''' </summary>
''' <param name="URL">Source URL; it is reduced to its 'true' form before the type is detected</param>
''' <param name="UseCookies">
''' True - a single attempt with the cookies flag set;
''' False - a single attempt without cookies;
''' Nothing - an attempt without cookies, then (if nothing was received and the cookies file exists) an attempt with cookies.
''' When a value is provided, it is also stored in the container.
''' </param>
''' <param name="Token">Cancellation token passed to the container parser</param>
''' <param name="Progress">Progress object passed to the container parser</param>
''' <param name="GetDefault">Request the data of the URL itself. Default: True</param>
''' <param name="GetShorts">Request the data of 'URL/shorts'. Default: True. Always disabled for music URLs, single media and playlists</param>
''' <param name="DateAfter">Data with upload date 'more than or equal to' date will be downloaded</param>
''' <param name="DateBefore">Data with upload date 'less than or equal to' date will be downloaded</param>
''' <returns>The parsed container; Nothing if the URL type is undefined, no data was received or the container has an error</returns>
''' <exception cref="ArgumentNullException"></exception>
''' <exception cref="IO.FileNotFoundException"></exception>
''' <exception cref="InvalidOperationException"></exception>
Public Shared Function Parse(ByVal URL As String, Optional ByVal UseCookies As Boolean? = Nothing,
                             Optional ByVal Token As Threading.CancellationToken = Nothing, Optional ByVal Progress As IMyProgress = Nothing,
                             Optional ByVal GetDefault As Boolean? = Nothing, Optional ByVal GetShorts As Boolean? = Nothing,
                             Optional ByVal DateAfter As Date? = Nothing, Optional ByVal DateBefore As Date? = Nothing) As IYouTubeMediaContainer
    If URL.IsEmptyString Then Throw New ArgumentNullException("URL", "URL cannot be null")
    If Not MyYouTubeSettings.YTDLP.Value.Exists Then Throw New IO.FileNotFoundException("Path to 'yt-dlp.exe' not set or program not found at destination", MyYouTubeSettings.YTDLP.Value.ToString)

    Dim urlOrig$ = URL
    URL = RegexReplace(URL, TrueUrlRegEx)
    If URL.IsEmptyString Then Throw New ArgumentNullException("URL", $"Can't get true URL from [{urlOrig}]")

    Dim isMusic As Boolean = False
    Dim objType As YouTubeMediaType = Info_GetUrlType(URL, isMusic)
    If objType = YouTubeMediaType.Undefined Then Return Nothing

    Dim __GetDefault As Boolean = If(GetDefault, True)
    Dim __GetShorts As Boolean = If(GetShorts, True)
    If isMusic Then __GetShorts = False

    Dim container As IYouTubeMediaContainer
    Dim pattern$ = "%(channel_id)s_%(id)s_%(playlist_index)s"

    Select Case objType
        Case YouTubeMediaType.Single
            __GetShorts = False
            If isMusic Then container = New Track Else container = New Video
        Case YouTubeMediaType.PlayList
            container = New PlayList
            pattern = "%(playlist_index)s_%(id)s"
            __GetShorts = False
        Case YouTubeMediaType.Channel
            container = New Channel
            If isMusic Then pattern = "%(playlist_id)s/%(channel_id)s_%(id)s_%(playlist_index)s"
        Case Else : Throw New InvalidOperationException($"Type '{objType}' is not supported by YouTubeDownloader")
    End Select

    'The container is handed over to the caller only on success;
    'in any other case (including an exception) it is disposed here.
    Dim containerReturned As Boolean = False
    Try
        If UseCookies.HasValue Then container.UseCookies = UseCookies.Value

        Dim cookiesExists As Boolean = YouTubeCookieNetscapeFile.Exists

        'New cache folder; fall back to a numbered folder in the cache root
        Dim _CachePathDefault As SFile = MyCache.NewPath(, EDP.ReturnValue)
        If _CachePathDefault.IsEmptyString Then _CachePathDefault = $"{YouTubeCachePathRoot}{SFile.GetDirectories(YouTubeCachePathRoot,,, EDP.ReturnValue).Count + 1}"
        _CachePathDefault.Exists(SFO.Path, True, EDP.ThrowException)
        pattern = $"{_CachePathDefault.PathWithSeparator}{pattern}"

        Dim result As Boolean
        If UseCookies.HasValue Then
            'Explicit choice: a single attempt, no fallback
            result = Parse_Internal(URL, pattern, _CachePathDefault, UseCookies.Value, YouTubeCookieNetscapeFile, DateAfter, DateBefore, __GetDefault, __GetShorts)
        Else
            'No choice: try without cookies first, then with cookies (if the cookies file exists)
            result = Parse_Internal(URL, pattern, _CachePathDefault, False, YouTubeCookieNetscapeFile, DateAfter, DateBefore, __GetDefault, __GetShorts)
            If Not result AndAlso cookiesExists Then result = Parse_Internal(URL, pattern, _CachePathDefault, True, YouTubeCookieNetscapeFile, DateAfter, DateBefore, __GetDefault, __GetShorts)
        End If

        If result Then
            container.Parse(Nothing, _CachePathDefault, isMusic, Token, Progress)
            If Not container.HasError Then
                container.URL = URL
                containerReturned = True
                Return container
            End If
        End If
    Finally
        If Not containerReturned Then container.Dispose()
    End Try

    Return Nothing
End Function
|
||||
''' <summary>
''' Runs yt-dlp with '--write-info-json --skip-download' for the URL (and, optionally, for 'URL/shorts')
''' using <paramref name="OutputPattern"/> as the output template.
''' </summary>
''' <param name="URL">URL to request</param>
''' <param name="OutputPattern">Value of the '-o' argument (output template including the destination path)</param>
''' <param name="OutputPath">Folder that is checked for files after the execution</param>
''' <param name="UseCookies">Add the cookies arguments (see <see cref="GetCookiesCommand"/>)</param>
''' <param name="CookiesFile">Cookies file</param>
''' <param name="DateAfter">If set, adds '--dateafter yyyyMMdd'</param>
''' <param name="DateBefore">If set, adds '--datebefore yyyyMMdd'</param>
''' <param name="GetDefault">Execute the command for the URL itself</param>
''' <param name="GetShorts">Execute the command for 'URL/shorts'</param>
''' <returns>
''' True if at least one file exists in <paramref name="OutputPath"/> (including subfolders) after the execution.
''' An exception is passed to 'ErrorsDescriber' (log + return value) with False as the default result.
''' </returns>
Private Shared Function Parse_Internal(ByVal URL As String, ByVal OutputPattern As String, ByVal OutputPath As SFile,
                                       ByVal UseCookies As Boolean, ByVal CookiesFile As SFile,
                                       ByVal DateAfter As Date?, ByVal DateBefore As Date?,
                                       ByVal GetDefault As Boolean, ByVal GetShorts As Boolean) As Boolean
    Try
        Dim command$ = "yt-dlp --write-info-json --skip-download"
        command.StringAppend(GetCookiesCommand(UseCookies, CookiesFile), " ")
        If DateAfter.HasValue Then command.StringAppend($"--dateafter {DateAfter.Value:yyyyMMdd}", " ")
        If DateBefore.HasValue Then command.StringAppend($"--datebefore {DateBefore.Value:yyyyMMdd}", " ")

        'The final command is composed by appending, not by String.Format:
        'the cookies path and the output pattern are already embedded in the text, and a '{' or '}'
        'in any of them would make String.Format throw a FormatException.
        Dim buildCommand As Func(Of String, String) =
            Function(ByVal targetUrl As String) As String
                Dim fullCommand$ = command
                fullCommand.StringAppend($"{targetUrl} -o ""{OutputPattern}""", " ")
                Return fullCommand
            End Function
#If DEBUG Then
        Debug.WriteLine(buildCommand(URL))
#End If
        Using batch As New BatchExecutor(True)
            With batch
                .CommandPermanent = BatchExecutor.GetDirectoryCommand(MyYouTubeSettings.YTDLP.Value)
                If GetDefault Then .Execute(buildCommand(URL))
                If GetShorts Then .Execute(buildCommand($"{URL.StringTrimEnd("/")}/shorts"))
            End With
        End Using
        Return SFile.GetFiles(OutputPath,, IO.SearchOption.AllDirectories, EDP.ReturnValue).Count > 0
    Catch ex As Exception
        Return ErrorsDescriber.Execute(EDP.SendToLog + EDP.ReturnValue, ex,
                                       $"[API.YouTube.Base.YouTubeFunctions.Parse_Internal({URL}, {UseCookies})]", False)
    End Try
End Function
|
||||
''' <summary>
''' Creates a media container from a saved container file: the file is read as XML to determine the object type,
''' then the matching container is instantiated and loaded from the same file.
''' </summary>
''' <param name="f">Container file</param>
''' <returns>The loaded container; Nothing if the file does not exist or its site key is not the YouTube site key</returns>
''' <exception cref="ArgumentException">The site key matches, but the stored object type is not a known media type</exception>
Friend Shared Function CreateContainer(ByVal f As SFile) As IYouTubeMediaContainer
    If Not f.Exists(SFO.File, False) Then Return Nothing

    Dim instance As IYouTubeMediaContainer = Nothing

    Using x As New XmlFile(f, Protector.Modes.All, False) With {.AllowSameNames = True, .XmlReadOnly = True}
        x.LoadData()
        'Files of other sites are skipped: the result stays Nothing
        If x.Value(YouTubeMediaContainerBase.Name_SiteKey) = YouTubeSiteKey Then
            Select Case x.Value(YouTubeMediaContainerBase.Name_ObjectType).FromXML(Of Integer)(YouTubeMediaType.Undefined)
                Case YouTubeMediaType.Channel
                    instance = New Channel
                Case YouTubeMediaType.PlayList
                    instance = New PlayList
                Case YouTubeMediaType.Single
                    'Single media is a track for music and a video otherwise
                    If x.Value(YouTubeMediaContainerBase.Name_IsMusic).FromXML(Of Boolean)(False) Then instance = New Track Else instance = New Video
                Case Else
                    Throw New ArgumentException($"Object type '{x.Value(YouTubeMediaContainerBase.Name_ObjectType)}' is not identified",
                                                "ObjectType") With {.HelpLink = NameOf(CreateContainer)}
            End Select
        End If
    End Using

    'The container is loaded after the XML reader is released
    If instance IsNot Nothing Then instance.Load(f)
    Return instance
End Function
|
||||
End Class
|
||||
End Namespace
|
||||
Reference in New Issue
Block a user