mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Add files via upload
Added a feature to search for YouTube playlists and return correct YouTube playlist IDs in the results. Also added duration fetching for YouTube videos, which already works, but the call yield self.url_result(video_id, 'Youtube', video_id, video_title) still has to be changed to yield self.url_result(video_id, 'Youtube', video_id, video_title, video_duration), and I could not find where to make that change. Otherwise it fails with a mismatched number of arguments.
This commit is contained in:
parent
e942cfd1a7
commit
c12c50913f
@ -29,6 +29,7 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
bool_or_none,
|
bool_or_none,
|
||||||
clean_html,
|
clean_html,
|
||||||
|
dict_get,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
@ -70,14 +71,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
||||||
|
|
||||||
_YOUTUBE_CLIENT_HEADERS = {
|
|
||||||
'x-youtube-client-name': '1',
|
|
||||||
'x-youtube-client-version': '1.20200609.04.02',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _set_language(self):
|
def _set_language(self):
|
||||||
self._set_cookie(
|
self._set_cookie(
|
||||||
'.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
|
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||||
# YouTube sets the expire time to about two months
|
# YouTube sets the expire time to about two months
|
||||||
expire_time=time.time() + 2 * 30 * 24 * 3600)
|
expire_time=time.time() + 2 * 30 * 24 * 3600)
|
||||||
|
|
||||||
@ -306,8 +302,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||||
'Downloading page #%s%s'
|
'Downloading page #%s%s'
|
||||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||||
transform_source=uppercase_escape,
|
transform_source=uppercase_escape)
|
||||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
|
||||||
break
|
break
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||||
@ -326,35 +321,52 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||||
def _process_page(self, content):
|
def _process_page(self, content):
|
||||||
for video_id, video_title in self.extract_videos_from_page(content):
|
for video_id, video_title, video_duration in self.extract_videos_from_page(content):
|
||||||
|
if len(video_id) == 11:
|
||||||
|
#youtube video id found
|
||||||
yield self.url_result(video_id, 'Youtube', video_id, video_title)
|
yield self.url_result(video_id, 'Youtube', video_id, video_title)
|
||||||
|
elif len(video_id) > 11:
|
||||||
|
#youtube playlist id found
|
||||||
|
yield self.url_result('https://www.youtube.com/playlist?list=%s' % video_id, 'YoutubePlaylist', video_id, video_title)
|
||||||
|
|
||||||
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
|
def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page, durations_in_page):
|
||||||
for mobj in re.finditer(video_re, page):
|
for mobj in re.finditer(video_re, page):
|
||||||
# The link with index 0 is not the first video of the playlist (not sure if still actual)
|
# The link with index 0 is not the first video of the playlist (not sure if still actual)
|
||||||
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
|
if 'index' in mobj.groupdict() and mobj.group('id') == '0':
|
||||||
continue
|
continue
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
video_title = unescapeHTML(
|
playlist_id = mobj.group('plid') if 'plid' in mobj.groupdict() else None
|
||||||
mobj.group('title')) if 'title' in mobj.groupdict() else None
|
if playlist_id != None:
|
||||||
|
video_id = playlist_id
|
||||||
|
video_title = unescapeHTML(mobj.group('title')) if 'title' in mobj.groupdict() else None
|
||||||
if video_title:
|
if video_title:
|
||||||
video_title = video_title.strip()
|
video_title = video_title.strip()
|
||||||
if video_title == '► Play all':
|
if video_title == '► Play all':
|
||||||
video_title = None
|
video_title = None
|
||||||
|
video_duration = mobj.group('duration') if 'duration' in mobj.groupdict() else None
|
||||||
|
if video_duration:
|
||||||
|
video_duration = video_duration.strip()
|
||||||
try:
|
try:
|
||||||
idx = ids_in_page.index(video_id)
|
idx = ids_in_page.index(video_id)
|
||||||
|
|
||||||
if video_title and not titles_in_page[idx]:
|
if video_title and not titles_in_page[idx]:
|
||||||
titles_in_page[idx] = video_title
|
titles_in_page[idx] = video_title
|
||||||
|
|
||||||
|
|
||||||
|
if video_duration and not durations_in_page[idx]:
|
||||||
|
durations_in_page[idx] = video_duration
|
||||||
|
|
||||||
except ValueError:
|
except ValueError:
|
||||||
ids_in_page.append(video_id)
|
ids_in_page.append(video_id)
|
||||||
titles_in_page.append(video_title)
|
titles_in_page.append(video_title)
|
||||||
|
durations_in_page.append(video_duration)
|
||||||
|
|
||||||
def extract_videos_from_page(self, page):
|
def extract_videos_from_page(self, page):
|
||||||
ids_in_page = []
|
ids_in_page = []
|
||||||
titles_in_page = []
|
titles_in_page = []
|
||||||
self.extract_videos_from_page_impl(
|
durations_in_page = []
|
||||||
self._VIDEO_RE, page, ids_in_page, titles_in_page)
|
self.extract_videos_from_page_impl(self._VIDEO_RE, page, ids_in_page, titles_in_page, durations_in_page)
|
||||||
return zip(ids_in_page, titles_in_page)
|
return zip(ids_in_page, titles_in_page, durations_in_page)
|
||||||
|
|
||||||
|
|
||||||
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
|
||||||
@ -394,15 +406,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:www\.)?invidious\.drycat\.fr/|
|
(?:www\.)?invidious\.drycat\.fr/|
|
||||||
(?:www\.)?tube\.poal\.co/|
|
(?:www\.)?tube\.poal\.co/|
|
||||||
(?:www\.)?vid\.wxzm\.sx/|
|
(?:www\.)?vid\.wxzm\.sx/|
|
||||||
(?:www\.)?yewtu\.be/|
|
|
||||||
(?:www\.)?yt\.elukerio\.org/|
|
(?:www\.)?yt\.elukerio\.org/|
|
||||||
(?:www\.)?yt\.lelux\.fi/|
|
(?:www\.)?yt\.lelux\.fi/|
|
||||||
(?:www\.)?invidious\.ggc-project\.de/|
|
|
||||||
(?:www\.)?yt\.maisputain\.ovh/|
|
|
||||||
(?:www\.)?invidious\.13ad\.de/|
|
|
||||||
(?:www\.)?invidious\.toot\.koeln/|
|
|
||||||
(?:www\.)?invidious\.fdn\.fr/|
|
|
||||||
(?:www\.)?watch\.nettohikari\.com/|
|
|
||||||
(?:www\.)?kgg2m7yk5aybusll\.onion/|
|
(?:www\.)?kgg2m7yk5aybusll\.onion/|
|
||||||
(?:www\.)?qklhadlycap4cnod\.onion/|
|
(?:www\.)?qklhadlycap4cnod\.onion/|
|
||||||
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
|
(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
|
||||||
@ -410,7 +415,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
|
(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
|
||||||
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
|
(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
|
||||||
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
|
(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
|
||||||
(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
|
|
||||||
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
|
||||||
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
(?:.*?\#/)? # handle anchor (#/) redirect urls
|
||||||
(?: # the various things that can precede the ID:
|
(?: # the various things that can precede the ID:
|
||||||
@ -440,10 +444,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(?(1).+)? # if we found the ID, everything can follow
|
(?(1).+)? # if we found the ID, everything can follow
|
||||||
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
$""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
|
||||||
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
_NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
|
||||||
_PLAYER_INFO_RE = (
|
|
||||||
r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
|
|
||||||
r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
|
|
||||||
)
|
|
||||||
_formats = {
|
_formats = {
|
||||||
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
'6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
|
||||||
@ -587,7 +587,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'upload_date': '20120506',
|
'upload_date': '20120506',
|
||||||
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
|
||||||
'alt_title': 'I Love It (feat. Charli XCX)',
|
'alt_title': 'I Love It (feat. Charli XCX)',
|
||||||
'description': 'md5:19a2f98d9032b9311e686ed039564f63',
|
'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
|
||||||
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
|
||||||
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
|
||||||
'iconic ep', 'iconic', 'love', 'it'],
|
'iconic ep', 'iconic', 'love', 'it'],
|
||||||
@ -702,11 +702,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': 'nfWlot6h_JM',
|
'id': 'nfWlot6h_JM',
|
||||||
'ext': 'm4a',
|
'ext': 'm4a',
|
||||||
'title': 'Taylor Swift - Shake It Off',
|
'title': 'Taylor Swift - Shake It Off',
|
||||||
'description': 'md5:307195cd21ff7fa352270fe884570ef0',
|
'description': 'md5:bec2185232c05479482cb5a9b82719bf',
|
||||||
'duration': 242,
|
'duration': 242,
|
||||||
'uploader': 'TaylorSwiftVEVO',
|
'uploader': 'TaylorSwiftVEVO',
|
||||||
'uploader_id': 'TaylorSwiftVEVO',
|
'uploader_id': 'TaylorSwiftVEVO',
|
||||||
'upload_date': '20140818',
|
'upload_date': '20140818',
|
||||||
|
'creator': 'Taylor Swift',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'youtube_include_dash_manifest': True,
|
'youtube_include_dash_manifest': True,
|
||||||
@ -771,11 +772,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'upload_date': '20100430',
|
'upload_date': '20100430',
|
||||||
'uploader_id': 'deadmau5',
|
'uploader_id': 'deadmau5',
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
|
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
|
||||||
'creator': 'Dada Life, deadmau5',
|
'creator': 'deadmau5',
|
||||||
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
'description': 'md5:12c56784b8032162bb936a5f76d55360',
|
||||||
'uploader': 'deadmau5',
|
'uploader': 'deadmau5',
|
||||||
'title': 'Deadmau5 - Some Chords (HD)',
|
'title': 'Deadmau5 - Some Chords (HD)',
|
||||||
'alt_title': 'This Machine Kills Some Chords',
|
'alt_title': 'Some Chords',
|
||||||
},
|
},
|
||||||
'expected_warnings': [
|
'expected_warnings': [
|
||||||
'DASH manifest missing',
|
'DASH manifest missing',
|
||||||
@ -1151,7 +1152,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
'youtube_include_dash_manifest': False,
|
'youtube_include_dash_manifest': False,
|
||||||
},
|
},
|
||||||
'skip': 'not actual anymore',
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Youtube Music Auto-generated description
|
# Youtube Music Auto-generated description
|
||||||
@ -1162,8 +1162,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'title': 'Voyeur Girl',
|
'title': 'Voyeur Girl',
|
||||||
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
'description': 'md5:7ae382a65843d6df2685993e90a8628f',
|
||||||
'upload_date': '20190312',
|
'upload_date': '20190312',
|
||||||
'uploader': 'Stephen - Topic',
|
'uploader': 'Various Artists - Topic',
|
||||||
'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
|
'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
|
||||||
'artist': 'Stephen',
|
'artist': 'Stephen',
|
||||||
'track': 'Voyeur Girl',
|
'track': 'Voyeur Girl',
|
||||||
'album': 'it\'s too much love to know my dear',
|
'album': 'it\'s too much love to know my dear',
|
||||||
@ -1227,7 +1227,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'id': '-hcAI0g-f5M',
|
'id': '-hcAI0g-f5M',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Put It On Me',
|
'title': 'Put It On Me',
|
||||||
'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
|
'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
|
||||||
'upload_date': '20180426',
|
'upload_date': '20180426',
|
||||||
'uploader': 'Matt Maeson - Topic',
|
'uploader': 'Matt Maeson - Topic',
|
||||||
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
|
'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
|
||||||
@ -1245,26 +1245,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
|
'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
# invalid -> valid video id redirection
|
|
||||||
'url': 'DJztXj2GPfl',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'DJztXj2GPfk',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
|
|
||||||
'description': 'md5:bf577a41da97918e94fa9798d9228825',
|
|
||||||
'upload_date': '20090125',
|
|
||||||
'uploader': 'Prochorowka',
|
|
||||||
'uploader_id': 'Prochorowka',
|
|
||||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
|
|
||||||
'artist': 'Panjabi MC',
|
|
||||||
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
|
|
||||||
'album': 'Beware of the Boys (Mundian To Bach Ke)',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -1291,18 +1271,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
""" Return a string representation of a signature """
|
""" Return a string representation of a signature """
|
||||||
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def _extract_player_info(cls, player_url):
|
|
||||||
for player_re in cls._PLAYER_INFO_RE:
|
|
||||||
id_m = re.search(player_re, player_url)
|
|
||||||
if id_m:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
|
||||||
return id_m.group('ext'), id_m.group('id')
|
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
player_type, player_id = self._extract_player_info(player_url)
|
id_m = re.match(
|
||||||
|
r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
|
||||||
|
player_url)
|
||||||
|
if not id_m:
|
||||||
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
|
player_type = id_m.group('ext')
|
||||||
|
player_id = id_m.group('id')
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
func_id = '%s_%s_%s' % (
|
func_id = '%s_%s_%s' % (
|
||||||
@ -1384,7 +1360,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
funcname = self._search_regex(
|
funcname = self._search_regex(
|
||||||
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
(r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
|
||||||
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
||||||
# Obsolete patterns
|
# Obsolete patterns
|
||||||
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
@ -1658,63 +1633,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
return video_id
|
return video_id
|
||||||
|
|
||||||
def _extract_chapters_from_json(self, webpage, video_id, duration):
|
|
||||||
if not webpage:
|
|
||||||
return
|
|
||||||
player = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'RELATED_PLAYER_ARGS["\']\s*:\s*({.+})\s*,?\s*\n', webpage,
|
|
||||||
'player args', default='{}'),
|
|
||||||
video_id, fatal=False)
|
|
||||||
if not player or not isinstance(player, dict):
|
|
||||||
return
|
|
||||||
watch_next_response = player.get('watch_next_response')
|
|
||||||
if not isinstance(watch_next_response, compat_str):
|
|
||||||
return
|
|
||||||
response = self._parse_json(watch_next_response, video_id, fatal=False)
|
|
||||||
if not response or not isinstance(response, dict):
|
|
||||||
return
|
|
||||||
chapters_list = try_get(
|
|
||||||
response,
|
|
||||||
lambda x: x['playerOverlays']
|
|
||||||
['playerOverlayRenderer']
|
|
||||||
['decoratedPlayerBarRenderer']
|
|
||||||
['decoratedPlayerBarRenderer']
|
|
||||||
['playerBar']
|
|
||||||
['chapteredPlayerBarRenderer']
|
|
||||||
['chapters'],
|
|
||||||
list)
|
|
||||||
if not chapters_list:
|
|
||||||
return
|
|
||||||
|
|
||||||
def chapter_time(chapter):
|
|
||||||
return float_or_none(
|
|
||||||
try_get(
|
|
||||||
chapter,
|
|
||||||
lambda x: x['chapterRenderer']['timeRangeStartMillis'],
|
|
||||||
int),
|
|
||||||
scale=1000)
|
|
||||||
chapters = []
|
|
||||||
for next_num, chapter in enumerate(chapters_list, start=1):
|
|
||||||
start_time = chapter_time(chapter)
|
|
||||||
if start_time is None:
|
|
||||||
continue
|
|
||||||
end_time = (chapter_time(chapters_list[next_num])
|
|
||||||
if next_num < len(chapters_list) else duration)
|
|
||||||
if end_time is None:
|
|
||||||
continue
|
|
||||||
title = try_get(
|
|
||||||
chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
|
|
||||||
compat_str)
|
|
||||||
chapters.append({
|
|
||||||
'start_time': start_time,
|
|
||||||
'end_time': end_time,
|
|
||||||
'title': title,
|
|
||||||
})
|
|
||||||
return chapters
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_chapters_from_description(description, duration):
|
def _extract_chapters(description, duration):
|
||||||
if not description:
|
if not description:
|
||||||
return None
|
return None
|
||||||
chapter_lines = re.findall(
|
chapter_lines = re.findall(
|
||||||
@ -1748,10 +1668,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
})
|
})
|
||||||
return chapters
|
return chapters
|
||||||
|
|
||||||
def _extract_chapters(self, webpage, description, video_id, duration):
|
|
||||||
return (self._extract_chapters_from_json(webpage, video_id, duration)
|
|
||||||
or self._extract_chapters_from_description(description, duration))
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
url, smuggled_data = unsmuggle_url(url, {})
|
url, smuggled_data = unsmuggle_url(url, {})
|
||||||
|
|
||||||
@ -1779,10 +1695,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
# Get video webpage
|
# Get video webpage
|
||||||
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
|
url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
|
||||||
video_webpage, urlh = self._download_webpage_handle(url, video_id)
|
video_webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
|
|
||||||
video_id = qs.get('v', [None])[0] or video_id
|
|
||||||
|
|
||||||
# Attempt to extract SWF player URL
|
# Attempt to extract SWF player URL
|
||||||
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
|
||||||
@ -1811,6 +1724,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def extract_view_count(v_info):
|
def extract_view_count(v_info):
|
||||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||||
|
|
||||||
|
def extract_token(v_info):
|
||||||
|
return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
|
||||||
|
|
||||||
def extract_player_response(player_response, video_id):
|
def extract_player_response(player_response, video_id):
|
||||||
pl_response = str_or_none(player_response)
|
pl_response = str_or_none(player_response)
|
||||||
if not pl_response:
|
if not pl_response:
|
||||||
@ -1823,7 +1739,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
player_response = {}
|
player_response = {}
|
||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
video_info = {}
|
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
age_gate = True
|
age_gate = True
|
||||||
@ -1838,14 +1753,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
||||||
})
|
})
|
||||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||||
try:
|
|
||||||
video_info_webpage = self._download_webpage(
|
video_info_webpage = self._download_webpage(
|
||||||
video_info_url, video_id,
|
video_info_url, video_id,
|
||||||
note='Refetching age-gated info webpage',
|
note='Refetching age-gated info webpage',
|
||||||
errnote='unable to download video info webpage')
|
errnote='unable to download video info webpage')
|
||||||
except ExtractorError:
|
|
||||||
video_info_webpage = None
|
|
||||||
if video_info_webpage:
|
|
||||||
video_info = compat_parse_qs(video_info_webpage)
|
video_info = compat_parse_qs(video_info_webpage)
|
||||||
pl_response = video_info.get('player_response', [None])[0]
|
pl_response = video_info.get('player_response', [None])[0]
|
||||||
player_response = extract_player_response(pl_response, video_id)
|
player_response = extract_player_response(pl_response, video_id)
|
||||||
@ -1853,6 +1764,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
view_count = extract_view_count(video_info)
|
view_count = extract_view_count(video_info)
|
||||||
else:
|
else:
|
||||||
age_gate = False
|
age_gate = False
|
||||||
|
video_info = None
|
||||||
|
sts = None
|
||||||
# Try looking directly into the video webpage
|
# Try looking directly into the video webpage
|
||||||
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
|
||||||
if ytplayer_config:
|
if ytplayer_config:
|
||||||
@ -1869,10 +1782,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
|
||||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||||
is_live = True
|
is_live = True
|
||||||
|
sts = ytplayer_config.get('sts')
|
||||||
if not player_response:
|
if not player_response:
|
||||||
player_response = extract_player_response(args.get('player_response'), video_id)
|
player_response = extract_player_response(args.get('player_response'), video_id)
|
||||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
add_dash_mpd_pr(player_response)
|
add_dash_mpd_pr(player_response)
|
||||||
|
# We also try looking in get_video_info since it may contain different dashmpd
|
||||||
|
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||||
|
# are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
|
||||||
|
# manifest pointed by get_video_info's dashmpd).
|
||||||
|
# The general idea is to take a union of itags of both DASH manifests (for example
|
||||||
|
# video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
|
||||||
|
self.report_video_info_webpage_download(video_id)
|
||||||
|
for el in ('embedded', 'detailpage', 'vevo', ''):
|
||||||
|
query = {
|
||||||
|
'video_id': video_id,
|
||||||
|
'ps': 'default',
|
||||||
|
'eurl': '',
|
||||||
|
'gl': 'US',
|
||||||
|
'hl': 'en',
|
||||||
|
}
|
||||||
|
if el:
|
||||||
|
query['el'] = el
|
||||||
|
if sts:
|
||||||
|
query['sts'] = sts
|
||||||
|
video_info_webpage = self._download_webpage(
|
||||||
|
'%s://www.youtube.com/get_video_info' % proto,
|
||||||
|
video_id, note=False,
|
||||||
|
errnote='unable to download video info webpage',
|
||||||
|
fatal=False, query=query)
|
||||||
|
if not video_info_webpage:
|
||||||
|
continue
|
||||||
|
get_video_info = compat_parse_qs(video_info_webpage)
|
||||||
|
if not player_response:
|
||||||
|
pl_response = get_video_info.get('player_response', [None])[0]
|
||||||
|
player_response = extract_player_response(pl_response, video_id)
|
||||||
|
add_dash_mpd(get_video_info)
|
||||||
|
if view_count is None:
|
||||||
|
view_count = extract_view_count(get_video_info)
|
||||||
|
if not video_info:
|
||||||
|
video_info = get_video_info
|
||||||
|
get_token = extract_token(get_video_info)
|
||||||
|
if get_token:
|
||||||
|
# Different get_video_info requests may report different results, e.g.
|
||||||
|
# some may report video unavailability, but some may serve it without
|
||||||
|
# any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
|
||||||
|
# the original webpage as well as el=info and el=embedded get_video_info
|
||||||
|
# requests report video unavailability due to geo restriction while
|
||||||
|
# el=detailpage succeeds and returns valid data). This is probably
|
||||||
|
# due to YouTube measures against IP ranges of hosting providers.
|
||||||
|
# Working around by preferring the first succeeded video_info containing
|
||||||
|
# the token if no such video_info yet was found.
|
||||||
|
token = extract_token(video_info)
|
||||||
|
if not token:
|
||||||
|
video_info = get_video_info
|
||||||
|
break
|
||||||
|
|
||||||
def extract_unavailable_message():
|
def extract_unavailable_message():
|
||||||
messages = []
|
messages = []
|
||||||
@ -1885,22 +1849,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if messages:
|
if messages:
|
||||||
return '\n'.join(messages)
|
return '\n'.join(messages)
|
||||||
|
|
||||||
if not video_info and not player_response:
|
if not video_info:
|
||||||
unavailable_message = extract_unavailable_message()
|
unavailable_message = extract_unavailable_message()
|
||||||
if not unavailable_message:
|
if not unavailable_message:
|
||||||
unavailable_message = 'Unable to extract video data'
|
unavailable_message = 'Unable to extract video data'
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
|
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
|
||||||
|
|
||||||
if not isinstance(video_info, dict):
|
|
||||||
video_info = {}
|
|
||||||
|
|
||||||
video_details = try_get(
|
video_details = try_get(
|
||||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||||
|
|
||||||
microformat = try_get(
|
|
||||||
player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
|
|
||||||
|
|
||||||
video_title = video_info.get('title', [None])[0] or video_details.get('title')
|
video_title = video_info.get('title', [None])[0] or video_details.get('title')
|
||||||
if not video_title:
|
if not video_title:
|
||||||
self._downloader.report_warning('Unable to extract video title')
|
self._downloader.report_warning('Unable to extract video title')
|
||||||
@ -1947,26 +1905,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
# fields may contain comma as well (see
|
# fields may contain comma as well (see
|
||||||
# https://github.com/ytdl-org/youtube-dl/issues/8536)
|
# https://github.com/ytdl-org/youtube-dl/issues/8536)
|
||||||
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
|
||||||
|
|
||||||
def feed_entry(name):
|
|
||||||
return try_get(feed_data, lambda x: x[name][0], compat_str)
|
|
||||||
|
|
||||||
feed_id = feed_entry('id')
|
|
||||||
if not feed_id:
|
|
||||||
continue
|
|
||||||
feed_title = feed_entry('title')
|
|
||||||
title = video_title
|
|
||||||
if feed_title:
|
|
||||||
title += ' (%s)' % feed_title
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Youtube',
|
'ie_key': 'Youtube',
|
||||||
'url': smuggle_url(
|
'url': smuggle_url(
|
||||||
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
'%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
|
||||||
{'force_singlefeed': True}),
|
{'force_singlefeed': True}),
|
||||||
'title': title,
|
'title': '%s (%s)' % (video_title, feed_data['title'][0]),
|
||||||
})
|
})
|
||||||
feed_ids.append(feed_id)
|
feed_ids.append(feed_data['id'][0])
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
|
||||||
% (', '.join(feed_ids), video_id))
|
% (', '.join(feed_ids), video_id))
|
||||||
@ -1978,8 +1925,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
view_count = extract_view_count(video_info)
|
view_count = extract_view_count(video_info)
|
||||||
if view_count is None and video_details:
|
if view_count is None and video_details:
|
||||||
view_count = int_or_none(video_details.get('viewCount'))
|
view_count = int_or_none(video_details.get('viewCount'))
|
||||||
if view_count is None and microformat:
|
|
||||||
view_count = int_or_none(microformat.get('viewCount'))
|
|
||||||
|
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = bool_or_none(video_details.get('isLive'))
|
is_live = bool_or_none(video_details.get('isLive'))
|
||||||
@ -2039,12 +1984,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for fmt in streaming_formats:
|
for fmt in streaming_formats:
|
||||||
if fmt.get('drmFamilies') or fmt.get('drm_families'):
|
if fmt.get('drm_families'):
|
||||||
continue
|
continue
|
||||||
url = url_or_none(fmt.get('url'))
|
url = url_or_none(fmt.get('url'))
|
||||||
|
|
||||||
if not url:
|
if not url:
|
||||||
cipher = fmt.get('cipher') or fmt.get('signatureCipher')
|
cipher = fmt.get('cipher')
|
||||||
if not cipher:
|
if not cipher:
|
||||||
continue
|
continue
|
||||||
url_data = compat_parse_qs(cipher)
|
url_data = compat_parse_qs(cipher)
|
||||||
@ -2095,10 +2040,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
if self._downloader.params.get('verbose'):
|
if self._downloader.params.get('verbose'):
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
|
player_version = 'unknown'
|
||||||
player_desc = 'unknown'
|
player_desc = 'unknown'
|
||||||
else:
|
else:
|
||||||
player_type, player_version = self._extract_player_info(player_url)
|
if player_url.endswith('swf'):
|
||||||
player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
|
player_version = self._search_regex(
|
||||||
|
r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
||||||
|
'flash player', fatal=False)
|
||||||
|
player_desc = 'flash player %s' % player_version
|
||||||
|
else:
|
||||||
|
player_version = self._search_regex(
|
||||||
|
[r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
||||||
|
r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
||||||
|
player_url,
|
||||||
|
'html5 player', fatal=False)
|
||||||
|
player_desc = 'html5 player %s' % player_version
|
||||||
|
|
||||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||||
self.to_screen('{%s} signature length %s, %s' %
|
self.to_screen('{%s} signature length %s, %s' %
|
||||||
(format_id, parts_sizes, player_desc))
|
(format_id, parts_sizes, player_desc))
|
||||||
@ -2231,12 +2188,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_uploader_id = mobj.group('uploader_id')
|
video_uploader_id = mobj.group('uploader_id')
|
||||||
video_uploader_url = mobj.group('uploader_url')
|
video_uploader_url = mobj.group('uploader_url')
|
||||||
else:
|
else:
|
||||||
owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
|
self._downloader.report_warning('unable to extract uploader nickname')
|
||||||
if owner_profile_url:
|
|
||||||
video_uploader_id = self._search_regex(
|
|
||||||
r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
|
|
||||||
default=None)
|
|
||||||
video_uploader_url = owner_profile_url
|
|
||||||
|
|
||||||
channel_id = (
|
channel_id = (
|
||||||
str_or_none(video_details.get('channelId'))
|
str_or_none(video_details.get('channelId'))
|
||||||
@ -2247,33 +2199,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
video_webpage, 'channel id', default=None, group='id'))
|
video_webpage, 'channel id', default=None, group='id'))
|
||||||
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
|
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
|
||||||
|
|
||||||
thumbnails = []
|
# thumbnail image
|
||||||
thumbnails_list = try_get(
|
|
||||||
video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
|
|
||||||
for t in thumbnails_list:
|
|
||||||
if not isinstance(t, dict):
|
|
||||||
continue
|
|
||||||
thumbnail_url = url_or_none(t.get('url'))
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnails.append({
|
|
||||||
'url': thumbnail_url,
|
|
||||||
'width': int_or_none(t.get('width')),
|
|
||||||
'height': int_or_none(t.get('height')),
|
|
||||||
})
|
|
||||||
|
|
||||||
if not thumbnails:
|
|
||||||
video_thumbnail = None
|
|
||||||
# We try first to get a high quality image:
|
# We try first to get a high quality image:
|
||||||
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
|
||||||
video_webpage, re.DOTALL)
|
video_webpage, re.DOTALL)
|
||||||
if m_thumb is not None:
|
if m_thumb is not None:
|
||||||
video_thumbnail = m_thumb.group(1)
|
video_thumbnail = m_thumb.group(1)
|
||||||
thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
|
elif 'thumbnail_url' not in video_info:
|
||||||
if thumbnail_url:
|
self._downloader.report_warning('unable to extract video thumbnail')
|
||||||
video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
|
video_thumbnail = None
|
||||||
if video_thumbnail:
|
else: # don't panic if we can't find it
|
||||||
thumbnails.append({'url': video_thumbnail})
|
video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
|
||||||
|
|
||||||
# upload date
|
# upload date
|
||||||
upload_date = self._html_search_meta(
|
upload_date = self._html_search_meta(
|
||||||
@ -2283,8 +2219,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
[r'(?s)id="eow-date.*?>(.*?)</span>',
|
||||||
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
|
||||||
video_webpage, 'upload date', default=None)
|
video_webpage, 'upload date', default=None)
|
||||||
if not upload_date:
|
|
||||||
upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
|
|
||||||
upload_date = unified_strdate(upload_date)
|
upload_date = unified_strdate(upload_date)
|
||||||
|
|
||||||
video_license = self._html_search_regex(
|
video_license = self._html_search_regex(
|
||||||
@ -2356,21 +2290,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
m_cat_container = self._search_regex(
|
m_cat_container = self._search_regex(
|
||||||
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
|
||||||
video_webpage, 'categories', default=None)
|
video_webpage, 'categories', default=None)
|
||||||
category = None
|
|
||||||
if m_cat_container:
|
if m_cat_container:
|
||||||
category = self._html_search_regex(
|
category = self._html_search_regex(
|
||||||
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
|
||||||
default=None)
|
default=None)
|
||||||
if not category:
|
|
||||||
category = try_get(
|
|
||||||
microformat, lambda x: x['category'], compat_str)
|
|
||||||
video_categories = None if category is None else [category]
|
video_categories = None if category is None else [category]
|
||||||
|
else:
|
||||||
|
video_categories = None
|
||||||
|
|
||||||
video_tags = [
|
video_tags = [
|
||||||
unescapeHTML(m.group('content'))
|
unescapeHTML(m.group('content'))
|
||||||
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
|
||||||
if not video_tags:
|
|
||||||
video_tags = try_get(video_details, lambda x: x['keywords'], list)
|
|
||||||
|
|
||||||
def _extract_count(count_name):
|
def _extract_count(count_name):
|
||||||
return str_to_int(self._search_regex(
|
return str_to_int(self._search_regex(
|
||||||
@ -2421,7 +2351,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
errnote='Unable to download video annotations', fatal=False,
|
errnote='Unable to download video annotations', fatal=False,
|
||||||
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
|
data=urlencode_postdata({xsrf_field_name: xsrf_token}))
|
||||||
|
|
||||||
chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
|
chapters = self._extract_chapters(description_original, video_duration)
|
||||||
|
|
||||||
# Look for the DASH manifest
|
# Look for the DASH manifest
|
||||||
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
if self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
@ -2478,6 +2408,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
f['stretched_ratio'] = ratio
|
f['stretched_ratio'] = ratio
|
||||||
|
|
||||||
if not formats:
|
if not formats:
|
||||||
|
token = extract_token(video_info)
|
||||||
|
if not token:
|
||||||
if 'reason' in video_info:
|
if 'reason' in video_info:
|
||||||
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
if 'The uploader has not made this video available in your country.' in video_info['reason']:
|
||||||
regions_allowed = self._html_search_meta(
|
regions_allowed = self._html_search_meta(
|
||||||
@ -2493,7 +2425,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'YouTube said: %s' % reason,
|
'YouTube said: %s' % reason,
|
||||||
expected=True, video_id=video_id)
|
expected=True, video_id=video_id)
|
||||||
if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'"token" parameter not in video info for unknown reason',
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
|
if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
@ -2512,7 +2449,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'creator': video_creator or artist,
|
'creator': video_creator or artist,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'alt_title': video_alt_title or track,
|
'alt_title': video_alt_title or track,
|
||||||
'thumbnails': thumbnails,
|
'thumbnail': video_thumbnail,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'categories': video_categories,
|
'categories': video_categories,
|
||||||
'tags': video_tags,
|
'tags': video_tags,
|
||||||
@ -2574,23 +2511,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
|
_VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
|
||||||
IE_NAME = 'youtube:playlist'
|
IE_NAME = 'youtube:playlist'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
'title': 'ytdl test PL',
|
||||||
'uploader': 'Sergey M.',
|
'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
|
||||||
'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
|
|
||||||
'title': 'youtube-dl public playlist',
|
|
||||||
},
|
},
|
||||||
'playlist_count': 1,
|
'playlist_count': 3,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
|
'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
|
||||||
'uploader': 'Sergey M.',
|
'title': 'YDL_Empty_List',
|
||||||
'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
|
|
||||||
'title': 'youtube-dl empty playlist',
|
|
||||||
},
|
},
|
||||||
'playlist_count': 0,
|
'playlist_count': 0,
|
||||||
|
'skip': 'This playlist is private',
|
||||||
}, {
|
}, {
|
||||||
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
|
||||||
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
|
||||||
@ -2600,7 +2534,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
'uploader': 'Christiaan008',
|
'uploader': 'Christiaan008',
|
||||||
'uploader_id': 'ChRiStIaAn008',
|
'uploader_id': 'ChRiStIaAn008',
|
||||||
},
|
},
|
||||||
'playlist_count': 96,
|
'playlist_count': 95,
|
||||||
}, {
|
}, {
|
||||||
'note': 'issue #673',
|
'note': 'issue #673',
|
||||||
'url': 'PLBB231211A4F62143',
|
'url': 'PLBB231211A4F62143',
|
||||||
@ -3116,7 +3050,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
||||||
IE_DESC = 'YouTube.com user/channel playlists'
|
IE_DESC = 'YouTube.com user/channel playlists'
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
|
_VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
|
||||||
IE_NAME = 'youtube:playlists'
|
IE_NAME = 'youtube:playlists'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -3142,14 +3076,11 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
|
|||||||
'title': 'Chem Player',
|
'title': 'Chem Player',
|
||||||
},
|
},
|
||||||
'skip': 'Blocked',
|
'skip': 'Blocked',
|
||||||
}, {
|
|
||||||
'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
|
class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
|
||||||
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
|
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(&list=(?P<plid>[0-9A-Za-z_-]+))?(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?(.*Duration:\s*(?P<duration>([0-1]?[0-9]|2[0-3]):[0-5][0-9]))?'
|
||||||
|
|
||||||
|
|
||||||
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
|
||||||
@ -3291,8 +3222,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
|||||||
more = self._download_json(
|
more = self._download_json(
|
||||||
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
||||||
'Downloading page #%s' % page_num,
|
'Downloading page #%s' % page_num,
|
||||||
transform_source=uppercase_escape,
|
transform_source=uppercase_escape)
|
||||||
headers=self._YOUTUBE_CLIENT_HEADERS)
|
|
||||||
content_html = more['content_html']
|
content_html = more['content_html']
|
||||||
more_widget_html = more['load_more_widget_html']
|
more_widget_html = more['load_more_widget_html']
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user