From a72a13dc4a3868abede1c4c49f1bd6256fb35ebc Mon Sep 17 00:00:00 2001 From: siikamiika Date: Mon, 3 Aug 2020 23:54:52 +0300 Subject: [PATCH 01/12] use dl function for subtitles --- youtube_dl/YoutubeDL.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 19370f62b..f9aa91f30 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1805,6 +1805,14 @@ class YoutubeDL(object): self.report_error('Cannot write annotations file: ' + annofn) return + def dl(name, info): + fd = get_suitable_downloader(info, self.params)(self, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + if self.params.get('verbose'): + self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) + return fd.download(name, info) + subtitles_are_requested = any([self.params.get('writesubtitles', False), self.params.get('writeautomaticsub')]) @@ -1819,7 +1827,6 @@ class YoutubeDL(object): if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format)) else: - self.to_screen('[info] Writing video subtitles to: ' + sub_filename) if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters @@ -1831,10 +1838,9 @@ class YoutubeDL(object): return else: try: - sub_data = ie._request_webpage( - sub_info['url'], info_dict['id'], note=False).read() - with io.open(encodeFilename(sub_filename), 'wb') as subfile: - subfile.write(sub_data) + # TODO does this transfer session...? + # TODO exceptions + dl(sub_filename, sub_info) except (ExtractorError, IOError, OSError, ValueError) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) @@ -1856,14 +1862,6 @@ class YoutubeDL(object): if not self.params.get('skip_download', False): try: - def dl(name, info): - fd = get_suitable_downloader(info, self.params)(self, self.params) - for ph in self._progress_hooks: - fd.add_progress_hook(ph) - if self.params.get('verbose'): - self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) - return fd.download(name, info) - if info_dict.get('requested_formats') is not None: downloaded = [] success = True From 2c4a7e0f68f595c0ee0bc1098e8082ea4b12daf9 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 01:02:23 +0300 Subject: [PATCH 02/12] support youtube live chat replay --- youtube_dl/downloader/__init__.py | 2 + youtube_dl/downloader/youtube_live_chat.py | 88 ++++++++++++++++++++++ youtube_dl/extractor/youtube.py | 8 ++ 3 files changed, 98 insertions(+) create mode 100644 youtube_dl/downloader/youtube_live_chat.py diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index 2e485df9d..4ae81f516 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .rtmp import RtmpFD from .dash import DashSegmentsFD from .rtsp import RtspFD from .ism import IsmFD +from .youtube_live_chat import YoutubeLiveChatReplayFD from .external import ( get_external_downloader, FFmpegFD, @@ -26,6 +27,7 @@ PROTOCOL_MAP = { 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'ism': IsmFD, + 'youtube_live_chat_replay': YoutubeLiveChatReplayFD, } diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py new file mode 100644 index 000000000..64d1d20b2 --- /dev/null +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -0,0 +1,88 @@ +from __future__ import division, unicode_literals + +import re +import json + +from .fragment import FragmentFD + + +class YoutubeLiveChatReplayFD(FragmentFD): + """ Downloads YouTube live chat replays fragment by fragment """ + + FD_NAME = 'youtube_live_chat_replay' + + def real_download(self, filename, info_dict): + video_id = info_dict['video_id'] + self.to_screen('[%s] Downloading live chat' % self.FD_NAME) + + test = self.params.get('test', False) + + ctx = { + 'filename': filename, + 'live': True, + 'total_frags': None, + } + + def dl_fragment(url): + headers = info_dict.get('http_headers', {}) + return self._download_fragment(ctx, url, info_dict, headers) + + def parse_yt_initial_data(data): + raw_json = re.search(b'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1) + return json.loads(raw_json) + + self._prepare_and_start_frag_download(ctx) + + success, raw_fragment = dl_fragment( + 'https://www.youtube.com/watch?v={}'.format(video_id)) + if not success: + return False + data = parse_yt_initial_data(raw_fragment) + continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + # no data yet but required to call _append_fragment + self._append_fragment(ctx, b'') + + first = True + offset = None + while continuation_id is not None: + data = None + if first: + url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id) + success, raw_fragment = dl_fragment(url) + if not success: + return False + data = parse_yt_initial_data(raw_fragment) + else: + url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay' + + '?continuation={}'.format(continuation_id) + + '&playerOffsetMs={}'.format(offset - 5000) + + '&hidden=false' + + '&pbj=1') + success, raw_fragment = dl_fragment(url) + if not success: + return False + data = json.loads(raw_fragment)['response'] + + first = False + continuation_id = None + + live_chat_continuation = data['continuationContents']['liveChatContinuation'] + offset = None + processed_fragment = bytearray() + if 'actions' in live_chat_continuation: + for action in live_chat_continuation['actions']: + if 'replayChatItemAction' in action: + replay_chat_item_action = action['replayChatItemAction'] + offset = int(replay_chat_item_action['videoOffsetTimeMsec']) + processed_fragment.extend( + json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] + + self._append_fragment(ctx, processed_fragment) + + if test or offset is None: + break + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6ae2e58c1..7ef8d26ac 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1462,6 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats + # TODO check that live chat replay actually exists + sub_lang_list['live_chat'] = [ + { + 'video_id': video_id, + 'ext': 'json', + 'protocol': 'youtube_live_chat_replay', + }, + ] if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {} From f671e41ffefa551e72de8717aaa3165aef1157f8 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 03:30:10 +0300 Subject: [PATCH 03/12] check live chat replay existence --- youtube_dl/YoutubeDL.py | 7 +++--- youtube_dl/extractor/youtube.py | 39 ++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f9aa91f30..1b8a938e5 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1838,10 +1838,11 @@ class YoutubeDL(object): return else: try: - # TODO does this transfer session...? - # TODO exceptions dl(sub_filename, sub_info) - except (ExtractorError, IOError, OSError, ValueError) as err: + except ( + ExtractorError, IOError, OSError, ValueError, + compat_urllib_error.URLError, + compat_http_client.HTTPException, socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) continue diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7ef8d26ac..96f6df647 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'Signature extraction failed: ' + tb, cause=e) - def _get_subtitles(self, video_id, webpage): + def _get_subtitles(self, video_id, webpage, is_live_content): try: subs_doc = self._download_xml( 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, @@ -1462,14 +1462,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats - # TODO check that live chat replay actually exists - sub_lang_list['live_chat'] = [ - { - 'video_id': video_id, - 'ext': 'json', - 'protocol': 'youtube_live_chat_replay', - }, - ] + if is_live_content: + sub_lang_list['live_chat'] = [ + { + 'video_id': video_id, + 'ext': 'json', + 'protocol': 'youtube_live_chat_replay', + }, + ] if not sub_lang_list: self._downloader.report_warning('video doesn\'t have subtitles') return {} @@ -1493,6 +1493,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return self._parse_json( uppercase_escape(config), video_id, fatal=False) + def _get_yt_initial_data(self, video_id, webpage): + config = self._search_regex( + r'window\["ytInitialData"\]\s*=\s*(.*);', + webpage, 'ytInitialData', default=None) + if config: + return self._parse_json( + uppercase_escape(config), video_id, fatal=False) + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" @@ -1993,6 +2001,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if is_live is None: is_live = bool_or_none(video_details.get('isLive')) + has_live_chat_replay = False + is_live_content = bool_or_none(video_details.get('isLiveContent')) + if not is_live and is_live_content: + yt_initial_data = self._get_yt_initial_data(video_id, video_webpage) + try: + yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + has_live_chat_replay = True + except (KeyError, IndexError): + pass + # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) @@ -2400,7 +2418,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0]))) # subtitles - video_subtitles = self.extract_subtitles(video_id, video_webpage) + video_subtitles = self.extract_subtitles( + video_id, video_webpage, has_live_chat_replay) automatic_captions = self.extract_automatic_captions(video_id, video_webpage) video_duration = try_get( From d3fd1d8b931f49c77c1e05bb0c22bc6cffd4dd77 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 03:38:07 +0300 Subject: [PATCH 04/12] run flake8 --- youtube_dl/YoutubeDL.py | 9 ++++----- youtube_dl/downloader/youtube_live_chat.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 1b8a938e5..0dc869d56 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1820,7 +1820,6 @@ class YoutubeDL(object): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE subtitles = info_dict['requested_subtitles'] - ie = self.get_info_extractor(info_dict['extractor_key']) for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) @@ -1839,10 +1838,10 @@ class YoutubeDL(object): else: try: dl(sub_filename, sub_info) - except ( - ExtractorError, IOError, OSError, ValueError, - compat_urllib_error.URLError, - compat_http_client.HTTPException, socket.error) as err: + except (ExtractorError, IOError, OSError, ValueError, + compat_urllib_error.URLError, + compat_http_client.HTTPException, + socket.error) as err: self.report_warning('Unable to download subtitle for "%s": %s' % (sub_lang, error_to_compat_str(err))) continue diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index 64d1d20b2..214a37203 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,7 +28,7 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - raw_json = re.search(b'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1) + raw_json = re.search(rb'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1) return json.loads(raw_json) self._prepare_and_start_frag_download(ctx) From b50548d893e730658c4484f89fa41edaa35db9d2 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 04:04:36 +0300 Subject: [PATCH 05/12] rename variable --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 96f6df647..b91745d9d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1435,7 +1435,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError( 'Signature extraction failed: ' + tb, cause=e) - def _get_subtitles(self, video_id, webpage, is_live_content): + def _get_subtitles(self, video_id, webpage, has_live_chat_replay): try: subs_doc = self._download_xml( 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, @@ -1462,7 +1462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': ext, }) sub_lang_list[lang] = sub_formats - if is_live_content: + if has_live_chat_replay: sub_lang_list['live_chat'] = [ { 'video_id': video_id, From 8ff9f315b04554878db9405e0bbd7168fb0614f8 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 04:14:25 +0300 Subject: [PATCH 06/12] attempt to fix syntax error on older python --- youtube_dl/downloader/youtube_live_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index 214a37203..e7eb4bbfe 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,7 +28,7 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - raw_json = re.search(rb'window\["ytInitialData"\]\s*=\s*(.*);', data).group(1) + raw_json = re.search(b'window\\["ytInitialData"\\]\s*=\\s*(.*);', data).group(1) return json.loads(raw_json) self._prepare_and_start_frag_download(ctx) From b8eff9125ac5f9a45ea45d203c924c7dacad60c6 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 04:19:44 +0300 Subject: [PATCH 07/12] flake8 --- youtube_dl/downloader/youtube_live_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index e7eb4bbfe..f7478c336 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,7 +28,7 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - raw_json = re.search(b'window\\["ytInitialData"\\]\s*=\\s*(.*);', data).group(1) + raw_json = re.search(b'window\\["ytInitialData"\\]\\s*=\\s*(.*);', data).group(1) return json.loads(raw_json) self._prepare_and_start_frag_download(ctx) From 17d14e05f750faf2aa48a6b26e6608d20452f16d Mon Sep 17 00:00:00 2001 From: siikamiika Date: Wed, 5 Aug 2020 23:29:41 +0300 Subject: [PATCH 08/12] fix premiere live chat They have isLiveContent = false so just check if the live chat renderer continuation exists --- youtube_dl/extractor/youtube.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b91745d9d..47cc45910 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2002,13 +2002,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = bool_or_none(video_details.get('isLive')) has_live_chat_replay = False - is_live_content = bool_or_none(video_details.get('isLiveContent')) - if not is_live and is_live_content: + if not is_live: yt_initial_data = self._get_yt_initial_data(video_id, video_webpage) try: yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] has_live_chat_replay = True - except (KeyError, IndexError): + except (KeyError, IndexError, TypeError): pass # Check for "rental" videos From 13114df673261e98b1faaa7a77745fce1f3c81d0 Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 11 Aug 2020 00:05:32 +0300 Subject: [PATCH 09/12] fix ytInitialData parsing --- youtube_dl/downloader/youtube_live_chat.py | 10 ++++++++-- youtube_dl/extractor/youtube.py | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index f7478c336..697e52550 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,8 +28,14 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - raw_json = re.search(b'window\\["ytInitialData"\\]\\s*=\\s*(.*);', data).group(1) - return json.loads(raw_json) + window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);' + var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);' + for patt in window_patt, var_patt: + try: + raw_json = re.search(patt, data).group(1) + return json.loads(raw_json) + except AttributeError: + continue self._prepare_and_start_frag_download(ctx) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 47cc45910..059702c54 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,7 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_yt_initial_data(self, video_id, webpage): config = self._search_regex( - r'window\["ytInitialData"\]\s*=\s*(.*);', + (r'window\["ytInitialData"\]\s*=\s*(.*);', + r'var\s+ytInitialData\s*=\s*(.*?);'), webpage, 'ytInitialData', default=None) if config: return self._parse_json( From b8d4a87560e9820fbf63ee2581f26ebd5d4daefd Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 11 Aug 2020 00:13:43 +0300 Subject: [PATCH 10/12] harden regex with lookbehind --- youtube_dl/downloader/youtube_live_chat.py | 4 ++-- youtube_dl/extractor/youtube.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index 697e52550..4932dd9c5 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -28,8 +28,8 @@ class YoutubeLiveChatReplayFD(FragmentFD): return self._download_fragment(ctx, url, info_dict, headers) def parse_yt_initial_data(data): - window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);' - var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);' + window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});' + var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});' for patt in window_patt, var_patt: try: raw_json = re.search(patt, data).group(1) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 059702c54..865373378 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1495,8 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_yt_initial_data(self, video_id, webpage): config = self._search_regex( - (r'window\["ytInitialData"\]\s*=\s*(.*);', - r'var\s+ytInitialData\s*=\s*(.*?);'), + (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});', + r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'), webpage, 'ytInitialData', default=None) if config: return self._parse_json( From 74e939dae286c47b89e002fae03453a6fc47cd2e Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 8 Sep 2020 00:11:30 +0300 Subject: [PATCH 11/12] handle errors --- youtube_dl/downloader/youtube_live_chat.py | 41 +++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index 4932dd9c5..edd717d4c 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -36,15 +36,43 @@ class YoutubeLiveChatReplayFD(FragmentFD): return json.loads(raw_json) except AttributeError: continue + return None + + def get_obj_value(obj, path): + return None + cur = obj + try: + for key in path: + cur = cur[key] + except (KeyError, IndexError, TypeError): + return None + return cur + + def continuation_get_id(data): + continuations = get_obj_value(data, ['continuations']) or [] + for continuation in continuations: + if 'reloadContinuationData' in continuation: + return get_obj_value(continuation, ['reloadContinuationData', 'continuation']) + if 'liveChatReplayContinuationData' in continuation: + return get_obj_value(continuation, ['liveChatReplayContinuationData', 'continuation']) + return None self._prepare_and_start_frag_download(ctx) success, raw_fragment = dl_fragment( 'https://www.youtube.com/watch?v={}'.format(video_id)) if not success: + self.report_error('Video page download unsuccessful.') return False data = parse_yt_initial_data(raw_fragment) - continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + if data is None: + self.report_error('Unable to parse ytInitialData.') + return False + data = get_obj_value(data, ['contents', 'twoColumnWatchNextResults', 'conversationBar', 'liveChatRenderer']) + if data is None: + self.report_error('Cannot find liveChatRenderer from ytInitialData.') + return False + continuation_id = continuation_get_id(data) # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') @@ -56,6 +84,7 @@ class YoutubeLiveChatReplayFD(FragmentFD): url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id) success, raw_fragment = dl_fragment(url) if not success: + self.report_error('Live chat continuation download unsuccessful.') return False data = parse_yt_initial_data(raw_fragment) else: @@ -66,13 +95,17 @@ class YoutubeLiveChatReplayFD(FragmentFD): + '&pbj=1') success, raw_fragment = dl_fragment(url) if not success: + self.report_error('Live chat continuation download unsuccessful.') return False - data = json.loads(raw_fragment)['response'] + data = get_obj_value(json.loads(raw_fragment), ['response']) first = False continuation_id = None - live_chat_continuation = data['continuationContents']['liveChatContinuation'] + live_chat_continuation = get_obj_value(data, ['continuationContents', 'liveChatContinuation']) + if live_chat_continuation is None: + self.report_error('Cannot find liveChatContinuation.') + return False offset = None processed_fragment = bytearray() if 'actions' in live_chat_continuation: @@ -82,7 +115,7 @@ class YoutubeLiveChatReplayFD(FragmentFD): offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') - continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation'] + continuation_id = continuation_get_id(live_chat_continuation) self._append_fragment(ctx, processed_fragment) From 942fffbfc1f6b04b04374c16b2f1dad4ce0a56bf Mon Sep 17 00:00:00 2001 From: siikamiika Date: Tue, 8 Sep 2020 00:27:04 +0300 Subject: [PATCH 12/12] remove debug code --- youtube_dl/downloader/youtube_live_chat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py index edd717d4c..d56cc22e3 100644 --- a/youtube_dl/downloader/youtube_live_chat.py +++ b/youtube_dl/downloader/youtube_live_chat.py @@ -39,7 +39,6 @@ class YoutubeLiveChatReplayFD(FragmentFD): return None def get_obj_value(obj, path): - return None cur = obj try: for key in path: