From 786552faf119f679f5de74426ba0a8baf68bf6ac Mon Sep 17 00:00:00 2001 From: lyngai <wlxjust@gmail.com> Date: Mon, 18 Mar 2019 19:27:42 +0800 Subject: [PATCH 1/2] add paged video support for bilibili --- youtube_dl/extractor/bilibili.py | 47 ++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 3746671d3..1faae6f65 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -23,9 +23,41 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)(?:/?\?p=(?P<page>\d+))?' _TESTS = [{ + 'url': 'https://www.bilibili.com/video/av41213189?p=1', + 'md5': '166c3e684970fbb4f834f24ddd19b275', + 'info_dict': { + 'id': '41213189_p1', + 'cid': '72383807', + 'ext': 'flv', + 'title': '【春晚鬼畜】宋丹丹:我就是念诗女王!【改革春风吹进门】_p1', + 'description': 'md5:a29fb90e0aff106d062a38658b0b75e2', + 'duration': 152.024, + 'timestamp': 1548014429, + 'upload_date': '20190120', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': '吃素的狮子', + 'uploader_id': '808171', + }, + }, { + 'url': 'https://www.bilibili.com/video/av41213189?p=2', + 'md5': 'bda0939f327f2ead942e89d7f028ecc3', + 'info_dict': { + 'id': '41213189_p2', + 'cid': '72387898', + 'ext': 'flv', + 'title': '【春晚鬼畜】宋丹丹:我就是念诗女王!【改革春风吹进门】_p2', + 'description': 'md5:a29fb90e0aff106d062a38658b0b75e2', + 'duration': 152.024, + 'timestamp': 1548014429, + 'upload_date': '20190120', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': '吃素的狮子', + 'uploader_id': '808171', + }, + }, { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', 'info_dict': { @@ -110,16 +142,20 @@ class BiliBiliIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') anime_id = 
mobj.group('anime_id') + page_id = mobj.group('page') webpage = self._download_webpage(url, video_id) if 'anime/' not in url: cid = self._search_regex( + r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid', + default=None + ) or self._search_regex( r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid', default=None ) or compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], webpage, 'player parameters'))['cid'][0] else: if 'no_bangumi_tip' not in smuggled_data: @@ -193,7 +229,7 @@ class BiliBiliIE(InfoExtractor): title = self._html_search_regex( ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', - group='title') + group='title') + ('_p' + str(page_id) if page_id is not None else '') description = self._html_search_meta('description', webpage) timestamp = unified_timestamp(self._html_search_regex( r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', @@ -203,7 +239,8 @@ class BiliBiliIE(InfoExtractor): # TODO 'view_count' requires deobfuscating Javascript info = { - 'id': video_id, + 'id': video_id if page_id is None else str(video_id) + '_p' + str(page_id), + 'cid': cid, 'title': title, 'description': description, 'timestamp': timestamp, From 36fbfef616fc6c44b77f97c42e7a6e75a65aa8a0 Mon Sep 17 00:00:00 2001 From: lyngai <wlxjust@gmail.com> Date: Mon, 18 Mar 2019 19:37:32 +0800 Subject: [PATCH 2/2] code check with flake8 and fix --- youtube_dl/extractor/bilibili.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 1faae6f65..5eec78a7e 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -154,8 +154,8 @@ class BiliBiliIE(InfoExtractor): default=None ) or 
compat_parse_qs(self._search_regex( [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)', - r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', - r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], + r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)', + r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'], webpage, 'player parameters'))['cid'][0] else: if 'no_bangumi_tip' not in smuggled_data: