From 826bdd8e44773098183517b441b14a82bbf7ee7f Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Date: Fri, 9 Sep 2016 15:49:43 -0400 Subject: [PATCH 01/55] Added tvanouvelles.py !WARNING! still being built. --- youtube_dl/extractor/tvanouvelles.py | 30 ++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 youtube_dl/extractor/tvanouvelles.py diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py new file mode 100644 index 000000000..55a6f0ef1 --- /dev/null +++ b/youtube_dl/extractor/tvanouvelles.py @@ -0,0 +1,30 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class TVANouvellesIE(InfoExtractor): + _VALID_URL = r'https?://www\.tvanouvelles\.com/.*?' + + _TEST = { + 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', + 'info_dict': { + 'id': '3792260579001', + 'ext': 'mp4', + 'title': 'title', + 'description': 'description', + 'uploader_id': '1741764581', + 'timestamp': 1411116829, + 'upload_date': '20140919', + }, + 'add_ie': ['BrightcoveNew'], + 'skip': 'Not accessible from Travis CI server', + } + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' + + def _real_extract(self, url): + program_name = self._match_id(url) + webpage = self._download_webpage(url, program_name) + brightcove_id = self._search_regex( + r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') + return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From e32c651baa224111808d0c706c93f8148bbe9bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 22:16:21 +0700 Subject: [PATCH 02/55] [canvas] Add support for een.be (Closes #10605) --- youtube_dl/extractor/canvas.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index ec6d24d96..ef0691dcd 100644 --- 
a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -1,11 +1,13 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import float_or_none class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?canvas\.be/video/(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?(?Pcanvas|een)\.be/(?:[^/]+/)*(?P[^/?#&]+)' _TESTS = [{ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week', 'md5': 'ea838375a547ac787d4064d8c7860a6c', @@ -38,22 +40,42 @@ class CanvasIE(InfoExtractor): 'params': { 'skip_download': True, } + }, { + 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles', + 'info_dict': { + 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f', + 'display_id': 'herbekijk-sorry-voor-alles', + 'ext': 'mp4', + 'title': 'Herbekijk Sorry voor alles', + 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3', + 'thumbnail': 're:^https?://.*\.jpg$', + 'duration': 3788.06, + }, + 'params': { + 'skip_download': True, + } + }, { + 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend', + 'only_matching': True, }] def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + site_id, display_id = mobj.group('site_id'), mobj.group('id') webpage = self._download_webpage(url, display_id) - title = self._search_regex( + title = (self._search_regex( r']+class="video__body__header__title"[^>]*>(.+?)', - webpage, 'title', default=None) or self._og_search_title(webpage) + webpage, 'title', default=None) or self._og_search_title( + webpage)).strip() video_id = self._html_search_regex( r'data-video=(["\'])(?P.+?)\1', webpage, 'video id', group='id') data = self._download_json( - 'https://mediazone.vrt.be/api/v1/canvas/assets/%s' % video_id, display_id) + 'https://mediazone.vrt.be/api/v1/%s/assets/%s' + % (site_id, video_id), display_id) formats = [] for target in data['targetUrls']: 
From c8ef47c6e3b016748e433e58f385de58358f42b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 9 Sep 2016 23:20:45 +0700 Subject: [PATCH 03/55] [ketnet] Add extractor (Closes #10343) --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/ketnet.py | 52 ++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 youtube_dl/extractor/ketnet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b7b630e9d..38dc33674 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -407,6 +407,7 @@ from .kankan import KankanIE from .karaoketv import KaraoketvIE from .karrierevideos import KarriereVideosIE from .keezmovies import KeezMoviesIE +from .ketnet import KetnetIE from .khanacademy import KhanAcademyIE from .kickstarter import KickStarterIE from .keek import KeekIE diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dl/extractor/ketnet.py new file mode 100644 index 000000000..aaf3f807a --- /dev/null +++ b/youtube_dl/extractor/ketnet.py @@ -0,0 +1,52 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class KetnetIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P[^/?#&]+)' + _TESTS = [{ + 'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes', + 'md5': 'd907f7b1814ef0fa285c0475d9994ed7', + 'info_dict': { + 'id': 'zomerse-filmpjes', + 'ext': 'mp4', + 'title': 'Gluur mee op de filmset en op Pennenzakkenrock', + 'description': 'Gluur mee met Ghost Rockers op de filmset', + 'thumbnail': 're:^https?://.*\.jpg$', + } + }, { + 'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016', + 'only_matching': True, + }, { + 'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + config = 
self._parse_json( + self._search_regex( + r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage, + 'player config'), + video_id) + + title = config['title'] + + formats = self._extract_m3u8_formats( + config['source']['hls'], video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': config.get('description'), + 'thumbnail': config.get('image'), + 'series': config.get('program'), + 'episode': config.get('episode'), + 'formats': formats, + } From dff32d0be6af0127d99040b5deb6b35f8ad696ad Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Boulay Date: Sat, 10 Sep 2016 19:59:56 -0400 Subject: [PATCH 04/55] Added TVA Nouvelles to extractors.py. Also added the regex for TVA Nouvelles and added the attribute to get the video id. --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tvanouvelles.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 38dc33674..b41035c23 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -920,6 +920,7 @@ from .tv2 import ( ) from .tv3 import TV3IE from .tv4 import TV4IE +from .tvanouvelles import TVANouvellesIE from .tvc import ( TVCIE, TVCArticleIE, diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py index 55a6f0ef1..50ab815fd 100644 --- a/youtube_dl/extractor/tvanouvelles.py +++ b/youtube_dl/extractor/tvanouvelles.py @@ -1,10 +1,11 @@ from __future__ import unicode_literals from .common import InfoExtractor +from .youtube import YoutubeIE class TVANouvellesIE(InfoExtractor): - _VALID_URL = r'https?://www\.tvanouvelles\.com/.*?' 
+ _VALID_URL = r'https?://(www\.|)tvanouvelles\.(ca|com|qc)/.*.?/(?P[^/]+)' _TEST = { 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', @@ -18,7 +19,7 @@ class TVANouvellesIE(InfoExtractor): 'upload_date': '20140919', }, 'add_ie': ['BrightcoveNew'], - 'skip': 'Not accessible from Travis CI server', + } BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' @@ -26,5 +27,5 @@ class TVANouvellesIE(InfoExtractor): program_name = self._match_id(url) webpage = self._download_webpage(url, program_name) brightcove_id = self._search_regex( - r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id') + r'data-video-id\=(.+[0-9]?)', webpage, 'brightcove id') return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From f0960002164d4c727f89e33f1c98492ee8b67014 Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Boulay Date: Sat, 10 Sep 2016 20:51:53 -0400 Subject: [PATCH 05/55] Updated the TEST values to remove errors from test. Also removed the youtube import at the top. 
--- youtube_dl/extractor/tvanouvelles.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py index 50ab815fd..64b627c69 100644 --- a/youtube_dl/extractor/tvanouvelles.py +++ b/youtube_dl/extractor/tvanouvelles.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from .common import InfoExtractor -from .youtube import YoutubeIE class TVANouvellesIE(InfoExtractor): @@ -10,13 +9,13 @@ class TVANouvellesIE(InfoExtractor): _TEST = { 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', 'info_dict': { - 'id': '3792260579001', + 'id': '5117035533001', 'ext': 'mp4', - 'title': 'title', - 'description': 'description', + 'title': 'L\u2019industrie du taxi d\xe9nonce l\u2019entente entre Qu\xe9bec et Uber: explications', + 'description': 'L\u2019industrie du taxi a unanimement a d\xe9nonc\xe9 l\u2019entente avec le gouvernement du Qu\xe9bec qui permet \xe0 l\u2019entreprise de covoiturage Uber de faire des affaires l\xe9galement dans le cadre d\u2019un projet pilote d\u2019un an.', 'uploader_id': '1741764581', - 'timestamp': 1411116829, - 'upload_date': '20140919', + 'timestamp': 1473352030, + 'upload_date': '20160908', }, 'add_ie': ['BrightcoveNew'], From 2e7dfbea784ed2f22f48f2fd3910ba51dd9f57f9 Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Boulay Date: Sat, 10 Sep 2016 21:00:17 -0400 Subject: [PATCH 06/55] [tvanouvelles] Add new extractor --- youtube_dl/extractor/tvanouvelles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py index 64b627c69..6c522f3dd 100644 --- a/youtube_dl/extractor/tvanouvelles.py +++ b/youtube_dl/extractor/tvanouvelles.py @@ -18,7 +18,7 @@ class TVANouvellesIE(InfoExtractor): 'upload_date': '20160908', }, 'add_ie': ['BrightcoveNew'], - + 'skip': 'Not accessible from Travis CI server', } BRIGHTCOVE_URL_TEMPLATE = 
'http://players.brightcove.net/1741764581/default_default/index.html?videoId=%s' From 1b26315cc0650d8eb664594b241c540d351b6051 Mon Sep 17 00:00:00 2001 From: Jakub Adam Wieczorek Date: Thu, 25 Aug 2016 23:04:59 +0200 Subject: [PATCH 07/55] [polskieradio] Add support for downloading whole programmes. This extends the Polskie Radio (the Polish national radio) extractor to enable the user to download all the broadcasts of a single programme. --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/polskieradio.py | 79 ++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b41035c23..dae8cc56b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -671,7 +671,7 @@ from .pluralsight import ( ) from .podomatic import PodomaticIE from .pokemon import PokemonIE -from .polskieradio import PolskieRadioIE +from .polskieradio import PolskieRadioIE, PolskieRadioProgrammeIE from .porn91 import Porn91IE from .porncom import PornComIE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index f559b899f..c51d3d9be 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import ( compat_str, compat_urllib_parse_unquote, + compat_urlparse ) from ..utils import ( int_or_none, @@ -15,6 +16,84 @@ from ..utils import ( ) +class PolskieRadioProgrammeIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(,[^/]+)?/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 34, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True + }] + + def _get_entries_from_page_content(self, base_url, content): + entries = [] + + articles = re.findall( + r'
\s+', + content) + for article_id, article_url, _, article_title in articles: + resolved_article_url = compat_urlparse.urljoin(base_url, article_url) + entries.append(self.url_result( + resolved_article_url, + ie='PolskieRadio', + video_id=article_id, + video_title=article_title)) + + return entries + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) + + def _real_extract(self, url): + programme_id = self._match_id(url) + webpage = self._download_webpage(url, programme_id) + + title = self._html_search_regex( + r'(.+?)', + webpage, 'title', fatal=False) + description = None + + entries = self._get_entries_from_page_content(url, webpage) + + pages = re.findall(r' 1: + page_url_root = next(url for _, url, _ in pages if len(url) > 0) + for page_number in range(2, page_count + 1): + page_url = page_url_root + str(page_number) + resolved_page_url = compat_urlparse.urljoin(url, page_url) + page_content = self._download_webpage( + resolved_page_url, programme_id, + note="Downloading page number %d" % page_number) + entries.extend(self._get_entries_from_page_content(url, page_content)) + + return self.playlist_result(entries, programme_id, title, description) + + class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ From f8d12ef59da5defedaaafb6d0d1b66eb62281def Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sat, 10 Sep 2016 20:09:09 +0800 Subject: [PATCH 08/55] [newgrounds] Fix uploader extraction Closes #10584 Also change test URLs to HTTPS, as proposed by @stepshal in #10593. 
Closes #10593 --- ChangeLog | 6 ++++++ youtube_dl/extractor/newgrounds.py | 23 ++++++++--------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index d84f447ba..fafe445cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors +* [newgrounds] Fix uploader extraction (#10584) + + version 2016.09.08 Extractors diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dl/extractor/newgrounds.py index 705940323..9bea610c8 100644 --- a/youtube_dl/extractor/newgrounds.py +++ b/youtube_dl/extractor/newgrounds.py @@ -1,15 +1,12 @@ from __future__ import unicode_literals -import json -import re - from .common import InfoExtractor class NewgroundsIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P[0-9]+)' _TESTS = [{ - 'url': 'http://www.newgrounds.com/audio/listen/549479', + 'url': 'https://www.newgrounds.com/audio/listen/549479', 'md5': 'fe6033d297591288fa1c1f780386f07a', 'info_dict': { 'id': '549479', @@ -18,7 +15,7 @@ class NewgroundsIE(InfoExtractor): 'uploader': 'Burn7', } }, { - 'url': 'http://www.newgrounds.com/portal/view/673111', + 'url': 'https://www.newgrounds.com/portal/view/673111', 'md5': '3394735822aab2478c31b1004fe5e5bc', 'info_dict': { 'id': '673111', @@ -29,24 +26,20 @@ class NewgroundsIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - music_id = mobj.group('id') - webpage = self._download_webpage(url, music_id) + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) title = self._html_search_regex( r'([^>]+)', webpage, 'title') uploader = self._html_search_regex( - [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'], - webpage, 'uploader') + r'Author\s*]+>([^<]+)', webpage, 'uploader', fatal=False) - music_url_json_string = self._html_search_regex( - r'({"url":"[^"]+"),', webpage, 'music url') + '}' - music_url_json = json.loads(music_url_json_string) 
- music_url = music_url_json['url'] + music_url = self._parse_json(self._search_regex( + r'"url":("[^"]+"),', webpage, ''), media_id) return { - 'id': music_id, + 'id': media_id, 'title': title, 'url': music_url, 'uploader': uploader, From 58d8c43f358d373a3e898c6463db885f9bf9d8b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 20:46:45 +0700 Subject: [PATCH 09/55] [canalplus] Add support for c8.fr (Closes #10577) --- youtube_dl/extractor/canalplus.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py index 61463f249..69e8f4f57 100644 --- a/youtube_dl/extractor/canalplus.py +++ b/youtube_dl/extractor/canalplus.py @@ -23,6 +23,7 @@ class CanalplusIE(InfoExtractor): (?:(?:www|m)\.)?canalplus\.fr| (?:www\.)?piwiplus\.fr| (?:www\.)?d8\.tv| + (?:www\.)?c8\.fr| (?:www\.)?d17\.tv| (?:www\.)?itele\.fr )/(?:(?:[^/]+/)*(?P[^/?#&]+))?(?:\?.*\bvid=(?P\d+))?| @@ -35,6 +36,7 @@ class CanalplusIE(InfoExtractor): 'canalplus': 'cplus', 'piwiplus': 'teletoon', 'd8': 'd8', + 'c8': 'd8', 'd17': 'd17', 'itele': 'itele', } From b8fdab23de5e29ff356748737b38f3d4569b96e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 10 Sep 2016 22:01:49 +0700 Subject: [PATCH 10/55] [polskieradio:category] Improve extraction --- youtube_dl/extractor/extractors.py | 5 +- youtube_dl/extractor/polskieradio.py | 158 ++++++++++++++------------- 2 files changed, 84 insertions(+), 79 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dae8cc56b..81523d037 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -671,7 +671,10 @@ from .pluralsight import ( ) from .podomatic import PodomaticIE from .pokemon import PokemonIE -from .polskieradio import PolskieRadioIE, PolskieRadioProgrammeIE +from .polskieradio import ( + PolskieRadioIE, + PolskieRadioCategoryIE, +) from .porn91 import Porn91IE from 
.porncom import PornComIE from .pornhd import PornHdIE diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dl/extractor/polskieradio.py index c51d3d9be..5ff173774 100644 --- a/youtube_dl/extractor/polskieradio.py +++ b/youtube_dl/extractor/polskieradio.py @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -10,90 +11,13 @@ from ..compat import ( compat_urlparse ) from ..utils import ( + extract_attributes, int_or_none, strip_or_none, unified_timestamp, ) -class PolskieRadioProgrammeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(,[^/]+)?/(?P\d+)' - _TESTS = [{ - 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', - 'info_dict': { - 'id': '5102', - 'title': 'HISTORIA ŻYWA', - }, - 'playlist_mincount': 34, - }, { - 'url': 'http://www.polskieradio.pl/7/4807', - 'info_dict': { - 'id': '4807', - 'title': 'Vademecum 1050. rocznicy Chrztu Polski' - }, - 'playlist_mincount': 5 - }, { - 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', - 'only_matching': True - }, { - 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', - 'info_dict': { - 'id': '4143', - 'title': 'Kierunek Kraków', - }, - 'playlist_mincount': 61 - }, { - 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', - 'only_matching': True - }] - - def _get_entries_from_page_content(self, base_url, content): - entries = [] - - articles = re.findall( - r'
\s+', - content) - for article_id, article_url, _, article_title in articles: - resolved_article_url = compat_urlparse.urljoin(base_url, article_url) - entries.append(self.url_result( - resolved_article_url, - ie='PolskieRadio', - video_id=article_id, - video_title=article_title)) - - return entries - - @classmethod - def suitable(cls, url): - return False if PolskieRadioIE.suitable(url) else super(PolskieRadioProgrammeIE, cls).suitable(url) - - def _real_extract(self, url): - programme_id = self._match_id(url) - webpage = self._download_webpage(url, programme_id) - - title = self._html_search_regex( - r'(.+?)', - webpage, 'title', fatal=False) - description = None - - entries = self._get_entries_from_page_content(url, webpage) - - pages = re.findall(r' 1: - page_url_root = next(url for _, url, _ in pages if len(url) > 0) - for page_number in range(2, page_count + 1): - page_url = page_url_root + str(page_number) - resolved_page_url = compat_urlparse.urljoin(url, page_url) - page_content = self._download_webpage( - resolved_page_url, programme_id, - note="Downloading page number %d" % page_number) - entries.extend(self._get_entries_from_page_content(url, page_content)) - - return self.playlist_result(entries, programme_id, title, description) - - class PolskieRadioIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' _TESTS = [{ @@ -176,3 +100,81 @@ class PolskieRadioIE(InfoExtractor): description = strip_or_none(self._og_search_description(webpage)) return self.playlist_result(entries, playlist_id, title, description) + + +class PolskieRadioCategoryIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+(?:,[^/]+)?/(?P\d+)' + _TESTS = [{ + 'url': 'http://www.polskieradio.pl/7/5102,HISTORIA-ZYWA', + 'info_dict': { + 'id': '5102', + 'title': 'HISTORIA ŻYWA', + }, + 'playlist_mincount': 38, + }, { + 'url': 'http://www.polskieradio.pl/7/4807', + 'info_dict': { + 'id': '4807', + 'title': 'Vademecum 1050. 
rocznicy Chrztu Polski' + }, + 'playlist_mincount': 5 + }, { + 'url': 'http://www.polskieradio.pl/7/129,Sygnaly-dnia?ref=source', + 'only_matching': True + }, { + 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', + 'info_dict': { + 'id': '4143', + 'title': 'Kierunek Kraków', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', + 'info_dict': { + 'id': '214', + 'title': 'Muzyka', + }, + 'playlist_mincount': 61 + }, { + 'url': 'http://www.polskieradio.pl/7,Jedynka/5102,HISTORIA-ZYWA', + 'only_matching': True, + }, { + 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', + 'only_matching': True, + }] + + @classmethod + def suitable(cls, url): + return False if PolskieRadioIE.suitable(url) else super(PolskieRadioCategoryIE, cls).suitable(url) + + def _entries(self, url, page, category_id): + content = page + for page_num in itertools.count(2): + for a_entry, entry_id in re.findall( + r'(?s)]+>.*?(]+href=["\']/\d+/\d+/Artykul/(\d+)[^>]+>).*?
', + content): + entry = extract_attributes(a_entry) + href = entry.get('href') + if not href: + continue + yield self.url_result( + compat_urlparse.urljoin(url, href), PolskieRadioIE.ie_key(), + entry_id, entry.get('title')) + mobj = re.search( + r']+class=["\']next["\'][^>]*>\s*]+href=(["\'])(?P(?:(?!\1).)+)\1', + content) + if not mobj: + break + next_url = compat_urlparse.urljoin(url, mobj.group('url')) + content = self._download_webpage( + next_url, category_id, 'Downloading page %s' % page_num) + + def _real_extract(self, url): + category_id = self._match_id(url) + webpage = self._download_webpage(url, category_id) + title = self._html_search_regex( + r'([^<]+) - [^<]+ - [^<]+', + webpage, 'title', fatal=False) + return self.playlist_result( + self._entries(url, webpage, category_id), + category_id, title) From 3fd40a7ece154992762c3af1b944f9984597abf1 Mon Sep 17 00:00:00 2001 From: Scott Leggett Date: Mon, 5 Sep 2016 22:41:08 +1000 Subject: [PATCH 11/55] [9now] Fix extraction --- youtube_dl/extractor/ninenow.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index faa577237..907b42609 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,7 +44,14 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - common_data = page_data.get('episode', {}).get('episode') or page_data.get('clip', {}).get('clip') + current_key = ( + page_data.get('episode', {}).get('currentEpisodeKey') or + page_data.get('clip', {}).get('currentClipKey') + ) + common_data = ( + page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or + page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') + ) video_data = common_data['video'] if video_data.get('drm'): From 86c50a9b7a5643ca5abdeecaf1aadc02bc2fec58 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 00:42:13 +0700 Subject: [PATCH 12/55] [9now] Improve video data extraction (Closes #10561) --- youtube_dl/extractor/ninenow.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dl/extractor/ninenow.py index 907b42609..351bea7ba 100644 --- a/youtube_dl/extractor/ninenow.py +++ b/youtube_dl/extractor/ninenow.py @@ -44,14 +44,20 @@ class NineNowIE(InfoExtractor): page_data = self._parse_json(self._search_regex( r'window\.__data\s*=\s*({.*?});', webpage, 'page data'), display_id) - current_key = ( - page_data.get('episode', {}).get('currentEpisodeKey') or - page_data.get('clip', {}).get('currentClipKey') - ) - common_data = ( - page_data.get('episode', {}).get('episodeCache', {}).get(current_key, {}).get('episode') or - page_data.get('clip', {}).get('clipCache', {}).get(current_key, {}).get('clip') - ) + + for kind in ('episode', 'clip'): + current_key = page_data.get(kind, {}).get( + 'current%sKey' % kind.capitalize()) + if not current_key: + continue + cache = page_data.get(kind, {}).get('%sCache' % kind, {}) + if not cache: + continue + common_data = (cache.get(current_key) or list(cache.values())[0])[kind] + break + else: + raise ExtractorError('Unable to find video data') + video_data = common_data['video'] if video_data.get('drm'): From 7005252fbd2e09fddeca03ce9b9b9b35e3dd1871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 01:27:20 +0700 Subject: [PATCH 13/55] [lrt] Fix audio extraction (Closes #10566) --- youtube_dl/extractor/lrt.py | 46 ++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/lrt.py b/youtube_dl/extractor/lrt.py index 1072405b3..f5c997ef4 100644 --- a/youtube_dl/extractor/lrt.py +++ b/youtube_dl/extractor/lrt.py @@ -1,8 +1,11 @@ # coding: utf-8 from __future__ import unicode_literals 
+import re + from .common import InfoExtractor from ..utils import ( + determine_ext, int_or_none, parse_duration, remove_end, @@ -12,8 +15,10 @@ from ..utils import ( class LRTIE(InfoExtractor): IE_NAME = 'lrt.lt' _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P[0-9]+)' - _TEST = { + _TESTS = [{ + # m3u8 download 'url': 'http://www.lrt.lt/mediateka/irasas/54391/', + 'md5': 'fe44cf7e4ab3198055f2c598fc175cb0', 'info_dict': { 'id': '54391', 'ext': 'mp4', @@ -23,20 +28,45 @@ class LRTIE(InfoExtractor): 'view_count': int, 'like_count': int, }, - 'params': { - 'skip_download': True, # m3u8 download + }, { + # direct mp3 download + 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/', + 'md5': '389da8ca3cad0f51d12bed0c844f6a0a', + 'info_dict': { + 'id': '1013074524', + 'ext': 'mp3', + 'title': 'Kita tema 2016-09-05 15:05', + 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', + 'duration': 3008, + 'view_count': int, + 'like_count': int, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = remove_end(self._og_search_title(webpage), ' - LRT') - m3u8_url = self._search_regex( - r'file\s*:\s*(["\'])(?P.+?)\1\s*\+\s*location\.hash\.substring\(1\)', - webpage, 'm3u8 url', group='url') - formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') + + formats = [] + for _, file_url in re.findall( + r'file\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', webpage): + ext = determine_ext(file_url) + if ext not in ('m3u8', 'mp3'): + continue + # mp3 served as m3u8 produces stuttered media file + if ext == 'm3u8' and '.mp3' in file_url: + continue + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + file_url, video_id, 'mp4', entry_protocol='m3u8_native', + fatal=False)) + elif ext == 'mp3': + formats.append({ + 'url': file_url, + 'vcodec': 'none', + }) self._sort_formats(formats) thumbnail = self._og_search_thumbnail(webpage) From 32572e7f6457bca3a2558574413dca8723f9c53e Mon Sep 17 
00:00:00 2001 From: Remita Amine Date: Sat, 10 Sep 2016 19:43:09 +0100 Subject: [PATCH 14/55] [tfo] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/tfo.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/tfo.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 81523d037..ac5f2a71a 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -870,6 +870,7 @@ from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .testurl import TestURLIE from .tf1 import TF1IE +from .tfo import TFOIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py new file mode 100644 index 000000000..6f1eeac57 --- /dev/null +++ b/youtube_dl/extractor/tfo.py @@ -0,0 +1,53 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import json + +from .common import InfoExtractor +from ..utils import ( + HEADRequest, + ExtractorError, + int_or_none, +) + + +class TFOIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P\d+)' + _TEST = { + 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon', + 'md5': '47c987d0515561114cf03d1226a9d4c7', + 'info_dict': { + 'id': '100463871', + 'ext': 'mp4', + 'title': 'Video Game Hackathon', + 'description': 'md5:558afeba217c6c8d96c60e5421795c07', + 'upload_date': '20160212', + 'timestamp': 1455310233, + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id) + infos = self._download_json( + 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({ + 'product_id': video_id, + }).encode(), headers={ + 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value, + }) + if infos.get('success') == 
0: + raise ExtractorError('%s said: %s' % (self.IE_NAME, infos['msg']), expected=True) + video_data = infos['data'] + + return { + '_type': 'url_transparent', + 'id': video_id, + 'url': 'limelight:media:' + video_data['llid'], + 'title': video_data['title'], + 'description': video_data.get('description'), + 'series': video_data.get('collection'), + 'season_number': int_or_none(video_data.get('season')), + 'episode_number': int_or_none(video_data.get('episode')), + 'duration': int_or_none(video_data.get('duration')), + 'ie_key': 'LimelightMedia', + } From fa7374ddcf2ba4a8909dccf5b96d3ff6d5ef85ce Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 03:02:00 +0800 Subject: [PATCH 15/55] [iwara] Fix extraction after relaunch Closes #10462, closes #3215 --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/iwara.py | 77 ++++++++++++++++++++++++++++++ youtube_dl/extractor/trollvids.py | 36 -------------- 4 files changed, 79 insertions(+), 37 deletions(-) create mode 100644 youtube_dl/extractor/iwara.py delete mode 100644 youtube_dl/extractor/trollvids.py diff --git a/ChangeLog b/ChangeLog index fafe445cb..387dc7bf6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index ac5f2a71a..3a10546b8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -395,6 +395,7 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE +from .iwara import IwaraIE from .izlesene import IzleseneIE from .jeuxvideo import JeuxVideoIE from .jove import JoveIE @@ -899,7 +900,6 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE -from .trollvids import TrollvidsIE from .trutv import TruTVIE from 
.tube8 import Tube8IE from .tubitv import TubiTvIE diff --git a/youtube_dl/extractor/iwara.py b/youtube_dl/extractor/iwara.py new file mode 100644 index 000000000..8d7e7f472 --- /dev/null +++ b/youtube_dl/extractor/iwara.py @@ -0,0 +1,77 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import compat_urllib_parse_urlparse +from ..utils import remove_end + + +class IwaraIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', + 'md5': '1d53866b2c514b23ed69e4352fdc9839', + 'info_dict': { + 'id': 'amVwUl1EHpAD9RD', + 'ext': 'mp4', + 'title': '【MMD R-18】ガールフレンド carry_me_off', + 'age_limit': 18, + }, + }, { + 'url': 'http://ecchi.iwara.tv/videos/Vb4yf2yZspkzkBO', + 'md5': '7e5f1f359cd51a027ba4a7b7710a50f0', + 'info_dict': { + 'id': '0B1LvuHnL-sRFNXB1WHNqbGw4SXc', + 'ext': 'mp4', + 'title': '[3D Hentai] Kyonyu Ã\x97 Genkai Ã\x97 Emaki Shinobi Girls.mp4', + 'age_limit': 18, + }, + 'add_ie': ['GoogleDrive'], + }, { + 'url': 'http://www.iwara.tv/videos/nawkaumd6ilezzgq', + 'md5': '1d85f1e5217d2791626cff5ec83bb189', + 'info_dict': { + 'id': '6liAP9s2Ojc', + 'ext': 'mp4', + 'age_limit': 0, + 'title': '[MMD] Do It Again Ver.2 [1080p 60FPS] (Motion,Camera,Wav+DL)', + 'description': 'md5:590c12c0df1443d833fbebe05da8c47a', + 'upload_date': '20160910', + 'uploader': 'aMMDsork', + 'uploader_id': 'UCVOFyOSCyFkXTYYHITtqB7A', + }, + 'add_ie': ['Youtube'], + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage, urlh = self._download_webpage_handle(url, video_id) + + hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + # ecchi is 'sexy' in Japanese + age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 + + entries = self._parse_html5_media_entries(url, webpage, video_id) + + if not entries: + iframe_url = self._html_search_regex( + r']+src=([\'"])(?P[^\'"]+)\1', + webpage, 
'iframe URL', group='url') + return { + '_type': 'url_transparent', + 'url': iframe_url, + 'age_limit': age_limit, + } + + title = remove_end(self._html_search_regex( + r'([^<]+)', webpage, 'title'), ' | Iwara') + + info_dict = entries[0] + info_dict.update({ + 'id': video_id, + 'title': title, + 'age_limit': age_limit, + }) + + return info_dict diff --git a/youtube_dl/extractor/trollvids.py b/youtube_dl/extractor/trollvids.py deleted file mode 100644 index 657705623..000000000 --- a/youtube_dl/extractor/trollvids.py +++ /dev/null @@ -1,36 +0,0 @@ -# encoding: utf-8 -from __future__ import unicode_literals - -import re - -from .nuevo import NuevoBaseIE - - -class TrollvidsIE(NuevoBaseIE): - _VALID_URL = r'https?://(?:www\.)?trollvids\.com/video/(?P\d+)/(?P[^/?#&]+)' - IE_NAME = 'trollvids' - _TEST = { - 'url': 'http://trollvids.com/video/2349002/%E3%80%90MMD-R-18%E3%80%91%E3%82%AC%E3%83%BC%E3%83%AB%E3%83%95%E3%83%AC%E3%83%B3%E3%83%89-carrymeoff', - 'md5': '1d53866b2c514b23ed69e4352fdc9839', - 'info_dict': { - 'id': '2349002', - 'ext': 'mp4', - 'title': '【MMD R-18】ガールフレンド carry_me_off', - 'age_limit': 18, - 'duration': 216.78, - }, - } - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - info = self._extract_nuevo( - 'http://trollvids.com/nuevo/player/config.php?v=%s' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'age_limit': 18 - }) - return info From 8bc3fe47327e412a0044c2c64318f992f91b43b6 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 04:06:00 +0800 Subject: [PATCH 16/55] [abc:iview] Skip the test. 
They are removed soon --- youtube_dl/extractor/abc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index c7b6df7d0..3792bd232 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -100,6 +100,7 @@ class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' _VALID_URL = r'https?://iview\.abc\.net\.au/programs/[^/]+/(?P[^/?#]+)' + # ABC iview programs are normally available for 14 days only. _TESTS = [{ 'url': 'http://iview.abc.net.au/programs/gardening-australia/FA1505V024S00', 'md5': '979d10b2939101f0d27a06b79edad536', @@ -112,6 +113,7 @@ class ABCIViewIE(InfoExtractor): 'uploader_id': 'abc1', 'timestamp': 1471719600, }, + 'skip': 'Video gone', }] def _real_extract(self, url): From 68b7261ae5ecd5dedb85e0925cbe5820eb85e99b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 14:59:14 +0700 Subject: [PATCH 17/55] [viafree] Improve video id extraction (Closes #10615) --- youtube_dl/extractor/tvplay.py | 36 +++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index c0fec2594..5548ff2ac 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,6 +16,7 @@ from ..utils import ( parse_iso8601, qualities, try_get, + js_to_json, update_url_query, ) @@ -367,6 +368,10 @@ class ViafreeIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TVPlayIE.ie_key()], + }, { + # Different og:image URL schema + 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', 'only_matching': True, @@ -384,14 +389,35 @@ class ViafreeIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + data = self._parse_json( + self._search_regex( + 
r'(?s)window\.App\s*=\s*({.+?})\s*;\s* Date: Sun, 11 Sep 2016 18:32:45 +0800 Subject: [PATCH 18/55] [foxnews] Support Fox News Articles (closes #10598) --- ChangeLog | 1 + youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/foxnews.py | 40 +++++++++++++++++++++++++++--- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 387dc7bf6..a73a35e88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3a10546b8..974660a78 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,6 +292,7 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( + FoxNewsVideoIE, FoxNewsIE, FoxNewsInsiderIE, ) diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 5c7acd795..3e9a6a08c 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,7 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsIE(AMPIE): +class FoxNewsVideoIE(AMPIE): + IE_NAME = 'foxnews:video' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -66,6 +67,35 @@ class FoxNewsIE(AMPIE): return info +class FoxNewsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' + IE_NAME = 'foxnews' + + _TEST = { + 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', + 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'info_dict': { + 'id': '5116295019001', + 'ext': 'mp4', + 'title': 'Trump and Clinton asked to defend positions 
on Iraq War', + 'description': 'Veterans react on \'The Kelly File\'', + 'timestamp': 1473299755, + 'upload_date': '20160908', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P[^\'"]+)\1', + webpage, 'video ID', group='id') + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, + FoxNewsVideoIE.ie_key()) + + class FoxNewsInsiderIE(InfoExtractor): _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' IE_NAME = 'foxnews:insider' @@ -83,7 +113,11 @@ class FoxNewsInsiderIE(InfoExtractor): 'upload_date': '20160825', 'thumbnail': 're:^https?://.*\.jpg$', }, - 'add_ie': [FoxNewsIE.ie_key()], + 'params': { + # m3u8 download + 'skip_download': True, + }, + 'add_ie': [FoxNewsVideoIE.ie_key()], } def _real_extract(self, url): @@ -98,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), + 'ie_key': FoxNewsVideoIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From a114abc8bef1a3e558c98730f2e50f57e65a4d6e Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:36:59 +0800 Subject: [PATCH 19/55] [openload] Temporary fix (#10408) --- youtube_dl/extractor/openload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 03baf8e32..76316ca2f 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -60,7 +60,7 @@ class OpenloadIE(InfoExtractor): if j >= 33 and j <= 126: j = ((j + 14) % 94) + 33 if idx == len(enc_data) - 1: - j += 1 + j += 3 video_url_chars += compat_chr(j) video_url = 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars) From ba8bee6fee4444159d0c8ccf4d825136a6dca55b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 18:53:05 +0800 Subject: 
[PATCH 20/55] [foxnews] Revert to old extractor names --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/foxnews.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 974660a78..47d5c6b38 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -292,8 +292,8 @@ from .fourtube import FourTubeIE from .fox import FOXIE from .foxgay import FoxgayIE from .foxnews import ( - FoxNewsVideoIE, FoxNewsIE, + FoxNewsArticleIE, FoxNewsInsiderIE, ) from .foxsports import FoxSportsIE diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 3e9a6a08c..229bcb175 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -6,8 +6,8 @@ from .amp import AMPIE from .common import InfoExtractor -class FoxNewsVideoIE(AMPIE): - IE_NAME = 'foxnews:video' +class FoxNewsIE(AMPIE): + IE_NAME = 'foxnews' IE_DESC = 'Fox News and Fox Business Video' _VALID_URL = r'https?://(?Pvideo\.(?:insider\.)?fox(?:news|business)\.com)/v/(?:video-embed\.html\?video_id=)?(?P\d+)' _TESTS = [ @@ -67,9 +67,9 @@ class FoxNewsVideoIE(AMPIE): return info -class FoxNewsIE(InfoExtractor): +class FoxNewsArticleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews' + IE_NAME = 'foxnews:article' _TEST = { 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', @@ -93,7 +93,7 @@ class FoxNewsIE(InfoExtractor): webpage, 'video ID', group='id') return self.url_result( 'http://video.foxnews.com/v/' + video_id, - FoxNewsVideoIE.ie_key()) + FoxNewsIE.ie_key()) class FoxNewsInsiderIE(InfoExtractor): @@ -117,7 +117,7 @@ class FoxNewsInsiderIE(InfoExtractor): # m3u8 download 'skip_download': True, }, - 'add_ie': [FoxNewsVideoIE.ie_key()], + 'add_ie': [FoxNewsIE.ie_key()], } def 
_real_extract(self, url): @@ -132,7 +132,7 @@ class FoxNewsInsiderIE(InfoExtractor): return { '_type': 'url_transparent', - 'ie_key': FoxNewsVideoIE.ie_key(), + 'ie_key': FoxNewsIE.ie_key(), 'url': embed_url, 'display_id': display_id, 'title': title, From f02c002bb5bb15c8c6f761f6b4ba4fbe1dfaa31b Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Sun, 11 Sep 2016 19:22:51 +0800 Subject: [PATCH 21/55] [pornhub] Extract categories and tags (closes #10499) --- ChangeLog | 1 + youtube_dl/extractor/pornhub.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/ChangeLog b/ChangeLog index a73a35e88..5d6609987 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors ++ [pornhub] Extract categories and tags (#10499) + [foxnews] Support Fox News articles (#10598) * [iwara] Fix extraction after relaunch (#10462, #3215) * [newgrounds] Fix uploader extraction (#10584) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 20976c101..0724efc09 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, sanitized_Request, str_to_int, @@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, }, { # non-ASCII title @@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor): 'dislike_count': int, 'comment_count': int, 'age_limit': 18, + 'tags': list, + 'categories': list, }, 'params': { 'skip_download': True, @@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor): }) self._sort_formats(formats) + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = page_params.get('tags', 
'').split(',') + categories = page_params.get('categories', '').split(',') + return { 'id': video_id, 'uploader': video_uploader, @@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor): 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, + 'tags': tags, + 'categories': categories, } From 47213ee30e83ade1f6373ab0ea972fc1f4ea2dd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:50:36 +0700 Subject: [PATCH 22/55] [tube8] Extract categories and tags (Closes #10579) --- youtube_dl/extractor/tube8.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index 4053f6c21..e937b2396 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from ..utils import ( int_or_none, str_to_int, @@ -21,7 +23,13 @@ class Tube8IE(KeezMoviesIE): 'title': 'Kasia music video', 'age_limit': 18, 'duration': 230, + 'categories': ['Teen'], + 'tags': ['dancing'], + }, + 'params': { + 'proxy': '127.0.0.1:8118', } + }, { 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, @@ -51,6 +59,17 @@ class Tube8IE(KeezMoviesIE): r'(\d+)', webpage, 'comment count', fatal=False)) + category = self._search_regex( + r'Category:\s*\s*]+href=[^>]+>([^<]+)', + webpage, 'category', fatal=False) + categories = [category] if category else None + + tags_str = self._search_regex( + r'(?s)Tags:\s*(.+?)]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None + info.update({ 'description': description, 'uploader': uploader, @@ -58,6 +77,8 @@ class Tube8IE(KeezMoviesIE): 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, + 'categories': categories, + 'tags': tags, }) return info From 114d320d7babe6befea2ae991a43e20c3ba44187 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 22:51:12 +0700 Subject: [PATCH 23/55] [tvplay] Remove unused import --- youtube_dl/extractor/tvplay.py | 1 - 1 file changed, 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 5548ff2ac..58ffc0e6f 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -16,7 +16,6 @@ from ..utils import ( parse_iso8601, qualities, try_get, - js_to_json, update_url_query, ) From 21b1b3f59a61fd610563e1342123f0fd1f531e10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:20:09 +0700 Subject: [PATCH 24/55] release 2016.09.11 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- docs/supportedsites.md | 9 +++++++-- youtube_dl/version.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index a983bf432..d7195712b 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.08*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.08** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.08 +[debug] youtube-dl version 2016.09.11 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 5d6609987..21d9f6275 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11 Extractors + [pornhub] Extract categories and tags (#10499) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e6be746a8..7a7b268d3 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -247,7 +247,8 @@ - **Formula1** - **FOX** - **Foxgay** - - **FoxNews**: Fox News and Fox Business Video + - **foxnews**: Fox News and Fox Business Video + - **foxnews:article** - **foxnews:insider** - **FoxSports** - **france2.fr:generation-quoi** @@ -326,6 +327,7 @@ - **ivi**: ivi.ru - **ivi:compilation**: ivi.ru compilations - **ivideon**: Ivideon TV + - **Iwara** - **Izlesene** - **JeuxVideo** - **Jove** @@ -339,6 +341,7 @@ - **KarriereVideos** - **keek** - **KeezMovies** + - **Ketnet** - **KhanAcademy** - **KickStarter** - **KonserthusetPlay** @@ -540,6 +543,7 @@ - **podomatic** - **Pokemon** - **PolskieRadio** + - **PolskieRadioCategory** - **PornCom** - **PornHd** - **PornHub**: PornHub and Thumbzilla @@ -701,9 +705,11 @@ - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - 
**TeleMB** + - **TeleQuebec** - **TeleTask** - **Telewebion** - **TF1** + - **TFO** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** @@ -725,7 +731,6 @@ - **ToypicsUser**: Toypics user profile - **TrailerAddict** (Currently broken) - **Trilulilu** - - **trollvids** - **TruTV** - **Tube8** - **TubiTv** diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 941ffb3f6..5f572391c 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.08' +__version__ = '2016.09.11' From f9c097c9b1067bc503088650f5410832d45da96a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:29:25 +0700 Subject: [PATCH 25/55] [devscripts/release.sh] Add ChangeLog reminder prompt --- devscripts/release.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/devscripts/release.sh b/devscripts/release.sh index ca6ae1b49..981d37ca7 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -60,6 +60,9 @@ if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; e if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi +read -p "Is ChangeLog up to date? (y/n) " -n 1 +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; + /bin/echo -e "\n### First of all, testing..." 
make clean if $skip_tests ; then From adc6c43c81384a841957a6072e4b1a945a8a6f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:30:18 +0700 Subject: [PATCH 26/55] [ChangeLog] Actualize --- ChangeLog | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 21d9f6275..9183f29e8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,22 @@ -version 2016.09.11 +version Extractors ++ [tube8] Extract categories and tags (#10579) + [pornhub] Extract categories and tags (#10499) -+ [foxnews] Support Fox News articles (#10598) +* [openload] Temporary fix (#10408) ++ [foxnews] Add support Fox News articles (#10598) +* [viafree] Improve video id extraction (#10615) * [iwara] Fix extraction after relaunch (#10462, #3215) ++ [tfo] Add extractor for tfo.org +* [lrt] Fix audio extraction (#10566) +* [9now] Fix extraction (#10561) ++ [canalplus] Add support for c8.fr (#10577) * [newgrounds] Fix uploader extraction (#10584) ++ [polskieradio:category] Add support for category lists (#10576) ++ [ketnet] Add extractor for ketnet.be (#10343) ++ [canvas] Add support for een.be (#10605) ++ [telequebec] Add extractor for telequebec.tv (#1999) +* [parliamentliveuk] Fix extraction (#9137) version 2016.09.08 From 0a7354195c601f656c9494eb67efcec58e8cc8f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:32:01 +0700 Subject: [PATCH 27/55] [devscripts/release.sh] Add missing fi --- devscripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/devscripts/release.sh b/devscripts/release.sh index 981d37ca7..1af61aa0b 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -61,7 +61,7 @@ if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missi if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi read -p "Is ChangeLog up to date? (y/n) " -n 1 -if [[ ! 
$REPLY =~ ^[Yy]$ ]]; then exit 1; +if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi /bin/echo -e "\n### First of all, testing..." make clean From ba36e0b2426b4817d208c7adbf2f82594a633265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:33:20 +0700 Subject: [PATCH 28/55] release 2016.09.11.1 --- .github/ISSUE_TEMPLATE.md | 6 +++--- ChangeLog | 2 +- youtube_dl/version.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index d7195712b..e87fed573 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -6,8 +6,8 @@ --- -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11** +### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.09.11.1*. If it's not read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. 
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.09.11.1** ### Before submitting an *issue* make sure you have: - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections @@ -35,7 +35,7 @@ $ youtube-dl -v [debug] User config: [] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2016.09.11 +[debug] youtube-dl version 2016.09.11.1 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] Proxy map: {} diff --git a/ChangeLog b/ChangeLog index 9183f29e8..669544815 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,4 @@ -version +version 2016.09.11.1 Extractors + [tube8] Extract categories and tags (#10579) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5f572391c..903aede58 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2016.09.11' +__version__ = '2016.09.11.1' From 80d55ef07b4b6211c17723ce0117000474f32c57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 11 Sep 2016 23:44:22 +0700 Subject: [PATCH 29/55] [tube8] Remove proxy settings from test --- youtube_dl/extractor/tube8.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index e937b2396..1853a1104 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -26,10 +26,6 @@ class Tube8IE(KeezMoviesIE): 'categories': ['Teen'], 'tags': ['dancing'], }, - 'params': { - 'proxy': '127.0.0.1:8118', - } - }, { 'url': 
'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 'only_matching': True, From dc9dae7a19e75b8869248bdc0ae04b82cef6a537 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Mon, 12 Sep 2016 02:55:15 +0800 Subject: [PATCH 30/55] [nbc] Add new extractor for NBC Olympics (#10295, #10361) --- ChangeLog | 6 +++++ youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/nbc.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/ChangeLog b/ChangeLog index 669544815..46eea0626 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +version + +Extractors ++ [nbc] Add support for NBC Olympics (#10361) + + version 2016.09.11.1 Extractors diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 47d5c6b38..4cad5bc5b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -534,6 +534,7 @@ from .nbc import ( CSNNEIE, NBCIE, NBCNewsIE, + NBCOlympicsIE, NBCSportsIE, NBCSportsVPlayerIE, ) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f694e210b..f37bf2f30 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -335,3 +335,43 @@ class NBCNewsIE(ThePlatformIE): 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, 'ie_key': 'ThePlatformFeed', } + + +class NBCOlympicsIE(InfoExtractor): + _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P[a-z-]+)' + + _TEST = { + # Geo-restricted to US + 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'md5': '54fecf846d05429fbaa18af557ee523a', + 'info_dict': { + 'id': 'WjTBzDXx5AUq', + 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold', + 'ext': 'mp4', + 'title': 'Rose\'s son Leo was in tears after his dad won gold', + 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has 
already had on his children.', + 'timestamp': 1471274964, + 'upload_date': '20160815', + 'uploader': 'NBCU-SPORTS', + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json(self._search_regex( + r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), display_id) + + iframe_url = drupal_settings['vod']['iframe_url'] + theplatform_url = iframe_url.replace( + 'vplayer.nbcolympics.com', 'player.theplatform.com') + + return { + '_type': 'url_transparent', + 'url': theplatform_url, + 'ie_key': ThePlatformIE.ie_key(), + 'display_id': display_id, + } From 73b8ba1d85a162043c780106bf9719d86a2fa577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:01:31 +0700 Subject: [PATCH 31/55] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index a5e6a4233..4112f53bb 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* required an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. 
From 5f340047deeee6d41fdcdbb45d9ecb25f80773e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:48:45 +0700 Subject: [PATCH 32/55] [vimeo:ondemand] Pass Referer along with embed URL (#10624) --- youtube_dl/extractor/vimeo.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 7e854f326..50aacc6ac 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -350,6 +350,10 @@ class VimeoIE(VimeoBaseInfoExtractor): } ] + @staticmethod + def _smuggle_referrer(url, referrer_url): + return smuggle_url(url, {'http_headers': {'Referer': referrer_url}}) + @staticmethod def _extract_vimeo_url(url, webpage): # Look for embedded (iframe) Vimeo player @@ -357,8 +361,7 @@ class VimeoIE(VimeoBaseInfoExtractor): r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) if mobj: player_url = unescapeHTML(mobj.group('url')) - surl = smuggle_url(player_url, {'http_headers': {'Referer': url}}) - return surl + return VimeoIE._smuggle_referrer(player_url, url) # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -585,6 +588,20 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/gumfilms', 'uploader_id': 'gumfilms', }, + }, { + # requires Referer to be passed along with og:video:url + 'url': 'https://vimeo.com/ondemand/36938/126682985', + 'info_dict': { + 'id': '126682985', + 'ext': 'mp4', + 'title': 'Rävlock, rätt läte på rätt plats', + 'uploader': 'Lindroth & Norin', + 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/user14430847', + 'uploader_id': 'user14430847', + }, + 'params': { + 'skip_download': True, + }, }, { 'url': 'https://vimeo.com/ondemand/nazmaalik', 'only_matching': True, @@ -599,7 +616,12 @@ class VimeoOndemandIE(VimeoBaseInfoExtractor): def 
_real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - return self.url_result(self._og_search_video_url(webpage), VimeoIE.ie_key()) + return self.url_result( + # Some videos require Referer to be passed along with og:video:url + # similarly to generic vimeo embeds (e.g. + # https://vimeo.com/ondemand/36938/126682985). + VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url), + VimeoIE.ie_key()) class VimeoChannelIE(VimeoBaseInfoExtractor): From c6d9372d2dd50207a0e6b7509a4b98b23c0dfd70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 21:49:31 +0700 Subject: [PATCH 33/55] [extractor/generic] Add vimeo embed that requires Referer passed --- youtube_dl/extractor/generic.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 24b217715..2e46ca179 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1369,6 +1369,11 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Vimeo'], }, + { + # generic vimeo embed that requires original URL passed as Referer + 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/', + 'only_matching': True, + }, { 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video', 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365', From 91551000ee018285c554a0f2597818295a1d508e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:33:00 +0700 Subject: [PATCH 34/55] [kaltura] Skip chun format --- youtube_dl/extractor/kaltura.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 6a8464998..22a06e4ae 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -262,6 +262,10 @@ class KalturaIE(InfoExtractor): # Continue if asset is not ready if f.get('status') != 2: continue + # Original format 
that's not available (e.g. kaltura:1926081:0_c03e1b5g) + # skip for now. + if f.get('fileExt') == 'chun': + continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) formats.append({ From 9438c5aee4e15ed3689e469418a46982b5b75c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 22:43:45 +0700 Subject: [PATCH 35/55] [kaltura] Improve audio detection --- youtube_dl/extractor/kaltura.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py index 22a06e4ae..5a8403777 100644 --- a/youtube_dl/extractor/kaltura.py +++ b/youtube_dl/extractor/kaltura.py @@ -268,6 +268,10 @@ class KalturaIE(InfoExtractor): continue video_url = sign_url( '%s/flavorId/%s' % (data_url, f['id'])) + # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g + # -f mp4-56) + vcodec = 'none' if 'videoCodecId' not in f and f.get( + 'frameRate') == 0 else f.get('videoCodecId') formats.append({ 'format_id': '%(fileExt)s-%(bitrate)s' % f, 'ext': f.get('fileExt'), @@ -275,7 +279,7 @@ class KalturaIE(InfoExtractor): 'fps': int_or_none(f.get('frameRate')), 'filesize_approx': int_or_none(f.get('size'), invscale=1024), 'container': f.get('containerFormat'), - 'vcodec': f.get('videoCodecId'), + 'vcodec': vcodec, 'height': int_or_none(f.get('height')), 'width': int_or_none(f.get('width')), 'url': video_url, From c59a212911ac05809abdde237b8af2be67e82b47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:05:52 +0700 Subject: [PATCH 36/55] [safari] Improve ids regexes (#10617) --- youtube_dl/extractor/safari.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 08ddbe3c4..eabe41efe 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -103,13 +103,13 @@ class SafariIE(SafariBaseIE): webpage = self._download_webpage(url, 
video_id) reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P.+?)\1', + r'data-reference-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P.+?)\1', + r'data-partner-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura widget id', group='id') ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P.+?)\1', + r'data-ui-id=(["\'])(?P(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', group='id') query = { From e12ade612ff16b020493e9392f1c8265e37423e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Mon, 12 Sep 2016 23:29:43 +0700 Subject: [PATCH 37/55] [ISSUE_TEMPLATE_tmpl.md] Fix typo --- .github/ISSUE_TEMPLATE_tmpl.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md index 4112f53bb..ab9968129 100644 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ b/.github/ISSUE_TEMPLATE_tmpl.md @@ -55,4 +55,4 @@ $ youtube-dl -v ### Description of your *issue*, suggested solution and other information Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/rg3/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires an account credentials please provide them or explain how one can obtain them. +If work on your *issue* requires account credentials please provide them or explain how one can obtain them. From 7ca31fa7f6eeef9897290f95d461172e63fcbe0a Mon Sep 17 00:00:00 2001 From: Jean-Nicolas Date: Mon, 12 Sep 2016 20:28:12 -0400 Subject: [PATCH 38/55] [tvanouvelles] Changed the regex. Also kept the original text encoding for the description and title. 
--- youtube_dl/extractor/tvanouvelles.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dl/extractor/tvanouvelles.py index 6c522f3dd..0a714810d 100644 --- a/youtube_dl/extractor/tvanouvelles.py +++ b/youtube_dl/extractor/tvanouvelles.py @@ -1,18 +1,19 @@ +# -*- coding: utf-8 -*- from __future__ import unicode_literals from .common import InfoExtractor class TVANouvellesIE(InfoExtractor): - _VALID_URL = r'https?://(www\.|)tvanouvelles\.(ca|com|qc)/.*.?/(?P[^/]+)' + _VALID_URL = r'https?://[[\w].*]?tvanouvelles\.ca/.*?/(?P[^/]+)/?$' _TEST = { 'url': 'http://www.tvanouvelles.ca/videos/5117035533001', 'info_dict': { 'id': '5117035533001', 'ext': 'mp4', - 'title': 'L\u2019industrie du taxi d\xe9nonce l\u2019entente entre Qu\xe9bec et Uber: explications', - 'description': 'L\u2019industrie du taxi a unanimement a d\xe9nonc\xe9 l\u2019entente avec le gouvernement du Qu\xe9bec qui permet \xe0 l\u2019entreprise de covoiturage Uber de faire des affaires l\xe9galement dans le cadre d\u2019un projet pilote d\u2019un an.', + 'title': 'L’industrie du taxi dénonce l’entente entre Québec et Uber - explications', + 'description': '"L’industrie du taxi a unanimement a dénoncé l’entente avec le gouvernement du Québec qui permet à l’entreprise de covoiturage Uber de faire des affaires légalement dans le cadre d’un projet pilote d’un an.', 'uploader_id': '1741764581', 'timestamp': 1473352030, 'upload_date': '20160908', @@ -26,5 +27,5 @@ class TVANouvellesIE(InfoExtractor): program_name = self._match_id(url) webpage = self._download_webpage(url, program_name) brightcove_id = self._search_regex( - r'data-video-id\=(.+[0-9]?)', webpage, 'brightcove id') + r'data-video-id\=["\']?(.+[0-9])["\']?', webpage, 'brightcove id') return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id) From e3a30ae9f907fc6399b16a7bbefe06ceabdc46d2 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: 
Mon, 12 Sep 2016 23:39:11 +0100 Subject: [PATCH 39/55] [adobepass] add an option to specify mso_id and support for ROGERS TV Provider(closes #10606) --- youtube_dl/YoutubeDL.py | 1 + youtube_dl/__init__.py | 1 + youtube_dl/extractor/adobepass.py | 49 +++++++++++++++++++++++-------- youtube_dl/options.py | 4 +++ 4 files changed, 42 insertions(+), 13 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 805733fb7..f70d5f49a 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,6 +131,7 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. + ap_mso_id Adobe Pass Multiple-system operator Identifier. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 42128272a..2b1b841c9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -293,6 +293,7 @@ def _real_main(argv=None): 'password': opts.password, 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, + 'ap_mso_id': opts.ap_mso_id, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 68ec37e00..454a6af8d 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -6,10 +6,12 @@ import time import xml.etree.ElementTree as etree from .common import InfoExtractor +from ..compat import compat_urlparse from ..utils import ( unescapeHTML, urlencode_postdata, unified_timestamp, + ExtractorError, ) @@ -41,6 +43,11 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def raise_mvpd_required(): + 
raise ExtractorError('This video is only available for users of participating TV providers. ' + 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' + 'and --netrc to provide account credentials.', expected=True) + mvpd_headers = { 'ap_42': 'anonymous', 'ap_11': 'Linux i686', @@ -55,19 +62,26 @@ class AdobePassIE(InfoExtractor): authn_token = None if not authn_token: # TODO add support for other TV Providers - mso_id = 'DTV' + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() username, password = self._get_netrc_login_info(mso_id) if not username or not password: - return '' + return raise_mvpd_required() - def post_form(form_page, note, data={}): + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - return self._download_webpage( - post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) - provider_redirect_page = self._download_webpage( + provider_redirect_page_res = self._download_webpage_handle( self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, 'Downloading Provider Redirect Page', query={ 'noflash': 'true', @@ -77,13 +91,22 @@ class AdobePassIE(InfoExtractor): 'domain_name': 'adobe.com', 'redirect_url': url, }) - provider_login_page = post_form( - provider_redirect_page, 'Downloading Provider Login Page') - mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { - 'username': username, - 'password': password, - }) - post_form(mvpd_confirm_page, 'Confirming 
Login') + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + login_data = {} + if mso_id == 'DTV': + login_data = { + 'username': username, + 'password': password, + } + elif mso_id == 'Rogers': + login_data = { + 'UserName': username, + 'UserPassword': password, + } + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') session = self._download_webpage( self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, diff --git a/youtube_dl/options.py b/youtube_dl/options.py index 56f312f57..c4057ce59 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -350,6 +350,10 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') + authentication.add_option( + '--ap-mso-id', + dest='ap_mso_id', metavar='APMSOID', + help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 667abb59461590b98375af3987e256e31998f540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:20:25 +0700 Subject: [PATCH 40/55] [nhk] Fix extraction (Closes #10633) --- youtube_dl/extractor/nhk.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py index 691bdfa4e..5c8cd76dc 100644 --- a/youtube_dl/extractor/nhk.py +++ b/youtube_dl/extractor/nhk.py @@ -1,14 +1,15 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import ExtractorError class NhkVodIE(InfoExtractor): - _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P.+?)\.html' + _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/en/vod/(?P[^/]+/[^/?#&]+)' _TEST = { # Videos available only for a limited 
period of time. Visit # http://www3.nhk.or.jp/nhkworld/en/vod/ for working samples. - 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815.html', + 'url': 'http://www3.nhk.or.jp/nhkworld/en/vod/tokyofashion/20160815', 'info_dict': { 'id': 'A1bnNiNTE6nY3jLllS-BIISfcC_PpvF5', 'ext': 'flv', @@ -19,25 +20,25 @@ class NhkVodIE(InfoExtractor): }, 'skip': 'Videos available only for a limited period of time', } + _API_URL = 'http://api.nhk.or.jp/nhkworld/vodesdlist/v1/all/all/all.json?apikey=EJfK8jdS57GqlupFgAfAAwr573q01y6k' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + data = self._download_json(self._API_URL, video_id) - embed_code = self._search_regex( - r'nw_vod_ooplayer\([^,]+,\s*(["\'])(?P(?:(?!\1).)+)\1', - webpage, 'ooyala embed code', group='id') + try: + episode = next( + e for e in data['data']['episodes'] + if e.get('url') and video_id in e['url']) + except StopIteration: + raise ExtractorError('Unable to find episode') - title = self._search_regex( - r']+class=["\']episode-detail["\']>\s*([^<]+)', - webpage, 'title', default=None) - description = self._html_search_regex( - r'(?s)]+class=["\']description["\'][^>]*>(.+?)

', - webpage, 'description', default=None) - series = self._search_regex( - r']+class=["\']detail-top-player-title[^>]+>]+>([^<]+)', - webpage, 'series', default=None) + embed_code = episode['vod_id'] + + title = episode.get('sub_title_clean') or episode['sub_title'] + description = episode.get('description_clean') or episode.get('description') + series = episode.get('title_clean') or episode.get('title') return { '_type': 'url_transparent', From c2a6877dcd2a69cfd5ddc19ae638563e0c2b7d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 13 Sep 2016 23:22:16 +0700 Subject: [PATCH 41/55] [adobepass] PEP 8 --- youtube_dl/extractor/adobepass.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 454a6af8d..50a208085 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -44,7 +44,8 @@ class AdobePassIE(InfoExtractor): return token_expires and token_expires <= int(time.time()) def raise_mvpd_required(): - raise ExtractorError('This video is only available for users of participating TV providers. ' + raise ExtractorError( + 'This video is only available for users of participating TV providers. 
' 'Use --ap-mso-id to specify Adobe Pass Multiple-system operator Identifier ' 'and --netrc to provide account credentials.', expected=True) From 8e0af492d5e2df35e53b0e853eb70c531f7cfd97 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 13 Sep 2016 22:16:01 +0100 Subject: [PATCH 42/55] [adobepass] add specific options for adobe pass authentication - add --ap-username and --ap-password option to specify TV provider username and password in the cmd line - add --ap-retries option to limit the number of retries - add --list-ap-mso-ids to list the supported TV Providers --- youtube_dl/YoutubeDL.py | 4 +- youtube_dl/__init__.py | 15 +++ youtube_dl/extractor/adobepass.py | 206 ++++++++++++++++-------------- youtube_dl/extractor/common.py | 10 +- youtube_dl/options.py | 24 +++- 5 files changed, 155 insertions(+), 104 deletions(-) diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index f70d5f49a..9c2c26280 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -131,7 +131,9 @@ class YoutubeDL(object): username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. - ap_mso_id Adobe Pass Multiple-system operator Identifier. + ap_mso_id: Adobe Pass Multiple-system operator Identifier. + ap_username: TV Provider username for authentication purposes. + ap_password: TV Provider password for authentication purposes. usenetrc: Use netrc for authentication instead. verbose: Print additional info to stdout. quiet: Do not print messages to stdout. 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 2b1b841c9..052f20ee7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -34,12 +34,14 @@ from .utils import ( setproctitle, std_headers, write_string, + render_table, ) from .update import update_self from .downloader import ( FileDownloader, ) from .extractor import gen_extractors, list_extractors +from .extractor.adobepass import MSO_INFO from .YoutubeDL import YoutubeDL @@ -118,18 +120,26 @@ def _real_main(argv=None): desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES)) write_string(desc + '\n', out=sys.stdout) sys.exit(0) + if opts.list_ap_mso_ids: + table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()] + write_string('Supported TV Providers:\n' + render_table(['mso id', 'mso name'], table) + '\n', out=sys.stdout) + sys.exit(0) # Conflicting, missing and erroneous options if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error('using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error('account username missing\n') + if opts.ap_password is not None and opts.ap_username is None: + parser.error('TV Provider account username missing\n') if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): parser.error('using output template conflicts with using title, video ID or auto number') if opts.usetitle and opts.useid: parser.error('using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = compat_getpass('Type account password and press [Return]: ') + if opts.ap_username is not None and opts.ap_password is None: + opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ') if opts.ratelimit is not None: numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) if numeric_limit is None: @@ -169,6 +179,8 
@@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) + if opts.ap_retries is not None: + opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -294,6 +306,9 @@ def _real_main(argv=None): 'twofactor': opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso_id': opts.ap_mso_id, + 'ap_username': opts.ap_username, + 'ap_password': opts.ap_password, + 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 50a208085..9add6c0f8 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -15,6 +15,20 @@ from ..utils import ( ) +MSO_INFO = { + 'DTV': { + 'name': 'DirecTV', + 'username_field': 'username', + 'password_field': 'password', + }, + 'Rogers': { + 'name': 'Rogers Cable', + 'username_field': 'UserName', + 'password_field': 'UserPassword', + }, +} + + class AdobePassIE(InfoExtractor): _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' @@ -43,6 +57,18 @@ class AdobePassIE(InfoExtractor): token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele))) return token_expires and token_expires <= int(time.time()) + def post_form(form_page_res, note, data={}): + form_page, urlh = form_page_res + post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') + if not re.match(r'https?://', post_url): + post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) + form_data = self._hidden_inputs(form_page) + form_data.update(data) + return 
self._download_webpage_handle( + post_url, video_id, note, data=urlencode_postdata(form_data), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + def raise_mvpd_required(): raise ExtractorError( 'This video is only available for users of participating TV providers. ' @@ -57,105 +83,95 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} - authn_token = requestor_info.get('authn_token') - if authn_token and is_expired(authn_token, 'simpleTokenExpires'): - authn_token = None - if not authn_token: - # TODO add support for other TV Providers - mso_id = self._downloader.params.get('ap_mso_id') - if not mso_id: - raise_mvpd_required() - username, password = self._get_netrc_login_info(mso_id) - if not username or not password: - return raise_mvpd_required() + retries = self._downloader.params.get('ap_retries', 3) + count = 0 + while count < retries: + requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} + authn_token = requestor_info.get('authn_token') + if authn_token and is_expired(authn_token, 'simpleTokenExpires'): + authn_token = None + if not authn_token: + # TODO add support for other TV Providers + mso_id = self._downloader.params.get('ap_mso_id') + if not mso_id: + raise_mvpd_required() + if mso_id not in MSO_INFO: + raise ExtractorError( + 'Unsupported TV Provider, use --list-ap-mso-ids to get a list of supported TV Providers' % mso_id, expected=True) + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - def post_form(form_page_res, note, data={}): - form_page, urlh = form_page_res - post_url = self._html_search_regex(r']+action=(["\'])(?P.+?)\1', form_page, 'post url', group='url') - if not re.match(r'https?://', post_url): - post_url = compat_urlparse.urljoin(urlh.geturl(), post_url) - form_data = 
self._hidden_inputs(form_page) - form_data.update(data) - return self._download_webpage_handle( - post_url, video_id, note, data=urlencode_postdata(form_data), headers={ - 'Content-Type': 'application/x-www-form-urlencoded', + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, }) - - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, + provider_login_page_res = post_form( + provider_redirect_page_res, 'Downloading Provider Login Page') + mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', { + mso_info['username_field']: username, + mso_info['password_field']: password, }) - provider_login_page_res = post_form( - provider_redirect_page_res, 'Downloading Provider Login Page') - login_data = {} - if mso_id == 'DTV': - login_data = { - 'username': username, - 'password': password, - } - elif mso_id == 'Rogers': - login_data = { - 'UserName': username, - 'UserPassword': password, - } - mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', login_data) - if mso_id == 'DTV': - post_form(mvpd_confirm_page_res, 'Confirming Login') + if mso_id == 'DTV': + post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', 
data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + if '' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') def _scrub_eq(o): @@ -350,10 +350,28 @@ def parseOpts(overrideArguments=None): '--video-password', dest='videopassword', metavar='PASSWORD', help='Video password (vimeo, smotri, youku)') - authentication.add_option( + + adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options') + adobe_pass.add_option( '--ap-mso-id', dest='ap_mso_id', metavar='APMSOID', - help='Adobe Pass Multiple-system operator Identifier(DTV, Rogers)') + help='Adobe Pass Multiple-system operator Identifier') + adobe_pass.add_option( + '--ap-username', + dest='ap_username', metavar='APUSERNAME', + help='TV Provider Login with this account ID') + adobe_pass.add_option( + '--ap-password', + dest='ap_password', metavar='APPASSWORD', + help='TV Provider Account password. If this option is left out, youtube-dl will ask interactively.') + adobe_pass.add_option( + '--list-ap-mso-ids', + action='store_true', dest='list_ap_mso_ids', default=False, + help='List all supported TV Providers') + adobe_pass.add_option( + '--ap-retries', + dest='ap_retries', metavar='APRETRIES', default=3, + help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From 7ead562295994beaf72e25c0a42066faebba6929 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:01:31 +0800 Subject: [PATCH 43/55] [bilibili] Remove copyrighted test cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I can't find any English or Chinese material that claims BiliBili has bought legal redistribution permissions for copyrighted products from copyrighted holders. 
References for removed test cases: "刀语": https://en.wikipedia.org/wiki/Katanagatari, by White Fox "哆啦A梦": https://en.wikipedia.org/wiki/Doraemon, by Shin-Ei Animation "岳父岳母真难当": https://en.wikipedia.org/wiki/Serial_(Bad)_Weddings, by Les films du 24 "混沌武士": https://en.wikipedia.org/wiki/Samurai_Champloo, by Manglobe I shouldn't have added them to _TESTS --- youtube_dl/extractor/bilibili.py | 61 ++------------------------------ 1 file changed, 2 insertions(+), 59 deletions(-) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 8fa96d3a0..9f5c12ab9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -17,7 +17,7 @@ from ..utils import ( class BiliBiliIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/v/)(?P\d+)' - _TESTS = [{ + _TEST = { 'url': 'http://www.bilibili.tv/video/av1074402/', 'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e', 'info_dict': { @@ -32,64 +32,7 @@ class BiliBiliIE(InfoExtractor): 'uploader': '菊子桑', 'uploader_id': '156160', }, - }, { - 'url': 'http://www.bilibili.com/video/av1041170/', - 'info_dict': { - 'id': '1041170', - 'ext': 'mp4', - 'title': '【BD1080P】刀语【诸神&异域】', - 'description': '这是个神奇的故事~每个人不留弹幕不给走哦~切利哦!~', - 'duration': 3382.259, - 'timestamp': 1396530060, - 'upload_date': '20140403', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '枫叶逝去', - 'uploader_id': '520116', - }, - }, { - 'url': 'http://www.bilibili.com/video/av4808130/', - 'info_dict': { - 'id': '4808130', - 'ext': 'mp4', - 'title': '【长篇】哆啦A梦443【钉铛】', - 'description': '(2016.05.27)来组合客人的脸吧&amp;寻母六千里锭 抱歉,又轮到周日上班现在才到家 封面www.pixiv.net/member_illust.php?mode=medium&amp;illust_id=56912929', - 'duration': 1493.995, - 'timestamp': 1464564180, - 'upload_date': '20160529', - 'thumbnail': 're:^https?://.+\.jpg', - 'uploader': '喜欢拉面', - 'uploader_id': '151066', - }, - }, { - # Missing upload time - 'url': 'http://www.bilibili.com/video/av1867637/', - 'info_dict': { - 'id': 
'1867637', - 'ext': 'mp4', - 'title': '【HDTV】【喜剧】岳父岳母真难当 (2014)【法国票房冠军】', - 'description': '一个信奉天主教的法国旧式传统资产阶级家庭中有四个女儿。三个女儿却分别找了阿拉伯、犹太、中国丈夫,老夫老妻唯独期盼剩下未嫁的小女儿能找一个信奉天主教的法国白人,结果没想到小女儿找了一位非裔黑人……【这次应该不会跳帧了】', - 'duration': 5760.0, - 'uploader': '黑夜为猫', - 'uploader_id': '610729', - 'thumbnail': 're:^https?://.+\.jpg', - }, - 'params': { - # Just to test metadata extraction - 'skip_download': True, - }, - 'expected_warnings': ['upload time'], - }, { - 'url': 'http://bangumi.bilibili.com/anime/v/40068', - 'md5': '08d539a0884f3deb7b698fb13ba69696', - 'info_dict': { - 'id': '40068', - 'ext': 'mp4', - 'duration': 1402.357, - 'title': '混沌武士 : 第7集 四面楚歌 A Risky Racket', - 'description': 'md5:6a9622b911565794c11f25f81d6a97d2', - 'thumbnail': 're:^http?://.+\.jpg', - }, - }] + } _APP_KEY = '6f90a59ac58a4123' _BILIBILI_KEY = '0bfd84cc3940035173f35e6777508326' From f21830ab4f1ca48d54b7c01a88fb53b324837e58 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Wed, 14 Sep 2016 22:11:49 +0800 Subject: [PATCH 44/55] [bilibili] Fix extraction for videos without backup_url (#10647) --- ChangeLog | 1 + youtube_dl/extractor/bilibili.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 46eea0626..25c916eb2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version Extractors +* [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index 9f5c12ab9..2d174e6f9 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -67,7 +67,7 @@ class BiliBiliIE(InfoExtractor): 'url': durl['url'], 'filesize': int_or_none(durl['size']), }] - for backup_url in durl['backup_url']: + for backup_url in durl.get('backup_url', []): formats.append({ 'url': backup_url, # backup URLs have lower priorities From a2705c60e6523dcba656addc5a4cbf5dbf9c2d2b Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Wed, 14 Sep 2016 
16:36:42 +0100 Subject: [PATCH 45/55] [adobepass] remove unnecessary option --- youtube_dl/__init__.py | 3 --- youtube_dl/extractor/adobepass.py | 3 +-- youtube_dl/options.py | 4 ---- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 052f20ee7..cdff3df65 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,8 +179,6 @@ def _real_main(argv=None): opts.retries = parse_retries(opts.retries) if opts.fragment_retries is not None: opts.fragment_retries = parse_retries(opts.fragment_retries) - if opts.ap_retries is not None: - opts.ap_retries = parse_retries(opts.ap_retries) if opts.buffersize is not None: numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) if numeric_buffersize is None: @@ -308,7 +306,6 @@ def _real_main(argv=None): 'ap_mso_id': opts.ap_mso_id, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, - 'ap_retries': opts.ap_retries, 'quiet': (opts.quiet or any_getting or any_printing), 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 9add6c0f8..913a817d2 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -83,9 +83,8 @@ class AdobePassIE(InfoExtractor): } guid = xml_text(resource, 'guid') - retries = self._downloader.params.get('ap_retries', 3) count = 0 - while count < retries: + while count < 2: requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} authn_token = requestor_info.get('authn_token') if authn_token and is_expired(authn_token, 'simpleTokenExpires'): diff --git a/youtube_dl/options.py b/youtube_dl/options.py index b99201a20..342ae3be3 100644 --- a/youtube_dl/options.py +++ b/youtube_dl/options.py @@ -368,10 +368,6 @@ def parseOpts(overrideArguments=None): '--list-ap-mso-ids', action='store_true', dest='list_ap_mso_ids', default=False, help='List all supported TV Providers') - 
adobe_pass.add_option( - '--ap-retries', - dest='ap_retries', metavar='APRETRIES', default=3, - help='Number of retries for Adobe Pass Authorization requests') video_format = optparse.OptionGroup(parser, 'Video Format Options') video_format.add_option( From c34fcac746de132148689b172c72b69db958ff4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Wed, 14 Sep 2016 22:45:23 +0700 Subject: [PATCH 46/55] [viafree] Fix test --- youtube_dl/extractor/tvplay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py index 58ffc0e6f..3eda0a399 100644 --- a/youtube_dl/extractor/tvplay.py +++ b/youtube_dl/extractor/tvplay.py @@ -369,7 +369,7 @@ class ViafreeIE(InfoExtractor): 'add_ie': [TVPlayIE.ie_key()], }, { # Different og:image URL schema - 'url': 'www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', + 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2', 'only_matching': True, }, { 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1', From 1c646b803f3e082eb7ef8c61104907a180c77f5e Mon Sep 17 00:00:00 2001 From: stepshal Date: Thu, 8 Sep 2016 18:29:05 +0700 Subject: [PATCH 47/55] Improve some _VALID_URLs --- youtube_dl/extractor/abc.py | 2 +- youtube_dl/extractor/aljazeera.py | 2 +- youtube_dl/extractor/azubu.py | 2 +- youtube_dl/extractor/bbc.py | 2 +- youtube_dl/extractor/bpb.py | 2 +- youtube_dl/extractor/camdemy.py | 2 +- youtube_dl/extractor/cbssports.py | 2 +- youtube_dl/extractor/ceskatelevize.py | 2 +- youtube_dl/extractor/chirbit.py | 2 +- youtube_dl/extractor/cmt.py | 2 +- youtube_dl/extractor/criterion.py | 2 +- youtube_dl/extractor/dctp.py | 2 +- youtube_dl/extractor/democracynow.py | 2 +- youtube_dl/extractor/engadget.py | 2 +- youtube_dl/extractor/expotv.py | 2 +- youtube_dl/extractor/freespeech.py | 2 +- youtube_dl/extractor/gamestar.py | 2 +- 
youtube_dl/extractor/googleplus.py | 2 +- youtube_dl/extractor/goshgay.py | 2 +- youtube_dl/extractor/hark.py | 2 +- youtube_dl/extractor/hotnewhiphop.py | 2 +- youtube_dl/extractor/imdb.py | 2 +- youtube_dl/extractor/karaoketv.py | 2 +- youtube_dl/extractor/kickstarter.py | 2 +- youtube_dl/extractor/kuwo.py | 8 ++++---- youtube_dl/extractor/litv.py | 2 +- youtube_dl/extractor/lynda.py | 2 +- youtube_dl/extractor/macgamestore.py | 2 +- youtube_dl/extractor/metacritic.py | 2 +- youtube_dl/extractor/mgtv.py | 2 +- youtube_dl/extractor/ministrygrid.py | 2 +- youtube_dl/extractor/mitele.py | 2 +- youtube_dl/extractor/motorsport.py | 2 +- youtube_dl/extractor/moviezine.py | 2 +- youtube_dl/extractor/myspass.py | 2 +- youtube_dl/extractor/nbc.py | 6 +++--- youtube_dl/extractor/ndr.py | 8 ++++---- youtube_dl/extractor/nextmedia.py | 6 +++--- youtube_dl/extractor/niconico.py | 2 +- youtube_dl/extractor/oktoberfesttv.py | 2 +- youtube_dl/extractor/openload.py | 2 +- youtube_dl/extractor/periscope.py | 2 +- youtube_dl/extractor/playvid.py | 2 +- youtube_dl/extractor/qqmusic.py | 6 +++--- youtube_dl/extractor/rottentomatoes.py | 2 +- youtube_dl/extractor/roxwel.py | 2 +- youtube_dl/extractor/rtve.py | 6 +++--- youtube_dl/extractor/screenjunkies.py | 2 +- youtube_dl/extractor/senateisvp.py | 2 +- youtube_dl/extractor/slideshare.py | 2 +- youtube_dl/extractor/spiegel.py | 2 +- youtube_dl/extractor/syfy.py | 2 +- youtube_dl/extractor/teachingchannel.py | 2 +- youtube_dl/extractor/telecinco.py | 2 +- youtube_dl/extractor/telewebion.py | 2 +- youtube_dl/extractor/theintercept.py | 2 +- youtube_dl/extractor/thescene.py | 2 +- youtube_dl/extractor/tlc.py | 2 +- youtube_dl/extractor/udemy.py | 2 +- youtube_dl/extractor/ustream.py | 4 ++-- youtube_dl/extractor/vevo.py | 4 ++-- youtube_dl/extractor/videodetective.py | 2 +- youtube_dl/extractor/weiqitv.py | 2 +- youtube_dl/extractor/yam.py | 2 +- youtube_dl/extractor/youtube.py | 12 ++++++------ 65 files changed, 86 insertions(+), 86 
deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 3792bd232..465249bbf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -13,7 +13,7 @@ from ..utils import ( class ABCIE(InfoExtractor): IE_NAME = 'abc.net.au' - _VALID_URL = r'https?://www\.abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P\d+)' _TESTS = [{ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334', diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index b081695d8..388e578d5 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://www\.aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' + _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/programmes/.*?/(?P[^/]+)\.html' _TEST = { 'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html', diff --git a/youtube_dl/extractor/azubu.py b/youtube_dl/extractor/azubu.py index a813eb429..72e1bd59d 100644 --- a/youtube_dl/extractor/azubu.py +++ b/youtube_dl/extractor/azubu.py @@ -103,7 +103,7 @@ class AzubuIE(InfoExtractor): class AzubuLiveIE(InfoExtractor): - _VALID_URL = r'https?://www.azubu.tv/(?P[^/]+)$' + _VALID_URL = r'https?://(?:www\.)?azubu\.tv/(?P[^/]+)$' _TEST = { 'url': 'http://www.azubu.tv/MarsTVMDLen', diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index deb9cc1c0..b17916137 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -1028,7 +1028,7 @@ class BBCIE(BBCCoUkIE): class BBCCoUkArticleIE(InfoExtractor): - _VALID_URL = r'https?://www.bbc.co.uk/programmes/articles/(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P[a-zA-Z0-9]+)' IE_NAME = 'bbc.co.uk:article' IE_DESC = 'BBC 
articles' diff --git a/youtube_dl/extractor/bpb.py b/youtube_dl/extractor/bpb.py index 6ad45a1e6..9661ade4f 100644 --- a/youtube_dl/extractor/bpb.py +++ b/youtube_dl/extractor/bpb.py @@ -12,7 +12,7 @@ from ..utils import ( class BpbIE(InfoExtractor): IE_DESC = 'Bundeszentrale für politische Bildung' - _VALID_URL = r'https?://www\.bpb\.de/mediathek/(?P[0-9]+)/' + _VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P[0-9]+)/' _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dl/extractor/camdemy.py index 268c34392..d4e6fbdce 100644 --- a/youtube_dl/extractor/camdemy.py +++ b/youtube_dl/extractor/camdemy.py @@ -112,7 +112,7 @@ class CamdemyIE(InfoExtractor): class CamdemyFolderIE(InfoExtractor): - _VALID_URL = r'https?://www.camdemy.com/folder/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P\d+)' _TESTS = [{ # links with trailing slash 'url': 'http://www.camdemy.com/folder/450', diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dl/extractor/cbssports.py index bf7915626..3a62c840b 100644 --- a/youtube_dl/extractor/cbssports.py +++ b/youtube_dl/extractor/cbssports.py @@ -4,7 +4,7 @@ from .cbs import CBSBaseIE class CBSSportsIE(CBSBaseIE): - _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cbssports\.com/video/player/[^/]+/(?P\d+)' _TESTS = [{ 'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 5a58d1777..87c2e7089 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -17,7 +17,7 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' + _VALID_URL = 
r'https?://(?:www\.)?ceskatelevize\.cz/(porady|ivysilani)/(?:[^/]+/)*(?P[^/#?]+)/*(?:[#?].*)?$' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', 'info_dict': { diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dl/extractor/chirbit.py index b43518652..61aed0167 100644 --- a/youtube_dl/extractor/chirbit.py +++ b/youtube_dl/extractor/chirbit.py @@ -65,7 +65,7 @@ class ChirbitIE(InfoExtractor): class ChirbitProfileIE(InfoExtractor): IE_NAME = 'chirbit:profile' - _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?chirbit\.com/(?:rss/)?(?P[^/]+)' _TEST = { 'url': 'http://chirbit.com/ScarletBeauty', 'info_dict': { diff --git a/youtube_dl/extractor/cmt.py b/youtube_dl/extractor/cmt.py index f24568dcc..ac3bdfe8f 100644 --- a/youtube_dl/extractor/cmt.py +++ b/youtube_dl/extractor/cmt.py @@ -6,7 +6,7 @@ from ..utils import ExtractorError class CMTIE(MTVIE): IE_NAME = 'cmt.com' - _VALID_URL = r'https?://www\.cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cmt\.com/(?:videos|shows)/(?:[^/]+/)*(?P\d+)' _FEED_URL = 'http://www.cmt.com/sitewide/apps/player/embed/rss/' _TESTS = [{ diff --git a/youtube_dl/extractor/criterion.py b/youtube_dl/extractor/criterion.py index dedb810a0..ad32673a8 100644 --- a/youtube_dl/extractor/criterion.py +++ b/youtube_dl/extractor/criterion.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class CriterionIE(InfoExtractor): - _VALID_URL = r'https?://www\.criterion\.com/films/(?P[0-9]+)-.+' + _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P[0-9]+)-.+' _TEST = { 'url': 'http://www.criterion.com/films/184-le-samourai', 'md5': 'bc51beba55685509883a9a7830919ec3', diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py index 9099f5046..a47e04993 100644 --- a/youtube_dl/extractor/dctp.py +++ b/youtube_dl/extractor/dctp.py @@ -6,7 +6,7 @@ from ..compat import compat_str 
class DctpTvIE(InfoExtractor): - _VALID_URL = r'https?://www.dctp.tv/(#/)?filme/(?P.+?)/$' + _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(#/)?filme/(?P.+?)/$' _TEST = { 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/', 'info_dict': { diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dl/extractor/democracynow.py index 65a98d789..bdfe638b4 100644 --- a/youtube_dl/extractor/democracynow.py +++ b/youtube_dl/extractor/democracynow.py @@ -13,7 +13,7 @@ from ..utils import ( class DemocracynowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?democracynow.org/(?P[^\?]*)' + _VALID_URL = r'https?://(?:www\.)?democracynow\.org/(?P[^\?]*)' IE_NAME = 'democracynow' _TESTS = [{ 'url': 'http://www.democracynow.org/shows/2015/7/3', diff --git a/youtube_dl/extractor/engadget.py b/youtube_dl/extractor/engadget.py index a39e9010d..65635c18b 100644 --- a/youtube_dl/extractor/engadget.py +++ b/youtube_dl/extractor/engadget.py @@ -4,7 +4,7 @@ from .common import InfoExtractor class EngadgetIE(InfoExtractor): - _VALID_URL = r'https?://www.engadget.com/video/(?P[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?engadget\.com/video/(?P[^/?#]+)' _TESTS = [{ # video with 5min ID diff --git a/youtube_dl/extractor/expotv.py b/youtube_dl/extractor/expotv.py index 971c918a4..ef11962f3 100644 --- a/youtube_dl/extractor/expotv.py +++ b/youtube_dl/extractor/expotv.py @@ -8,7 +8,7 @@ from ..utils import ( class ExpoTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' + _VALID_URL = r'https?://(?:www\.)?expotv\.com/videos/[^?#]*/(?P[0-9]+)($|[?#])' _TEST = { 'url': 'http://www.expotv.com/videos/reviews/3/40/NYX-Butter-lipstick/667916', 'md5': 'fe1d728c3a813ff78f595bc8b7a707a8', diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dl/extractor/freespeech.py index 1477708bb..0a70ca763 100644 --- a/youtube_dl/extractor/freespeech.py +++ b/youtube_dl/extractor/freespeech.py @@ -8,7 +8,7 @@ from .common import 
InfoExtractor class FreespeechIE(InfoExtractor): IE_NAME = 'freespeech.org' - _VALID_URL = r'https://www\.freespeech\.org/video/(?P.+)' + _VALID_URL = r'https?://(?:www\.)?freespeech\.org/video/(?P<title>.+)' _TEST = { 'add_ie': ['Youtube'], 'url': 'https://www.freespeech.org/video/obama-romney-campaign-colorado-ahead-debate-0', diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dl/extractor/gamestar.py index 341e72733..55a34604a 100644 --- a/youtube_dl/extractor/gamestar.py +++ b/youtube_dl/extractor/gamestar.py @@ -9,7 +9,7 @@ from ..utils import ( class GameStarIE(InfoExtractor): - _VALID_URL = r'https?://www\.gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?gamestar\.de/videos/.*,(?P<id>[0-9]+)\.html' _TEST = { 'url': 'http://www.gamestar.de/videos/trailer,3/hobbit-3-die-schlacht-der-fuenf-heere,76110.html', 'md5': '96974ecbb7fd8d0d20fca5a00810cea7', diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dl/extractor/googleplus.py index 731bacd67..427499b11 100644 --- a/youtube_dl/extractor/googleplus.py +++ b/youtube_dl/extractor/googleplus.py @@ -10,7 +10,7 @@ from ..utils import unified_strdate class GooglePlusIE(InfoExtractor): IE_DESC = 'Google Plus' - _VALID_URL = r'https://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' + _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)' IE_NAME = 'plus.google' _TEST = { 'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH', diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dl/extractor/goshgay.py index 0c015141f..a43abd154 100644 --- a/youtube_dl/extractor/goshgay.py +++ b/youtube_dl/extractor/goshgay.py @@ -11,7 +11,7 @@ from ..utils import ( class GoshgayIE(InfoExtractor): - _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)' + _VALID_URL = r'https?://(?:www\.)?goshgay\.com/video(?P<id>\d+?)($|/)' _TEST = { 'url': 'http://www.goshgay.com/video299069/diesel_sfw_xxx_video', 'md5': '4b6db9a0a333142eb9f15913142b0ed1', 
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py index b6cc15b6f..749e9154f 100644 --- a/youtube_dl/extractor/hark.py +++ b/youtube_dl/extractor/hark.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class HarkIE(InfoExtractor): - _VALID_URL = r'https?://www\.hark\.com/clips/(?P<id>.+?)-.+' + _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+' _TEST = { 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013', 'md5': '6783a58491b47b92c7c1af5a77d4cbee', diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dl/extractor/hotnewhiphop.py index 9db565209..34163725f 100644 --- a/youtube_dl/extractor/hotnewhiphop.py +++ b/youtube_dl/extractor/hotnewhiphop.py @@ -12,7 +12,7 @@ from ..utils import ( class HotNewHipHopIE(InfoExtractor): - _VALID_URL = r'https?://www\.hotnewhiphop\.com/.*\.(?P<id>.*)\.html' + _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html' _TEST = { 'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html', 'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96', diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py index 3a6a6f5ad..f0fc8d49a 100644 --- a/youtube_dl/extractor/imdb.py +++ b/youtube_dl/extractor/imdb.py @@ -94,7 +94,7 @@ class ImdbIE(InfoExtractor): class ImdbListIE(InfoExtractor): IE_NAME = 'imdb:list' IE_DESC = 'Internet Movie Database lists' - _VALID_URL = r'https?://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' + _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})' _TEST = { 'url': 'http://www.imdb.com/list/JFs9NWw6XI0', 'info_dict': { diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index bad46005b..bfccf89b0 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class KaraoketvIE(InfoExtractor): - _VALID_URL = 
r'https?://www\.karaoketv\.co\.il/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F', 'info_dict': { diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dl/extractor/kickstarter.py index c61e78622..fbe499497 100644 --- a/youtube_dl/extractor/kickstarter.py +++ b/youtube_dl/extractor/kickstarter.py @@ -6,7 +6,7 @@ from ..utils import smuggle_url class KickStarterIE(InfoExtractor): - _VALID_URL = r'https?://www\.kickstarter\.com/projects/(?P<id>[^/]*)/.*' + _VALID_URL = r'https?://(?:www\.)?kickstarter\.com/projects/(?P<id>[^/]*)/.*' _TESTS = [{ 'url': 'https://www.kickstarter.com/projects/1404461844/intersection-the-story-of-josh-grant/description', 'md5': 'c81addca81327ffa66c642b5d8b08cab', diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 0eeb9ffeb..ba621ca7b 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -59,7 +59,7 @@ class KuwoBaseIE(InfoExtractor): class KuwoIE(KuwoBaseIE): IE_NAME = 'kuwo:song' IE_DESC = '酷我音乐' - _VALID_URL = r'https?://www\.kuwo\.cn/yinyue/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/yinyue/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.kuwo.cn/yinyue/635632/', 'info_dict': { @@ -139,7 +139,7 @@ class KuwoIE(KuwoBaseIE): class KuwoAlbumIE(InfoExtractor): IE_NAME = 'kuwo:album' IE_DESC = '酷我音乐 - 专辑' - _VALID_URL = r'https?://www\.kuwo\.cn/album/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/album/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/album/502294/', 'info_dict': { @@ -200,7 +200,7 @@ class KuwoChartIE(InfoExtractor): class KuwoSingerIE(InfoExtractor): IE_NAME = 'kuwo:singer' IE_DESC = '酷我音乐 - 歌手' - _VALID_URL = r'https?://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mingxing/(?P<id>[^/]+)' _TESTS = [{ 'url': 
'http://www.kuwo.cn/mingxing/bruno+mars/', 'info_dict': { @@ -296,7 +296,7 @@ class KuwoCategoryIE(InfoExtractor): class KuwoMvIE(KuwoBaseIE): IE_NAME = 'kuwo:mv' IE_DESC = '酷我音乐 - MV' - _VALID_URL = r'https?://www\.kuwo\.cn/mv/(?P<id>\d+?)/' + _VALID_URL = r'https?://(?:www\.)?kuwo\.cn/mv/(?P<id>\d+?)/' _TEST = { 'url': 'http://www.kuwo.cn/mv/6480076/', 'info_dict': { diff --git a/youtube_dl/extractor/litv.py b/youtube_dl/extractor/litv.py index 05c6579f1..a3784e6c6 100644 --- a/youtube_dl/extractor/litv.py +++ b/youtube_dl/extractor/litv.py @@ -14,7 +14,7 @@ from ..utils import ( class LiTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' + _VALID_URL = r'https?://(?:www\.)?litv\.tv/(?:vod|promo)/[^/]+/(?:content\.do)?\?.*?\b(?:content_)?id=(?P<id>[^&]+)' _URL_TEMPLATE = 'https://www.litv.tv/vod/%s/content.do?id=%s' diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index a98c4c530..299873ecc 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -94,7 +94,7 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index 3cd4a3a19..43db9929c 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -7,7 +7,7 @@ from ..utils import ExtractorError class MacGameStoreIE(InfoExtractor): IE_NAME = 'macgamestore' IE_DESC = 'MacGameStore trailers' - _VALID_URL = r'https?://www\.macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)' 
_TEST = { 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dl/extractor/metacritic.py index 444ec0310..7d468d78b 100644 --- a/youtube_dl/extractor/metacritic.py +++ b/youtube_dl/extractor/metacritic.py @@ -9,7 +9,7 @@ from ..utils import ( class MetacriticIE(InfoExtractor): - _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?metacritic\.com/.+?/trailers/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222', diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dl/extractor/mgtv.py index 27bdff8b2..e0bb5d208 100644 --- a/youtube_dl/extractor/mgtv.py +++ b/youtube_dl/extractor/mgtv.py @@ -6,7 +6,7 @@ from ..utils import int_or_none class MGTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?mgtv\.com/v/(?:[^/]+/)*(?P<id>\d+)\.html' IE_DESC = '芒果TV' _TESTS = [{ diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dl/extractor/ministrygrid.py index e48eba3fa..10190d5f6 100644 --- a/youtube_dl/extractor/ministrygrid.py +++ b/youtube_dl/extractor/ministrygrid.py @@ -8,7 +8,7 @@ from ..utils import ( class MinistryGridIE(InfoExtractor): - _VALID_URL = r'https?://www\.ministrygrid.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?ministrygrid\.com/([^/?#]*/)*(?P<id>[^/#?]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.ministrygrid.com/training-viewer/-/training/t4g-2014-conference/the-gospel-by-numbers-4/the-gospel-by-numbers', diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index cd169f361..2294745d4 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -74,7 +74,7 @@ class MiTeleBaseIE(InfoExtractor): class MiTeleIE(MiTeleBaseIE): IE_DESC = 'mitele.es' - _VALID_URL = 
r'https?://www\.mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' + _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' _TESTS = [{ 'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dl/extractor/motorsport.py index 370328b36..c9d1ab64d 100644 --- a/youtube_dl/extractor/motorsport.py +++ b/youtube_dl/extractor/motorsport.py @@ -9,7 +9,7 @@ from ..compat import ( class MotorsportIE(InfoExtractor): IE_DESC = 'motorsport.com' - _VALID_URL = r'https?://www\.motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' + _VALID_URL = r'https?://(?:www\.)?motorsport\.com/[^/?#]+/video/(?:[^/?#]+/)(?P<id>[^/]+)/?(?:$|[?#])' _TEST = { 'url': 'http://www.motorsport.com/f1/video/main-gallery/red-bull-racing-2014-rules-explained/', 'info_dict': { diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dl/extractor/moviezine.py index f130b75c4..aa091a62c 100644 --- a/youtube_dl/extractor/moviezine.py +++ b/youtube_dl/extractor/moviezine.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class MoviezineIE(InfoExtractor): - _VALID_URL = r'https?://www\.moviezine\.se/video/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?moviezine\.se/video/(?P<id>[^?#]+)' _TEST = { 'url': 'http://www.moviezine.se/video/205866', diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 1ca7b1a9e..2afe535b5 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -11,7 +11,7 @@ from ..utils import ( class MySpassIE(InfoExtractor): - _VALID_URL = r'https?://www\.myspass\.de/.*' + _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*' _TEST = { 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/', 'md5': '0b49f4844a068f8b33f4b7c88405862b', diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index f37bf2f30..7f1bd9229 100644 --- 
a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -13,7 +13,7 @@ from ..utils import ( class NBCIE(InfoExtractor): - _VALID_URL = r'https?://www\.nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' + _VALID_URL = r'https?://(?:www\.)?nbc\.com/(?:[^/]+/)+(?P<id>n?\d+)' _TESTS = [ { @@ -138,7 +138,7 @@ class NBCSportsVPlayerIE(InfoExtractor): class NBCSportsIE(InfoExtractor): # Does not include https because its certificate is invalid - _VALID_URL = r'https?://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke', @@ -161,7 +161,7 @@ class NBCSportsIE(InfoExtractor): class CSNNEIE(InfoExtractor): - _VALID_URL = r'https?://www\.csnne\.com/video/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)' _TEST = { 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter', diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py index 0cded6b5c..e3b0da2e9 100644 --- a/youtube_dl/extractor/ndr.py +++ b/youtube_dl/extractor/ndr.py @@ -23,7 +23,7 @@ class NDRBaseIE(InfoExtractor): class NDRIE(NDRBaseIE): IE_NAME = 'ndr' IE_DESC = 'NDR.de - Norddeutscher Rundfunk' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html' _TESTS = [{ # httpVideo, same content id 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html', @@ -105,7 +105,7 @@ class NDRIE(NDRBaseIE): class NJoyIE(NDRBaseIE): IE_NAME = 'njoy' IE_DESC = 'N-JOY' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html' _TESTS = [{ # httpVideo, same 
content id 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html', @@ -238,7 +238,7 @@ class NDREmbedBaseIE(InfoExtractor): class NDREmbedIE(NDREmbedBaseIE): IE_NAME = 'ndr:embed' - _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' + _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html' _TESTS = [{ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html', 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9', @@ -332,7 +332,7 @@ class NDREmbedIE(NDREmbedBaseIE): class NJoyEmbedIE(NDREmbedBaseIE): IE_NAME = 'njoy:embed' - _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' + _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html' _TESTS = [{ # httpVideo 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html', diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dl/extractor/nextmedia.py index aae7aeeeb..a08e48c4b 100644 --- a/youtube_dl/extractor/nextmedia.py +++ b/youtube_dl/extractor/nextmedia.py @@ -7,7 +7,7 @@ from ..utils import parse_iso8601 class NextMediaIE(InfoExtractor): IE_DESC = '蘋果日報' - _VALID_URL = r'https?://hk.apple.nextmedia.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' + _VALID_URL = r'https?://hk\.apple\.nextmedia\.com/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)' _TESTS = [{ 'url': 'http://hk.apple.nextmedia.com/realtime/news/20141108/53109199', 'md5': 'dff9fad7009311c421176d1ac90bfe4f', @@ -68,7 +68,7 @@ class NextMediaIE(InfoExtractor): class NextMediaActionNewsIE(NextMediaIE): IE_DESC = '蘋果日報 - 動新聞' - _VALID_URL = r'https?://hk.dv.nextmedia.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' + _VALID_URL = r'https?://hk\.dv\.nextmedia\.com/actionnews/[^/]+/(?P<date>\d+)/(?P<id>\d+)/\d+' 
_TESTS = [{ 'url': 'http://hk.dv.nextmedia.com/actionnews/hit/20150121/19009428/20061460', 'md5': '05fce8ffeed7a5e00665d4b7cf0f9201', @@ -93,7 +93,7 @@ class NextMediaActionNewsIE(NextMediaIE): class AppleDailyIE(NextMediaIE): IE_DESC = '臺灣蘋果日報' - _VALID_URL = r'https?://(www|ent).appledaily.com.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' + _VALID_URL = r'https?://(www|ent)\.appledaily\.com\.tw/(?:animation|appledaily|enews|realtimenews)/[^/]+/[^/]+/(?P<date>\d+)/(?P<id>\d+)(/.*)?' _TESTS = [{ 'url': 'http://ent.appledaily.com.tw/enews/article/entertainment/20150128/36354694', 'md5': 'a843ab23d150977cc55ef94f1e2c1e4d', diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py index dd75a48af..6eaaa8416 100644 --- a/youtube_dl/extractor/niconico.py +++ b/youtube_dl/extractor/niconico.py @@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor): class NiconicoPlaylistIE(InfoExtractor): - _VALID_URL = r'https?://www\.nicovideo\.jp/mylist/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)' _TEST = { 'url': 'http://www.nicovideo.jp/mylist/27411728', diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dl/extractor/oktoberfesttv.py index 4a41c0542..f2ccc53dc 100644 --- a/youtube_dl/extractor/oktoberfesttv.py +++ b/youtube_dl/extractor/oktoberfesttv.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OktoberfestTVIE(InfoExtractor): - _VALID_URL = r'https?://www\.oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?oktoberfest-tv\.de/[^/]+/[^/]+/video/(?P<id>[^/?#]+)' _TEST = { 'url': 'http://www.oktoberfest-tv.de/de/kameras/video/hb-zelt', diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index 76316ca2f..c261a7455 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -13,7 +13,7 @@ from ..utils import ( class OpenloadIE(InfoExtractor): - _VALID_URL = 
r'https://openload.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' + _VALID_URL = r'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _TESTS = [{ 'url': 'https://openload.co/f/kUEfGclsU9o', diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py index 6c640089d..eb1aeba46 100644 --- a/youtube_dl/extractor/periscope.py +++ b/youtube_dl/extractor/periscope.py @@ -94,7 +94,7 @@ class PeriscopeIE(PeriscopeBaseIE): class PeriscopeUserIE(PeriscopeBaseIE): - _VALID_URL = r'https?://www\.periscope\.tv/(?P<id>[^/]+)/?$' + _VALID_URL = r'https?://(?:www\.)?periscope\.tv/(?P<id>[^/]+)/?$' IE_DESC = 'Periscope user videos' IE_NAME = 'periscope:user' diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index 78d219299..79c2db085 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -14,7 +14,7 @@ from ..utils import ( class PlayvidIE(InfoExtractor): - _VALID_URL = r'https?://www\.playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' + _VALID_URL = r'https?://(?:www\.)?playvid\.com/watch(\?v=|/)(?P<id>.+?)(?:#|$)' _TESTS = [{ 'url': 'http://www.playvid.com/watch/RnmBNgtrrJu', 'md5': 'ffa2f6b2119af359f544388d8c01eb6c', diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index ff0af9543..37cb9e2c9 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -18,7 +18,7 @@ from ..utils import ( class QQMusicIE(InfoExtractor): IE_NAME = 'qqmusic' IE_DESC = 'QQ音乐' - _VALID_URL = r'https?://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', 'md5': '9ce1c1c8445f561506d2e3cfb0255705', @@ -172,7 +172,7 @@ class QQPlaylistBaseIE(InfoExtractor): class QQMusicSingerIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' - _VALID_URL = r'https?://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' + 
_VALID_URL = r'https?://y\.qq\.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)' _TEST = { 'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', 'info_dict': { @@ -217,7 +217,7 @@ class QQMusicSingerIE(QQPlaylistBaseIE): class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' - _VALID_URL = r'https?://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' + _VALID_URL = r'https?://y\.qq\.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1', diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index 23abf7a27..1d404d20a 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -5,7 +5,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class RottenTomatoesIE(InfoExtractor): - _VALID_URL = r'https?://www\.rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/[^/]+/trailers/(?P<id>\d+)' _TEST = { 'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dl/extractor/roxwel.py index 41638c1d0..65284643b 100644 --- a/youtube_dl/extractor/roxwel.py +++ b/youtube_dl/extractor/roxwel.py @@ -7,7 +7,7 @@ from ..utils import unified_strdate, determine_ext class RoxwelIE(InfoExtractor): - _VALID_URL = r'https?://www\.roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' + _VALID_URL = r'https?://(?:www\.)?roxwel\.com/player/(?P<filename>.+?)(\.|\?|$)' _TEST = { 'url': 'http://www.roxwel.com/player/passionpittakeawalklive.html', diff --git a/youtube_dl/extractor/rtve.py b/youtube_dl/extractor/rtve.py index 34f9c4a99..f1b92f6da 100644 --- a/youtube_dl/extractor/rtve.py +++ b/youtube_dl/extractor/rtve.py @@ -64,7 +64,7 @@ def _decrypt_url(png): class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' IE_DESC = 'RTVE a la carta' - _VALID_URL = 
r'https?://www\.rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/', @@ -184,7 +184,7 @@ class RTVEInfantilIE(InfoExtractor): class RTVELiveIE(InfoExtractor): IE_NAME = 'rtve.es:live' IE_DESC = 'RTVE.es live streams' - _VALID_URL = r'https?://www\.rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)' _TESTS = [{ 'url': 'http://www.rtve.es/directo/la-1/', @@ -226,7 +226,7 @@ class RTVELiveIE(InfoExtractor): class RTVETelevisionIE(InfoExtractor): IE_NAME = 'rtve.es:television' - _VALID_URL = r'https?://www\.rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml' _TEST = { 'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml', diff --git a/youtube_dl/extractor/screenjunkies.py b/youtube_dl/extractor/screenjunkies.py index dd0a6ba19..02e574cd8 100644 --- a/youtube_dl/extractor/screenjunkies.py +++ b/youtube_dl/extractor/screenjunkies.py @@ -11,7 +11,7 @@ from ..utils import ( class ScreenJunkiesIE(InfoExtractor): - _VALID_URL = r'https?://www.screenjunkies.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' + _VALID_URL = r'https?://(?:www\.)?screenjunkies\.com/video/(?P<display_id>[^/]+?)(?:-(?P<id>\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.screenjunkies.com/video/best-quentin-tarantino-movie-2841915', 'md5': '5c2b686bec3d43de42bde9ec047536b0', diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dl/extractor/senateisvp.py index c5f474dd1..35540c082 100644 --- a/youtube_dl/extractor/senateisvp.py +++ b/youtube_dl/extractor/senateisvp.py @@ -48,7 +48,7 @@ class SenateISVPIE(InfoExtractor): ['arch', '', 'http://ussenate-f.akamaihd.net/'] ] 
_IE_NAME = 'senate.gov' - _VALID_URL = r'https?://www\.senate\.gov/isvp/?\?(?P<qs>.+)' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)' _TESTS = [{ 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', 'info_dict': { diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dl/extractor/slideshare.py index 4967c1b77..74a1dc672 100644 --- a/youtube_dl/extractor/slideshare.py +++ b/youtube_dl/extractor/slideshare.py @@ -14,7 +14,7 @@ from ..utils import ( class SlideshareIE(InfoExtractor): - _VALID_URL = r'https?://www\.slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' + _VALID_URL = r'https?://(?:www\.)?slideshare\.net/[^/]+?/(?P<title>.+?)($|\?)' _TEST = { 'url': 'http://www.slideshare.net/Dataversity/keynote-presentation-managing-scale-and-complexity', diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index 74cb3a08a..b41d9f59f 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -103,7 +103,7 @@ class SpiegelIE(InfoExtractor): class SpiegelArticleIE(InfoExtractor): - _VALID_URL = r'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' + _VALID_URL = r'https?://(?:www\.)?spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html' IE_NAME = 'Spiegel:Article' IE_DESC = 'Articles on spiegel.de' _TESTS = [{ diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index ab8bab5cd..def7e5a2c 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -8,7 +8,7 @@ from ..utils import ( class SyfyIE(AdobePassIE): - _VALID_URL = r'https?://www\.syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer', 
'info_dict': { diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py index d14d93e3a..e89759714 100644 --- a/youtube_dl/extractor/teachingchannel.py +++ b/youtube_dl/extractor/teachingchannel.py @@ -7,7 +7,7 @@ from .ooyala import OoyalaIE class TeachingChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.teachingchannel\.org/videos/(?P<title>.+)' + _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)' _TEST = { 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution', diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py index 2ecfd0405..d5abfc9e4 100644 --- a/youtube_dl/extractor/telecinco.py +++ b/youtube_dl/extractor/telecinco.py @@ -6,7 +6,7 @@ from .mitele import MiTeleBaseIE class TelecincoIE(MiTeleBaseIE): IE_DESC = 'telecinco.es, cuatro.com and mediaset.es' - _VALID_URL = r'https?://www\.(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' + _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html' _TESTS = [{ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html', diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dl/extractor/telewebion.py index 77916c601..7786b2813 100644 --- a/youtube_dl/extractor/telewebion.py +++ b/youtube_dl/extractor/telewebion.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class TelewebionIE(InfoExtractor): - _VALID_URL = r'https?://www\.telewebion\.com/#!/episode/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?telewebion\.com/#!/episode/(?P<id>\d+)' _TEST = { 'url': 'http://www.telewebion.com/#!/episode/1263668/', diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dl/extractor/theintercept.py index 8cb3c3669..ec6f4ecaa 100644 --- a/youtube_dl/extractor/theintercept.py +++ b/youtube_dl/extractor/theintercept.py @@ -11,7 +11,7 @@ from ..utils import ( class 
TheInterceptIE(InfoExtractor): - _VALID_URL = r'https://theintercept.com/fieldofvision/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://theintercept\.com/fieldofvision/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://theintercept.com/fieldofvision/thisisacoup-episode-four-surrender-or-die/', 'md5': '145f28b41d44aab2f87c0a4ac8ec95bd', diff --git a/youtube_dl/extractor/thescene.py b/youtube_dl/extractor/thescene.py index 3e4e14031..ce1326c03 100644 --- a/youtube_dl/extractor/thescene.py +++ b/youtube_dl/extractor/thescene.py @@ -7,7 +7,7 @@ from ..utils import qualities class TheSceneIE(InfoExtractor): - _VALID_URL = r'https://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' + _VALID_URL = r'https?://thescene\.com/watch/[^/]+/(?P<id>[^/#?]+)' _TEST = { 'url': 'https://thescene.com/watch/vogue/narciso-rodriguez-spring-2013-ready-to-wear', diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 88eb83d74..ce4f91f46 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -13,7 +13,7 @@ from ..compat import ( class TlcDeIE(InfoExtractor): IE_NAME = 'tlc.de' - _VALID_URL = r'https?://www\.tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' + _VALID_URL = r'https?://(?:www\.)?tlc\.de/(?:[^/]+/)*videos/(?P<title>[^/?#]+)?(?:.*#(?P<id>\d+))?' 
_TEST = { 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001', diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 89b869559..c2f507233 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -307,7 +307,7 @@ class UdemyIE(InfoExtractor): class UdemyCourseIE(UdemyIE): IE_NAME = 'udemy:course' - _VALID_URL = r'https?://www\.udemy\.com/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?udemy\.com/(?P<id>[^/?#&]+)' _TESTS = [] @classmethod diff --git a/youtube_dl/extractor/ustream.py b/youtube_dl/extractor/ustream.py index 54605d863..a3dc9d33e 100644 --- a/youtube_dl/extractor/ustream.py +++ b/youtube_dl/extractor/ustream.py @@ -14,7 +14,7 @@ from ..utils import ( class UstreamIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/(?P<type>recorded|embed|embed/recorded)/(?P<id>\d+)' IE_NAME = 'ustream' _TESTS = [{ 'url': 'http://www.ustream.tv/recorded/20274954', @@ -117,7 +117,7 @@ class UstreamIE(InfoExtractor): class UstreamChannelIE(InfoExtractor): - _VALID_URL = r'https?://www\.ustream\.tv/channel/(?P<slug>.+)' + _VALID_URL = r'https?://(?:www\.)?ustream\.tv/channel/(?P<slug>.+)' IE_NAME = 'ustream:channel' _TEST = { 'url': 'http://www.ustream.tv/channel/channeljapan', diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 388b4debe..783efda7d 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -31,7 +31,7 @@ class VevoIE(VevoBaseIE): (currently used by MTVIE and MySpaceIE) ''' _VALID_URL = r'''(?x) - (?:https?://www\.vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| + (?:https?://(?:www\.)?vevo\.com/watch/(?!playlist|genre)(?:[^/]+/(?:[^/]+/)?)?| https?://cache\.vevo\.com/m/html/embed\.html\?video=| https?://videoplayer\.vevo\.com/embed/embedded\?videoId=| vevo:) @@ -374,7 +374,7 @@ class VevoIE(VevoBaseIE): class 
VevoPlaylistIE(VevoBaseIE): - _VALID_URL = r'https?://www\.vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?vevo\.com/watch/(?P<kind>playlist|genre)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://www.vevo.com/watch/playlist/dadbf4e7-b99f-4184-9670-6f0e547b6a29', diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py index 2ed5d9643..a19411a05 100644 --- a/youtube_dl/extractor/videodetective.py +++ b/youtube_dl/extractor/videodetective.py @@ -6,7 +6,7 @@ from .internetvideoarchive import InternetVideoArchiveIE class VideoDetectiveIE(InfoExtractor): - _VALID_URL = r'https?://www\.videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)' _TEST = { 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487', diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dl/extractor/weiqitv.py index 3dafbeec2..8e09156c2 100644 --- a/youtube_dl/extractor/weiqitv.py +++ b/youtube_dl/extractor/weiqitv.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class WeiqiTVIE(InfoExtractor): IE_DESC = 'WQTV' - _VALID_URL = r'https?://www\.weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?weiqitv\.com/index/video_play\?videoId=(?P<id>[A-Za-z0-9]+)' _TESTS = [{ 'url': 'http://www.weiqitv.com/index/video_play?videoId=53c744f09874f0e76a8b46f3', diff --git a/youtube_dl/extractor/yam.py b/youtube_dl/extractor/yam.py index 63bbc0634..ef5535547 100644 --- a/youtube_dl/extractor/yam.py +++ b/youtube_dl/extractor/yam.py @@ -15,7 +15,7 @@ from ..utils import ( class YamIE(InfoExtractor): IE_DESC = '蕃薯藤yam天空部落' - _VALID_URL = r'https?://mymedia.yam.com/m/(?P<id>\d+)' + _VALID_URL = r'https?://mymedia\.yam\.com/m/(?P<id>\d+)' _TESTS = [{ # An audio hosted on Yam diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5082cb589..5ca903825 100644 --- 
a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2302,7 +2302,7 @@ class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' - _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' _TESTS = [{ 'url': 'https://www.youtube.com/show/airdisasters', @@ -2371,7 +2371,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): class YoutubeWatchLaterIE(YoutubePlaylistIE): IE_NAME = 'youtube:watchlater' IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=WL', @@ -2392,7 +2392,7 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/my_favorites|:ytfav(?:ou?rites)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?' _LOGIN_REQUIRED = True def _real_extract(self, url): @@ -2403,21 +2403,21 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?' 
_FEED_NAME = 'recommended' _PLAYLIST_TITLE = 'Youtube Recommended videos' class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?' _FEED_NAME = 'subscriptions' _PLAYLIST_TITLE = 'Youtube Subscriptions' class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)' - _VALID_URL = r'https?://www\.youtube\.com/feed/history|:ythistory' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory' _FEED_NAME = 'history' _PLAYLIST_TITLE = 'Youtube History' From b6ba8cb66b16b3332fff3ffc18956e8f25b9a0ea Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:07:05 +0100 Subject: [PATCH 48/55] [go] add support for free full episodes(#10439) --- youtube_dl/extractor/go.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 6a437c54d..7925c1e22 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -8,6 +8,8 @@ from ..utils import ( int_or_none, determine_ext, parse_age_limit, + urlencode_postdata, + ExtractorError, ) @@ -19,7 +21,7 @@ class GoIE(InfoExtractor): 'watchdisneyjunior': '008', 'watchdisneyxd': '009', } - _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/.*?vdka(?P<id>\w+)' % '|'.join(_BRANDS.keys()) + _VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:[^/]+/)*(?:vdka(?P<id>\w+)|season-\d+/\d+-(?P<display_id>[^/?#]+))' % '|'.join(_BRANDS.keys()) _TESTS = [{ 'url': 'http://abc.go.com/shows/castle/video/most-recent/vdka0_g86w5onx', 'info_dict': { @@ -38,9 +40,13 @@ class GoIE(InfoExtractor): }] def _real_extract(self, url): - sub_domain, video_id = 
re.match(self._VALID_URL, url).groups() + sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups() + if not video_id: + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex(r'data-video-id=["\']VDKA(\w+)', webpage, 'video id') + brand = self._BRANDS[sub_domain] video_data = self._download_json( - 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (self._BRANDS[sub_domain], video_id), + 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/-1/-1/%s/-1/-1.json' % (brand, video_id), video_id)['video'][0] title = video_data['title'] @@ -52,6 +58,21 @@ class GoIE(InfoExtractor): format_id = asset.get('format') ext = determine_ext(asset_url) if ext == 'm3u8': + video_type = video_data.get('type') + if video_type == 'lf': + entitlement = self._download_json( + 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json', + video_id, data=urlencode_postdata({ + 'video_id': video_data['id'], + 'video_type': video_type, + 'brand': brand, + 'device': '001', + })) + errors = entitlement.get('errors', {}).get('errors', []) + if errors: + error_massege = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) else: From b01327cd8258fa0a89c09dc92e08b985aeca0718 Mon Sep 17 00:00:00 2001 From: Remita Amine <remitamine@gmail.com> Date: Wed, 14 Sep 2016 17:22:42 +0100 Subject: [PATCH 49/55] [go] fix typo --- youtube_dl/extractor/go.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py index 7925c1e22..c7776b186 100644 --- a/youtube_dl/extractor/go.py +++ b/youtube_dl/extractor/go.py @@ -70,8 +70,8 @@ class GoIE(InfoExtractor): })) errors = entitlement.get('errors', {}).get('errors', []) if errors: - error_massege = ', '.join([error['message'] for error in errors]) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error_massege), expected=True) + error_message = ', '.join([error['message'] for error in errors]) + raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True) asset_url += '?' 
+ entitlement['uplynkData']['sessionKey'] formats.extend(self._extract_m3u8_formats( asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)) From 43e8f41d6ded2b191d7c2f1a53a208c1b09d99bc Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:53:04 +0800 Subject: [PATCH 50/55] [kuwo] Update _TESTS --- youtube_dl/extractor/kuwo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index ba621ca7b..081af86f6 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -82,7 +82,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20150518', }, 'params': { - 'format': 'mp3-320' + 'format': 'mp3-320', }, }, { 'url': 'http://www.kuwo.cn/yinyue/3197154?catalog=yueku2016', @@ -181,7 +181,7 @@ class KuwoChartIE(InfoExtractor): 'info_dict': { 'id': '香港中文龙虎榜', }, - 'playlist_mincount': 10, + 'playlist_mincount': 7, } def _real_extract(self, url): @@ -303,7 +303,7 @@ class KuwoMvIE(KuwoBaseIE): 'id': '6480076', 'ext': 'mp4', 'title': 'My HouseMV', - 'creator': 'PM02:00', + 'creator': '2PM', }, # In this video, music URLs (anti.s) are blocked outside China and # USA, while the MV URL (mvurl) is available globally, so force the MV From a2844f2b890b79b57c98eac7ee9fdb943b658c07 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Thu, 15 Sep 2016 00:56:15 +0800 Subject: [PATCH 51/55] [kuwo:song] Improve error detection (closes #10650) --- ChangeLog | 1 + youtube_dl/extractor/kuwo.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 25c916eb2..c3c8bf037 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ version <unreleased> Extractors +* [kuwo] Improve error detection (#10650) * [bilibili] Fix extraction for specific videos (#10647) + [nbc] Add support for NBC Olympics (#10361) diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 
081af86f6..63e10125e 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -91,10 +91,10 @@ class KuwoIE(KuwoBaseIE): def _real_extract(self, url): song_id = self._match_id(url) - webpage = self._download_webpage( + webpage, urlh = self._download_webpage_handle( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') - if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + if song_id not in urlh.geturl() or '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: raise ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( From c98885aa9954e7bb13a95acc79e4765774c0a8ca Mon Sep 17 00:00:00 2001 From: renalid <renaud.euvrard@MAC-1636.local> Date: Fri, 2 Sep 2016 18:31:52 +0200 Subject: [PATCH 52/55] [utils,franceinter] Add french months' names and fix extraction Update of the "FranceInter" radio extractor : webpages HTML structure had changed, the extractor didn't work. So I updated this extractor to get the mp3 URL and all details. 
--- youtube_dl/extractor/franceinter.py | 38 ++++++++++++++++------------- youtube_dl/utils.py | 13 ++++++++-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 2369f868d..6dad8d712 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,20 +2,24 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + unified_timestamp, + month_by_name, +) class FranceInterIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/player/reecouter\?play=(?P<id>[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?franceinter\.fr/emissions/(?P<id>[^?#]+)' + _TEST = { - 'url': 'http://www.franceinter.fr/player/reecouter?play=793962', + 'url': 'https://www.franceinter.fr/emissions/la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'md5': '4764932e466e6f6c79c317d2e74f6884', 'info_dict': { - 'id': '793962', + 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', - 'title': 'L’Histoire dans les jeux vidéo', - 'description': 'md5:7e93ddb4451e7530022792240a3049c7', - 'timestamp': 1387369800, + 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', + 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. 
Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', + 'timestamp': 1387324800, 'upload_date': '20131218', }, } @@ -25,17 +29,17 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - path = self._search_regex( - r'<a id="player".+?href="([^"]+)"', webpage, 'video url') - video_url = 'http://www.franceinter.fr/' + path + video_url = self._search_regex( + r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') - title = self._html_search_regex( - r'<span class="title-diffusion">(.+?)</span>', webpage, 'title') - description = self._html_search_regex( - r'<span class="description">(.*?)</span>', - webpage, 'description', fatal=False) - timestamp = int_or_none(self._search_regex( - r'data-date="(\d+)"', webpage, 'upload date', fatal=False)) + title = self._og_search_title(webpage) + description = self._og_search_description(webpage) + + extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) + extractdate = extractdate.split('-') + extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] + + timestamp = unified_timestamp(extractdate) return { 'id': video_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ed199c4ad..623ced625 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,6 +91,10 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] +FRENCH_MONTH_NAMES = [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'] + KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', 'flv', 'f4v', 'f4a', 'f4b', @@ -1587,11 +1591,16 @@ def parse_count(s): return lookup_unit_table(_UNIT_TABLE, s) -def month_by_name(name): +def month_by_name(name, lang='en'): """ Return the number of a month by 
(locale-independently) English name """ + name_list = ENGLISH_MONTH_NAMES + + if lang == 'fr': + name_list = FRENCH_MONTH_NAMES + try: - return ENGLISH_MONTH_NAMES.index(name) + 1 + return name_list.index(name) + 1 except ValueError: return None From adfb88ed546e6be7bcfec8b0d6ab8137e377b79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:13:55 +0700 Subject: [PATCH 53/55] [utils] Improve month_by_name and add tests --- test/test_utils.py | 11 +++++++++++ youtube_dl/utils.py | 16 ++++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 405c5d351..4ebca8744 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -40,6 +40,7 @@ from youtube_dl.utils import ( js_to_json, limit_length, mimetype2ext, + month_by_name, ohdave_rsa_encrypt, OnDemandPagedList, orderedSet, @@ -634,6 +635,16 @@ class TestUtil(unittest.TestCase): self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') + def test_month_by_name(self): + self.assertEqual(month_by_name(None), None) + self.assertEqual(month_by_name('December', 'en'), 12) + self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('December'), 12) + self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('Unknown', 'unknown'), None) + + def test_m + def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 623ced625..a4ef15908 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -91,9 +91,12 @@ ENGLISH_MONTH_NAMES = [ 'January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] -FRENCH_MONTH_NAMES = [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 
'septembre', 'octobre', 'novembre', 'decembre'] +MONTH_NAMES = { + 'en': ENGLISH_MONTH_NAMES, + 'fr': [ + 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], +} KNOWN_EXTENSIONS = ( 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac', @@ -1594,13 +1597,10 @@ def parse_count(s): def month_by_name(name, lang='en'): """ Return the number of a month by (locale-independently) English name """ - name_list = ENGLISH_MONTH_NAMES - - if lang == 'fr': - name_list = FRENCH_MONTH_NAMES + month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en']) try: - return name_list.index(name) + 1 + return month_names.index(name) + 1 except ValueError: return None From 1ae92c571fd61c629ed6831c870930b6f4ac1962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:57:01 +0700 Subject: [PATCH 54/55] [utils] Use native french month names --- test/test_utils.py | 6 ++---- youtube_dl/utils.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 4ebca8744..9789d8611 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -638,13 +638,11 @@ class TestUtil(unittest.TestCase): def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) - self.assertEqual(month_by_name('decembre', 'fr'), 12) + self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) - self.assertEqual(month_by_name('decembre'), None) + self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) - def test_m - def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a4ef15908..69ca88c85 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -94,8 +94,8 @@ 
ENGLISH_MONTH_NAMES = [ MONTH_NAMES = { 'en': ENGLISH_MONTH_NAMES, 'fr': [ - 'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin', - 'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre'], + 'janvier', 'février', 'mars', 'avril', 'mai', 'juin', + 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'], } KNOWN_EXTENSIONS = ( From bd09b792afa6296e928094a5e07a7c6c85be2df9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 14 Sep 2016 23:59:13 +0700 Subject: [PATCH 55/55] [franceinter] Improve extraction (Closes #10538) --- youtube_dl/extractor/franceinter.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dl/extractor/franceinter.py index 6dad8d712..0d58f89c5 100644 --- a/youtube_dl/extractor/franceinter.py +++ b/youtube_dl/extractor/franceinter.py @@ -2,10 +2,8 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ( - unified_timestamp, - month_by_name, -) +from ..compat import compat_str +from ..utils import month_by_name class FranceInterIE(InfoExtractor): @@ -18,8 +16,7 @@ class FranceInterIE(InfoExtractor): 'id': 'la-marche-de-l-histoire/la-marche-de-l-histoire-18-decembre-2013', 'ext': 'mp3', 'title': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 - France Inter', - 'description': 'L’Histoire dans les jeux vidéo du 18 décembre 2013 par Jean Lebrun en replay sur France Inter. 
Retrouvez l\'émission en réécoute gratuite et abonnez-vous au podcast !', - 'timestamp': 1387324800, + 'description': 'md5:7f2ce449894d1e585932273080fb410d', 'upload_date': '20131218', }, } @@ -30,22 +27,28 @@ class FranceInterIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( - r'<button class="replay-button playable" data-is-aod="1" data-url="([^"]+)"', webpage, 'video url') + r'(?s)<div[^>]+class=["\']page-diffusion["\'][^>]*>.*?<button[^>]+data-url=(["\'])(?P<url>(?:(?!\1).)+)\1', + webpage, 'video url', group='url') title = self._og_search_title(webpage) description = self._og_search_description(webpage) - extractdate = self._search_regex('(\d{2}-([a-zA-Z\s]+)-\d{4}$)', url, 'extractdate', fatal=False) - extractdate = extractdate.split('-') - extractdate = extractdate[2] + "," + str(month_by_name(extractdate[1], 'fr')) + "," + extractdate[0] - - timestamp = unified_timestamp(extractdate) + upload_date_str = self._search_regex( + r'class=["\']cover-emission-period["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<', + webpage, 'upload date', fatal=False) + if upload_date_str: + upload_date_list = upload_date_str.split() + upload_date_list.reverse() + upload_date_list[1] = compat_str(month_by_name(upload_date_list[1], lang='fr')) + upload_date = ''.join(upload_date_list) + else: + upload_date = None return { 'id': video_id, 'title': title, 'description': description, - 'timestamp': timestamp, + 'upload_date': upload_date, 'formats': [{ 'url': video_url, 'vcodec': 'none',