From 0ba69a77e1a4042a586d98069f093c2cf06e3649 Mon Sep 17 00:00:00 2001 From: Argn0 Date: Mon, 10 Jul 2017 02:44:23 +0200 Subject: [PATCH 1/5] support for alternative embed-player fixes #13019 --- youtube_dl/extractor/vice.py | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 54e207b39..1a47aac99 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -21,10 +21,21 @@ from ..utils import ( class ViceBaseIE(AdobePassIE): def _extract_preplay_video(self, url, locale, webpage): - watch_hub_data = extract_attributes(self._search_regex( - r'(?s)()', webpage, 'watch hub')) - video_id = watch_hub_data['vms-id'] - title = watch_hub_data['video-title'] + try: + watch_hub_data = extract_attributes(self._search_regex( + r'(?s)()', webpage, 'watch hub')) + video_id = watch_hub_data['vms-id'] + title = watch_hub_data['video-title'] + except: + embed_player = extract_attributes(self._search_regex( + r'(?s)()', webpage, 'embed player')) + video_id = embed_player['vms-id'] + title = embed_player['video-title'] + watch_hub_data = {'thumbnail': embed_player.get('img'), + 'show-title': embed_player.get('show-title'), + 'channel-title': embed_player.get('channel-title'), + 'uploader': embed_player.get('channel-id') + } query = {} is_locked = watch_hub_data.get('video-locked') == '1' @@ -93,7 +104,7 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): IE_NAME = 'vice' - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P[^/]+)/)?videos?/(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P[^/]+)/)?(?:videos?|embed)/(?P[^/?#&]+)' _TESTS = [{ 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', @@ -235,6 +246,9 @@ class ViceArticleIE(InfoExtractor): }, { 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229', 'only_matching': True, + }, { + 'url': 'https://www.vice.com/en_us/article/karley-sciortino-slutever-reloaded', + 'only_matching': True, }] def _real_extract(self, url): @@ -267,8 +281,13 @@ class ViceArticleIE(InfoExtractor): if youtube_url: return _url_res(youtube_url, 'Youtube') - video_url = self._html_search_regex( - r'data-video-url="([^"]+)"', - prefetch_data['embed_code'], 'video URL') + try: + video_url = self._html_search_regex( + r'data-video-url="([^"]+)"', + prefetch_data['embed_code'], 'video URL') + except TypeError: + video_url = self._html_search_regex( + r'<\s*iframe\s*src=\s*"([^"]+)', + body, 'video URL') return _url_res(video_url, ViceIE.ie_key()) From b9a3b64d48bc0a1ea892a8fc6b771f1b970ec9fe Mon Sep 17 00:00:00 2001 From: Argn0 Date: Sun, 16 Jul 2017 17:54:01 +0200 Subject: [PATCH 2/5] fix embed-player extractor --- youtube_dl/extractor/vice.py | 41 ++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 1a47aac99..ff394ab1a 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -21,24 +21,28 @@ from ..utils import ( class ViceBaseIE(AdobePassIE): def _extract_preplay_video(self, url, locale, webpage): - try: + + prefetch_data = self._parse_json(self._search_regex( + r'window\.__PREFETCH_DATA\s*=\s*({.*});', + webpage, 'prefetch data'), None, fatal=False) + if prefetch_data: + if prefetch_data.get('data'): + prefetch_data = prefetch_data.get('data') + prefetch_data = prefetch_data.get('video') + + video_id = prefetch_data.get('id') + title = prefetch_data.get('title') + is_locked = prefetch_data.get('locked') == '1' or prefetch_data.get('locked') == 'true' + watch_hub_data = {} + else: + prefetch_data = {} watch_hub_data = extract_attributes(self._search_regex( r'(?s)()', webpage, 'watch hub')) video_id = watch_hub_data['vms-id'] title = watch_hub_data['video-title'] - except: - embed_player = extract_attributes(self._search_regex( - r'(?s)()', webpage, 'embed player')) - video_id = embed_player['vms-id'] - title = embed_player['video-title'] - watch_hub_data = {'thumbnail': embed_player.get('img'), - 'show-title': embed_player.get('show-title'), - 'channel-title': embed_player.get('channel-title'), - 'uploader': embed_player.get('channel-id') - } + is_locked = watch_hub_data.get('video-locked') == '1' query = {} - is_locked = watch_hub_data.get('video-locked') == '1' if is_locked: resource = self._get_mvpd_resource( 'VICELAND', title, video_id, @@ -86,8 +90,10 @@ class ViceBaseIE(AdobePassIE): 'id': video_id, 'title': title, 'description': base.get('body') or base.get('display_body'), - 'thumbnail': watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), - 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), + 'thumbnail': prefetch_data.get('thumbnail_url') or watch_hub_data.get('cover-image') or watch_hub_data.get( + 'thumbnail'), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration( + watch_hub_data.get('video-duration')), 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), @@ -154,6 +160,9 @@ class ViceIE(ViceBaseIE): }, { 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'only_matching': True, + }, { + 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060', + 'only_matching': True, }] _PREPLAY_HOST = 'video.vice' @@ -281,11 +290,11 @@ class ViceArticleIE(InfoExtractor): if youtube_url: return _url_res(youtube_url, 'Youtube') - try: + if prefetch_data.get('embed_code'): video_url = self._html_search_regex( r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL') - except TypeError: + else: video_url = self._html_search_regex( r'<\s*iframe\s*src=\s*"([^"]+)', body, 'video URL') From ec3472b832d3f99dd487c15afbf6cbc1d5aa3b33 Mon Sep 17 00:00:00 2001 From: Argn0 Date: Sun, 16 Jul 2017 20:36:40 +0200 Subject: [PATCH 3/5] [Vice] fixed embed-player extractor --- youtube_dl/extractor/vice.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index ff394ab1a..16586f156 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -90,10 +90,8 @@ class ViceBaseIE(AdobePassIE): 'id': video_id, 'title': title, 'description': base.get('body') or base.get('display_body'), - 'thumbnail': prefetch_data.get('thumbnail_url') or watch_hub_data.get('cover-image') or watch_hub_data.get( - 'thumbnail'), - 'duration': int_or_none(video_data.get('video_duration')) or parse_duration( - watch_hub_data.get('video-duration')), + 'thumbnail': prefetch_data.get('thumbnail_url') or watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), + 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), 'timestamp': int_or_none(video_data.get('created_at'), 1000), 'age_limit': parse_age_limit(video_data.get('video_rating')), 'series': video_data.get('show_title') or watch_hub_data.get('show-title'), From abda70059274b674fb6bf8e7228d9e9cbafbe034 Mon Sep 17 00:00:00 2001 From: Argn0 Date: Mon, 17 Jul 2017 20:26:05 +0200 Subject: [PATCH 4/5] [Vice] alternative embed player extraction fixes https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060 --- youtube_dl/extractor/vice.py | 49 +++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 16586f156..8aaa5a243 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -21,26 +21,24 @@ from ..utils import ( class ViceBaseIE(AdobePassIE): def _extract_preplay_video(self, url, locale, webpage): - - prefetch_data = self._parse_json(self._search_regex( - r'window\.__PREFETCH_DATA\s*=\s*({.*});', - webpage, 'prefetch data'), None, fatal=False) - if prefetch_data: - if prefetch_data.get('data'): - prefetch_data = prefetch_data.get('data') - prefetch_data = prefetch_data.get('video') - - video_id = prefetch_data.get('id') - title = prefetch_data.get('title') - is_locked = prefetch_data.get('locked') == '1' or prefetch_data.get('locked') == 'true' - watch_hub_data = {} - else: - prefetch_data = {} - watch_hub_data = extract_attributes(self._search_regex( - r'(?s)()', webpage, 'watch hub')) + watch_hub = self._search_regex( + r'(?s)()', webpage, 'watch hub', default=None) + if watch_hub: + watch_hub_data = extract_attributes(watch_hub) video_id = watch_hub_data['vms-id'] title = watch_hub_data['video-title'] is_locked = watch_hub_data.get('video-locked') == '1' + prefetch_data = {} + else: + prefetch_data = self._parse_json(self._search_regex( + r'window\.__PREFETCH_DATA\s*=\s*({.*});', + webpage, 'prefetch data'), None) + prefetch_data = prefetch_data.get("data", {}) + prefetch_data = prefetch_data['video'] + video_id = prefetch_data['id'] + title = prefetch_data.get('title') + is_locked = prefetch_data.get('locked') == '1' or prefetch_data.get('locked') == 'true' + watch_hub_data = {} query = {} if is_locked: @@ -88,7 +86,7 @@ class ViceBaseIE(AdobePassIE): '_type': 'url_transparent', 'url': uplynk_preplay_url, 'id': video_id, - 'title': title, + 'title': title or base.get('display_title'), 'description': base.get('body') or base.get('display_body'), 'thumbnail': prefetch_data.get('thumbnail_url') or watch_hub_data.get('cover-image') or watch_hub_data.get('thumbnail'), 'duration': int_or_none(video_data.get('video_duration')) or parse_duration(watch_hub_data.get('video-duration')), @@ -160,7 +158,18 @@ class ViceIE(ViceBaseIE): 'only_matching': True, }, { 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060', - 'only_matching': True, + 'info_dict': { + 'id': '57f41d3556a0a80f54726060', + 'ext': 'mp4', + 'title': "Making The World's First Male Sex Doll", + 'description': "

In her show Slutever, VICE's resident sexpert Karley Sciortino explores the " + "mysterious labyrinth of human sexuality and check out the various ways that people around " + "the world like to get off. 

In the premiere episode of Slutever's brand new " + "season, Karley finds herself in the world of life-like custom male sex dolls and meets the " + "team pioneering the perfect plastic fuck buddy for women. 

", + 'uploader_id': '57a204088cb727dec794c67b', + 'age_limit': 'TV-MA', + }, }] _PREPLAY_HOST = 'video.vice' @@ -294,7 +303,7 @@ class ViceArticleIE(InfoExtractor): prefetch_data['embed_code'], 'video URL') else: video_url = self._html_search_regex( - r'<\s*iframe\s*src=\s*"([^"]+)', + r']+src="([^"]+)', body, 'video URL') return _url_res(video_url, ViceIE.ie_key()) From e699a7862b8a037be015a8781d2c1522d44d2794 Mon Sep 17 00:00:00 2001 From: Argn0 Date: Tue, 18 Jul 2017 09:02:50 +0200 Subject: [PATCH 5/5] [Vice] alternative embed player extraction fixes links like https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060 --- youtube_dl/extractor/vice.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index 8aaa5a243..eb600c65f 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -33,8 +33,7 @@ class ViceBaseIE(AdobePassIE): prefetch_data = self._parse_json(self._search_regex( r'window\.__PREFETCH_DATA\s*=\s*({.*});', webpage, 'prefetch data'), None) - prefetch_data = prefetch_data.get("data", {}) - prefetch_data = prefetch_data['video'] + prefetch_data = prefetch_data.get("data", prefetch_data)['video'] video_id = prefetch_data['id'] title = prefetch_data.get('title') is_locked = prefetch_data.get('locked') == '1' or prefetch_data.get('locked') == 'true'