From e854223a2d76ca5894a2e012eea3741e5a7d9445 Mon Sep 17 00:00:00 2001 From: mttronc Date: Thu, 6 Sep 2018 15:41:07 +0200 Subject: [PATCH 1/3] [wwe] Add extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/wwe.py | 53 ++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 youtube_dl/extractor/wwe.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 995af9988..83b934212 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1381,6 +1381,7 @@ from .wsj import ( WSJIE, WSJArticleIE, ) +from .wwe import WWEIE from .xbef import XBefIE from .xboxclips import XboxClipsIE from .xfileshare import XFileShareIE diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py new file mode 100644 index 000000000..1eabc7511 --- /dev/null +++ b/youtube_dl/extractor/wwe.py @@ -0,0 +1,53 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + + +class WWEIE(InfoExtractor): + _VALID_URL = r'https?://(?:\w+\.)?wwe.com/(?:.*/)?videos/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018', + 'info_dict': { + 'id': 'sd994_bryan_almas_090418', + 'ext': 'mp4', + 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 
4, 2018', + 'description': 'Still fuming after he and his wife Brie Bella were attacked by The Miz and Maryse last week, Daniel Bryan takes care of some unfinished business with Andrade "Cien" Almas.', + 'thumbnail': 'https://www.wwe.com/f/styles/wwe_16_9_s/public/2018/09/20180904_sd_danielalmas--d97661dd31eea8a99837a3dbc7121f8f.jpg', + } + }, { + 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018', + 'only_matching': True, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + drupal_settings = self._parse_json( + self._html_search_regex( + r'(?s)Drupal\.settings\s*,\s*({.+?})\);', + webpage, 'drupal settings'), + display_id) + + player = drupal_settings['WWEVideoLanding']['initialVideo'] + metadata = player['playlist'][0] + + title = metadata.get('title') + video_url = 'https:' + metadata.get('file') + thumbnail = 'https://www.wwe.com' + metadata.get('image') + description = metadata.get('description') + + id = re.split('[/.]', video_url)[-2] + formats = self._extract_m3u8_formats(video_url, id, 'mp4') + + return { + 'id': id, + 'title': title, + 'formats': formats, + 'url': video_url, + 'display_id': display_id, + 'thumbnail': thumbnail, + 'description': description, + } From a403ce27c666ad98c43ca4acac9962a6b8378d9c Mon Sep 17 00:00:00 2001 From: mttronc Date: Sat, 8 Sep 2018 11:52:54 +0200 Subject: [PATCH 2/3] [wwe] Resolve requested issues --- youtube_dl/extractor/wwe.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py index 1eabc7511..6679f74b1 100644 --- a/youtube_dl/extractor/wwe.py +++ b/youtube_dl/extractor/wwe.py @@ -1,8 +1,7 @@ from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ExtractorError class WWEIE(InfoExtractor): @@ -34,12 +33,17 @@ class WWEIE(InfoExtractor): player = 
drupal_settings['WWEVideoLanding']['initialVideo'] metadata = player['playlist'][0] - title = metadata.get('title') + if metadata.get('file') is None: + raise ExtractorError('Unable to extract video url') + + title = metadata.get('title') or self._og_search_title(webpage) video_url = 'https:' + metadata.get('file') - thumbnail = 'https://www.wwe.com' + metadata.get('image') + thumbnail = None + if metadata.get('image') is not None: + thumbnail = 'https://www.wwe.com' + metadata.get('image') description = metadata.get('description') - id = re.split('[/.]', video_url)[-2] + id = self._generic_id(video_url) formats = self._extract_m3u8_formats(video_url, id, 'mp4') return { From d84d4f17c3082d63a0deda90396f34538eb0912b Mon Sep 17 00:00:00 2001 From: mttronc Date: Tue, 11 Sep 2018 11:26:48 +0200 Subject: [PATCH 3/3] [wwe] Resolve requested issues --- youtube_dl/extractor/wwe.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/wwe.py b/youtube_dl/extractor/wwe.py index 6679f74b1..c471a79f5 100644 --- a/youtube_dl/extractor/wwe.py +++ b/youtube_dl/extractor/wwe.py @@ -1,19 +1,21 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import ExtractorError +from ..compat import compat_str +from ..utils import urljoin class WWEIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?wwe.com/(?:.*/)?videos/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018', + 'md5': '30cbc824b51f4010ea885bfcaec76972', 'info_dict': { - 'id': 'sd994_bryan_almas_090418', + 'id': '40048199', 'ext': 'mp4', 'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 
4, 2018', 'description': 'Still fuming after he and his wife Brie Bella were attacked by The Miz and Maryse last week, Daniel Bryan takes care of some unfinished business with Andrade "Cien" Almas.', - 'thumbnail': 'https://www.wwe.com/f/styles/wwe_16_9_s/public/2018/09/20180904_sd_danielalmas--d97661dd31eea8a99837a3dbc7121f8f.jpg', + 'thumbnail': r're:^https?://.*\.jpg$', } }, { 'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018', @@ -33,17 +35,14 @@ class WWEIE(InfoExtractor): player = drupal_settings['WWEVideoLanding']['initialVideo'] metadata = player['playlist'][0] - if metadata.get('file') is None: - raise ExtractorError('Unable to extract video url') - + id = compat_str(metadata['nid']) title = metadata.get('title') or self._og_search_title(webpage) - video_url = 'https:' + metadata.get('file') + video_url = 'https:' + metadata['file'] thumbnail = None if metadata.get('image') is not None: - thumbnail = 'https://www.wwe.com' + metadata.get('image') + thumbnail = urljoin(url, metadata.get('image')) description = metadata.get('description') - id = self._generic_id(video_url) formats = self._extract_m3u8_formats(video_url, id, 'mp4') return {