From ed45eab463b093f5691170eb31468192c4bfce3d Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Sun, 18 Feb 2018 08:06:20 +0100 Subject: [PATCH 1/3] Adding support for KijkOnline on Vier and Vijf in existing extractor. --- youtube_dl/extractor/cognito.py | 30 +++++++++ youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/vier.py | 102 ++++++++++++++++++++++++++--- 3 files changed, 125 insertions(+), 9 deletions(-) create mode 100644 youtube_dl/extractor/cognito.py diff --git a/youtube_dl/extractor/cognito.py b/youtube_dl/extractor/cognito.py new file mode 100644 index 000000000..7e74a2e9c --- /dev/null +++ b/youtube_dl/extractor/cognito.py @@ -0,0 +1,30 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class CognitoBaseIE(InfoExtractor): + + def _cognito_login(self, auth_data): + try: + import boto3 + from warrant.aws_srp import AWSSRP + except ImportError: + raise ExtractorError('%s depends on boto3 and warrant.' % self.IE_NAME) + + region = auth_data['PoolId'].split('_')[0] + client = boto3.client( + 'cognito-idp', + region_name=region, + aws_access_key_id='SomeNonsenseValue', + aws_secret_access_key='YetAnotherNonsenseValue' + ) + aws = AWSSRP( + username=auth_data['Username'], + password=auth_data['Password'], + pool_id=auth_data['PoolId'], + client_id=auth_data['ClientId'], + client=client + ) + return aws.authenticate_user() diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 57e74ba62..0bfce38d0 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1212,7 +1212,7 @@ from .vidme import ( VidmeUserLikesIE, ) from .vidzi import VidziIE -from .vier import VierIE, VierVideosIE +from .vier import VierIE, VierVideosIE, VierVijfKijkOnlineIE from .viewlift import ( ViewLiftIE, ViewLiftEmbedIE, diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index dbd5ba9ba..50bbba5b0 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -11,6 +11,99 @@ from ..utils import ( unified_strdate, ) +from .cognito import CognitoBaseIE + + +class VierVijfKijkOnlineIE(CognitoBaseIE): + IE_NAME = 'viervijfkijkonline' + IE_DESC = 'vier.be and vijf.be - Kijk Online' + _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/video/(?P(?!v3)[^/]+)/(?P[^/]+)(/(?P[^/]+)|)' + _NETRC_MACHINE = 'vier' + _TESTS = [{ + 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', + 'md5': 'e4ae2054a6b040ef1e289e20d111b46e', + 'info_dict': { + 'id': 'ebcd3c39-10a2-4730-b137-b0e7aaed247c', + 'title': 'Hotel Römantiek - Seizoen 1 - Aflevering 1', + 'series': 'Hotel Römantiek', + }, + }, { + 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', + 'only_matching': True, + }, { + 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6', + 'only_matching': True, + }] + + def _real_initialize(self): + self._logged_in = False + self.id_token = '' + + def _login(self): + + username, password = self._get_login_info() + if username is None or password is None: + self.raise_login_required() + + auth_data = { + 'PoolId': 'eu-west-1_dViSsKM5Y', + 'ClientId': '6s1h851s8uplco5h6mqh1jac8m', + 'Username': username, + 'Password': password, + } + + tokens = self._cognito_login(auth_data) + self.id_token = tokens['AuthenticationResult']['IdToken'] + self._logged_in = True + + def _real_extract(self, url): + + if not self._logged_in: + self._login() + + webpage = self._download_webpage(url, None) + + title = self._html_search_regex( + r'', + webpage, 'title') + + title_split = title.split(' - ') + series = title_split[0].strip() + if len(title_split) == 3: + season = title_split[1].split('Seizoen')[1].strip() + episode = title_split[2].split('Aflevering')[1].strip() + else: + season = None + episode = title_split[1].split('Aflevering')[1].strip() + + video_id = self._html_search_regex( + r'
]+>', + webpage, 'video_id') + + api_url = 'https://api.viervijfzes.be/content/%s' % (video_id) + api_headers = { + 'authorization': self.id_token, + } + api = self._download_json( + api_url, + None, note='Peforming API Call', errnote='API Call Failed', + headers=api_headers, + ) + + formats = [] + formats.extend(self._extract_m3u8_formats( + api['video']['S'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='HLS', fatal=False)) + + return { + 'id': video_id, + 'title': title, + 'series': series, + 'season_number': int_or_none(season), + 'episode_number': int_or_none(episode), + 'formats': formats, + } + class VierIE(InfoExtractor): IE_NAME = 'vier' @@ -20,8 +113,7 @@ class VierIE(InfoExtractor): (?:www\.)?(?Pvier|vijf)\.be/ (?: (?: - [^/]+/videos| - video(?:/[^/]+)* + [^/]+/videos )/ (?P[^/]+)(?:/(?P\d+))?| (?: @@ -100,12 +192,6 @@ class VierIE(InfoExtractor): }, { 'url': 'https://www.vijf.be/embed/video/public/4093', 'only_matching': True, - }, { - 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', - 'only_matching': True, - }, { - 'url': 'https://www.vier.be/video/achter-de-rug/2017/achter-de-rug-seizoen-1-aflevering-6', - 'only_matching': True, }] def _real_initialize(self): From 3eac4085bd794ad7faffeef244a12fb80a147c67 Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Sun, 18 Feb 2018 08:24:27 +0100 Subject: [PATCH 2/3] update kijkonline vier test url --- youtube_dl/extractor/vier.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 50bbba5b0..9e20c0f62 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -20,13 +20,16 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): _VALID_URL = r'https?://(?:www\.)?(?Pvier|vijf)\.be/video/(?P(?!v3)[^/]+)/(?P[^/]+)(/(?P[^/]+)|)' _NETRC_MACHINE = 'vier' _TESTS = [{ - 'url': 'http://www.vier.be/planb/videos/het-wordt-warm-de-moestuin/16129', - 'md5': 'e4ae2054a6b040ef1e289e20d111b46e', + 'url': 'https://www.vier.be/video/hotel-romantiek/2017/hotel-romantiek-aflevering-1', 'info_dict': { 'id': 'ebcd3c39-10a2-4730-b137-b0e7aaed247c', + 'ext': 'mp4', 'title': 'Hotel Römantiek - Seizoen 1 - Aflevering 1', 'series': 'Hotel Römantiek', + 'season_number': 1, + 'episode_number': 1, }, + 'skip': 'This video is only available for registered users' }, { 'url': 'https://www.vier.be/video/blockbusters/in-juli-en-augustus-summer-classics', 'only_matching': True, From a91706bda630d0664d305b5297b56fb9938794b7 Mon Sep 17 00:00:00 2001 From: wernerkarlheisenberg Date: Wed, 23 May 2018 19:25:33 +0200 Subject: [PATCH 3/3] vier - update on season extraction part --- youtube_dl/extractor/vier.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/vier.py b/youtube_dl/extractor/vier.py index 9e20c0f62..86d7e007f 100644 --- a/youtube_dl/extractor/vier.py +++ b/youtube_dl/extractor/vier.py @@ -73,7 +73,10 @@ class VierVijfKijkOnlineIE(CognitoBaseIE): title_split = title.split(' - ') series = title_split[0].strip() if len(title_split) == 3: - season = title_split[1].split('Seizoen')[1].strip() + if 'Seizoen' in title_split[1]: + season = title_split[1].split('Seizoen')[1].strip() + else: + season = title_split[1].strip() episode = title_split[2].split('Aflevering')[1].strip() else: season = None