From abc6cf07109ec6bc9755fb3639f324f9daaf3826 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Tue, 29 Jan 2019 16:00:46 -0300 Subject: [PATCH 01/16] add Contar extractor video platform. extract video, lists, channels and full sections --- youtube_dl/extractor/contar.py | 236 +++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 6 + 2 files changed, 242 insertions(+) create mode 100644 youtube_dl/extractor/contar.py diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py new file mode 100644 index 000000000..d3061a537 --- /dev/null +++ b/youtube_dl/extractor/contar.py @@ -0,0 +1,236 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + urlencode_postdata, + compat_str, + ExtractorError, +) + +class ContarBaseIE(InfoExtractor): + + _NETRC_MACHINE = 'contar' + _API_BASE = 'https://api.cont.ar/api/v2/' + + def _handle_errors(self, result): + error = result.get('error', {}).get('message') + if error: + if isinstance(error, dict): + error = ', '.join(error.values()) + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, error), expected=True) + + def _call_api(self, path, video_id, headers = {}): + if self._auth_token: + headers['Authorization'] = 'Bearer ' + self._auth_token + + result = self._download_json( + self._API_BASE + path, video_id, headers=headers) + + self._handle_errors(result) + return result['data'] + + def _real_initialize(self): + email, password = self._get_login_info() + if email is None: + self.raise_login_required() + + result = self._download_json( + self._API_BASE + 'authenticate', None, data=urlencode_postdata({ + 'email': email, + 'password': password, + })) + + self._handle_errors(result) + self._auth_token = result['token'] + + def _get_video_info(self, video, video_id): + #print(json.dumps(video, indent=4, sort_keys=True)) + #print "id = %s S%sE%s" % (video.get('id'), season.get('name') , video.get('episode')) + episode_number = int_or_none(video.get('episode')) + + formats = self._get_formats(video.get('streams', []), video.get('id')) + subtitles = self._get_subtitles(video['subtitles'].get('data', []), video.get('id')) + + info = { + 'id': video.get('id'), + 'title': video.get('name'), + 'description': video.get('synopsis'), + 'series': video.get('serie_name'), + 'episode': video.get('name'), + 'episode_number': int_or_none(video.get('episode')), + 'season_number': int_or_none(video.get('serie')), + 'season_id': video.get('season'), + 'episode_id': video.get('id'), + 'duration': int_or_none(video.get('length')), + 'thumbnail': video.get('posterImage'), + #'timestamp': timestamp, + 'formats': formats, + 'subtitles': subtitles, + } + + return info + + def _get_subtitles(self, subtitles, video_id): + subs = {} + for sub in subtitles: + lang = sub.get('lang').lower() + subs[lang] = [{ 'url': sub.get('url'), 'ext': 'srt'}] + + return subs + + def _get_formats(self, videos, video_id): + formats = [] + for stream in videos: + stream_url = stream.get('url') + type = stream.get('type') + if (type == 'HLS'): + formats.extend(self._extract_m3u8_formats(stream_url, + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) + elif (type == 'DASH'): + formats.extend(self._extract_mpd_formats( + stream_url, video_id, mpd_id='dash', fatal=False)) + + self._sort_formats(formats) + return formats + + +class ContarIE(ContarBaseIE): + + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/watch/(?P%s)' % _UUID_RE + _TEST = { + 'url': 'https://www.cont.ar/watch/d2815f05-f52f-499f-90d0-5671e9e71ce8', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': 'd2815f05-f52f-499f-90d0-5671e9e71ce8', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._call_api('videos/' + video_id, video_id, headers={'Referer': url}) + info = self._get_video_info(video, video_id); + return info + + +class ContarSerieIE(ContarBaseIE): + + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/serie/(?P%s)' % _UUID_RE + _TEST = { + 'url': 'https://www.cont.ar/serie/353247d5-da97-4cb6-8571-c4fbab28c643', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': 'd2815f05-f52f-499f-90d0-5671e9e71ce8', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + video = self._call_api('serie/' + video_id, video_id, headers={'Referer': url}) + import json + + seasons = [] + entries = [] + for season in video['seasons'].get('data', []): + #print(json.dumps(season, indent=4, sort_keys=True)) + season_number = season.get('name') + for episode in season['videos'].get('data', []): + info = self._get_video_info(video, video_id); + entries.append(info) + + return self.playlist_result( + entries, video_id, + video.get('title'), video.get('synopsis')) + + +class ContarChannelIE(ContarBaseIE): + + _UUID_RE = r'[\d]{1,}' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P%s)' % _UUID_RE + _TEST = { + 'url': 'https://www.cont.ar/channel/242', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '242', + 'ext': 'mp4', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + list_id = self._match_id(url) + + list = self._call_api('channel/series/' + list_id, list_id, headers={'Referer': url}) + entries = [] + + for video in list: + if (video.get('type') == 'SERIE'): + url = 'www.cont.ar/serie/%s' % video.get('uuid') + entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) + + return self.playlist_result( + entries, list_id) + +class ContarBrowseIE(ContarBaseIE): + + _UUID_RE = r'[\d]{1,}' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P%s)' % _UUID_RE + _TEST = { + 'url': 'https://www.cont.ar/browse/genre/46', + 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'info_dict': { + 'id': '46', + 'title': 'Video title goes here', + 'thumbnail': r're:^https?://.*\.jpg$', + # TODO more properties, either as: + # * A value + # * MD5 checksum; start the string with md5: + # * A regular expression; start the string with re: + # * Any Python type (for example int or float) + } + } + + def _real_extract(self, url): + list_id = self._match_id(url) + + list = self._call_api('full/section/' + list_id, list_id, headers={'Referer': url}) + entries = [] + + for video in list['videos'].get('data', []): + if (video.get('type') == 'SERIE'): + url = 'www.cont.ar/serie/%s' % video.get('uuid') + entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) + + return self.playlist_result( + entries, list_id, + list.get('title')) + diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 574a47e6d..391221419 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -233,6 +233,12 @@ from .commonprotocols import ( RtmpIE, ) from .condenast import CondeNastIE +from .contar import ( + ContarIE, + ContarSerieIE, + ContarChannelIE, + ContarBrowseIE +) from .corus import CorusIE from .cracked import CrackedIE from .crackle import CrackleIE From 0841071d67b44eef54faca14bbeaad8f5e5f84d8 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 14:46:21 -0300 Subject: [PATCH 02/16] custom notes for API --- youtube_dl/extractor/contar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index d3061a537..7619e122c 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -22,12 +22,12 @@ class ContarBaseIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) - def _call_api(self, path, video_id, headers = {}): + def _call_api(self, path, video_id, headers = {}, note='Downloading JSON metadata'): if self._auth_token: headers['Authorization'] = 'Bearer ' + self._auth_token result = self._download_json( - self._API_BASE + path, video_id, headers=headers) + self._API_BASE + path, video_id, headers=headers, note=note) self._handle_errors(result) return result['data'] From 5e4fa8e2033acac25125f783117151097513fec9 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 14:47:53 -0300 Subject: [PATCH 03/16] base method get serie info --- youtube_dl/extractor/contar.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 7619e122c..97d04ea37 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -72,7 +72,11 @@ class ContarBaseIE(InfoExtractor): } return info - + + def _get_serie_info(self, serie_id, headers={}): + serie = self._call_api('serie/' + serie_id, serie_id, headers=headers, note='Downloading Serie JSON metadata') + return serie + def _get_subtitles(self, subtitles, video_id): subs = {} for sub in subtitles: From 051b2854702911f179666defb935b0627ba3c755 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 14:48:56 -0300 Subject: [PATCH 04/16] base method get season number by episode id --- youtube_dl/extractor/contar.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 97d04ea37..3e7043c46 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -77,6 +77,15 @@ class ContarBaseIE(InfoExtractor): serie = self._call_api('serie/' + serie_id, serie_id, headers=headers, note='Downloading Serie JSON metadata') return serie + def _get_season_number(self, serie_info, video_id): + for season in serie_info['seasons'].get('data', []): + #print(json.dumps(season, indent=4, sort_keys=True)) + season_number = season.get('name') + for episode in season['videos'].get('data', []): + if episode.get('id') == video_id: + return season_number + return None + def _get_subtitles(self, subtitles, video_id): subs = {} for sub in subtitles: From 3af73fd771bd29478628c29f91d8fdb0bfe0e8cb Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 14:58:19 -0300 Subject: [PATCH 05/16] _get_video_info now can be received prefetched data to avoid fetch again --- youtube_dl/extractor/contar.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 3e7043c46..950fffd04 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -46,26 +46,30 @@ class ContarBaseIE(InfoExtractor): self._handle_errors(result) self._auth_token = result['token'] - def _get_video_info(self, video, video_id): + def _get_video_info(self, video, video_id, base = {}): #print(json.dumps(video, indent=4, sort_keys=True)) #print "id = %s S%sE%s" % (video.get('id'), season.get('name') , video.get('episode')) - episode_number = int_or_none(video.get('episode')) formats = self._get_formats(video.get('streams', []), video.get('id')) subtitles = self._get_subtitles(video['subtitles'].get('data', []), video.get('id')) + serie_info = base.get('serie_info') or self._get_serie_info(video.get('serie')) + season_number = base.get('season_number') or self._get_season_number(serie_info, video.get('id')); + episode_number = video.get('episode') + info = { 'id': video.get('id'), 'title': video.get('name'), 'description': video.get('synopsis'), 'series': video.get('serie_name'), 'episode': video.get('name'), - 'episode_number': int_or_none(video.get('episode')), - 'season_number': int_or_none(video.get('serie')), - 'season_id': video.get('season'), + 'episode_number': int_or_none(episode_number), + 'season_number': int_or_none(season_number), + 'season_id': video.get('serie'), 'episode_id': video.get('id'), 'duration': int_or_none(video.get('length')), 'thumbnail': video.get('posterImage'), + 'release_year': int_or_none(serie_info.get('year')), #'timestamp': timestamp, 'formats': formats, 'subtitles': subtitles, From 595fd520973cdd9dca18f138071cb6b682a464bd Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 15:00:22 -0300 Subject: [PATCH 06/16] more descriptive var name and use prefetched data --- youtube_dl/extractor/contar.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 950fffd04..7b8c16a2b 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -164,23 +164,26 @@ class ContarSerieIE(ContarBaseIE): } def _real_extract(self, url): - video_id = self._match_id(url) + serie_id = self._match_id(url) - video = self._call_api('serie/' + video_id, video_id, headers={'Referer': url}) - import json + serie_info = self._get_serie_info(serie_id, headers={'Referer': url}) seasons = [] entries = [] - for season in video['seasons'].get('data', []): + + base = {} + base['serie_info'] = serie_info + + for season in serie_info['seasons'].get('data', []): #print(json.dumps(season, indent=4, sort_keys=True)) - season_number = season.get('name') + base['season_number'] = season.get('name') for episode in season['videos'].get('data', []): - info = self._get_video_info(video, video_id); + info = self._get_video_info(episode, serie_id, base); entries.append(info) return self.playlist_result( - entries, video_id, - video.get('title'), video.get('synopsis')) + entries, serie_id, + serie_info.get('name'), serie_info.get('story_large')) class ContarChannelIE(ContarBaseIE): From 66ab0700ec2823ef39fcaf89c0ca7af8d3a7cdc8 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 15:01:30 -0300 Subject: [PATCH 07/16] get channel info for complete name and description --- youtube_dl/extractor/contar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 7b8c16a2b..438af6350 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -208,8 +208,8 @@ class ContarChannelIE(ContarBaseIE): def _real_extract(self, url): list_id = self._match_id(url) - - list = self._call_api('channel/series/' + list_id, list_id, headers={'Referer': url}) + channel_info = self._call_api('channel/info/' + list_id, list_id, headers={'Referer': url}, note='Downloading Channel Info JSON metadata') + list = self._call_api('channel/series/' + list_id, list_id, headers={'Referer': url}, note='Downloading Channel List JSON metadata') entries = [] for video in list: @@ -218,7 +218,7 @@ class ContarChannelIE(ContarBaseIE): entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) return self.playlist_result( - entries, list_id) + entries, list_id, channel_info.get('name'), channel_info.get('description')) class ContarBrowseIE(ContarBaseIE): From 13d441b1b167d5123f49d239e5b106f5c73a9890 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 15:03:46 -0300 Subject: [PATCH 08/16] working tests --- youtube_dl/extractor/contar.py | 80 +++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 438af6350..3f29ac14e 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -121,17 +121,25 @@ class ContarIE(ContarBaseIE): _VALID_URL = r'https?://(?:www\.)?cont\.ar/watch/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/watch/d2815f05-f52f-499f-90d0-5671e9e71ce8', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', + 'md5': '72cfee8799d964291433004c557d0b2b', 'info_dict': { 'id': 'd2815f05-f52f-499f-90d0-5671e9e71ce8', 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Matilde todos los d\u00edas', + 'duration': 648, + 'release_year': 2016, + 'description': 'Matilde llega a la casa de su abuelo Barbacrespa y en ella accidentalmente rompe un coco m\u00e1gico que la dota de poder.', + 'season_number': 1, + 'episode_number': 1, # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: # * Any Python type (for example int or float) + }, + 'params': { + 'usenetrc': True, + 'format': 'hls-4755-1' } } @@ -149,17 +157,53 @@ class ContarSerieIE(ContarBaseIE): _VALID_URL = r'https?://(?:www\.)?cont\.ar/serie/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/serie/353247d5-da97-4cb6-8571-c4fbab28c643', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { - 'id': 'd2815f05-f52f-499f-90d0-5671e9e71ce8', - 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', + 'id': '353247d5-da97-4cb6-8571-c4fbab28c643', + 'title': 'Vidas de Radio', + 'description': 'Ana Gerschenson conduce el ciclo que repasa historias de grandes personalidades que le dieron vida al medio; marcaron una época de la Argentina y de tu vida, esas voces amigas que estuvieron siempre y son Vidas De Radio.' + #'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: # * Any Python type (for example int or float) + }, + 'playlist_count': 11, + 'playlist': [{ + 'md5': '651e129bae9f7ee7c4c83e1263b26828', + 'info_dict': { + 'id': '3414c62f-7b40-439e-b74d-1dd9b0190808', + 'ext': 'mp4', + 'title': 'Julio Lagos', + 'duration': 3185, + 'release_year': 2018, + 'description': 'Ana Gerschenson conduce el ciclo que repasa historias de grandes personalidades que le dieron vida al medio. En esta emisi\u00f3n recibi\u00f3 a Julio Lagos para repasar su trayectoria y v\u00ednculo con la radio.', + 'season_number': 1, + 'episode_number': 11, + }, + 'params': { + 'usenetrc': True, + 'format': 'bestvideo', + } + }, { + 'md5': '5b80df03801c2399f62da223f16bb801', + 'info_dict': { + 'id': '5972ae9a-43fe-4056-81bc-ab963c057cc6', + 'ext': 'mp4', + 'title': 'Cacho Fontana', + 'release_year': 2018, + 'duration': 3052, + 'description': 'Cacho Fontana: \u201cMi primer amor fue la Radio\u201d', + 'season_number': 1, + 'episode_number': 3 + }, + 'params': { + 'skip_download': True, + }, + }], + 'params': { + 'usenetrc': True, + 'format': 'bestvideo', } } @@ -192,17 +236,20 @@ class ContarChannelIE(ContarBaseIE): _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/channel/242', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '242', - 'ext': 'mp4', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'PAKAPAKA', + 'description': '\xa0PAKAPAKA' # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: # * Any Python type (for example int or float) + }, + 'playlist_mincount': 68, + 'params': { + 'usenetrc': True, + 'skip_download': True } } @@ -226,16 +273,19 @@ class ContarBrowseIE(ContarBaseIE): _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/browse/genre/46', - 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { 'id': '46', - 'title': 'Video title goes here', - 'thumbnail': r're:^https?://.*\.jpg$', + 'title': 'Infantil', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: # * A regular expression; start the string with re: # * Any Python type (for example int or float) + }, + 'playlist_mincount': 65, + 'params': { + 'usenetrc': True, + 'skip_download': True } } From 18b5708685df753d2c33ebeb143cdd192dbdea82 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 31 Jan 2019 19:33:36 -0300 Subject: [PATCH 09/16] cleanup code, flake8 passed --- youtube_dl/extractor/contar.py | 119 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 62 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 3f29ac14e..3752b5e88 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -5,12 +5,12 @@ from .common import InfoExtractor from ..utils import ( int_or_none, urlencode_postdata, - compat_str, ExtractorError, ) + class ContarBaseIE(InfoExtractor): - + _NETRC_MACHINE = 'contar' _API_BASE = 'https://api.cont.ar/api/v2/' @@ -22,16 +22,16 @@ class ContarBaseIE(InfoExtractor): raise ExtractorError( '%s said: %s' % (self.IE_NAME, error), expected=True) - def _call_api(self, path, video_id, headers = {}, note='Downloading JSON metadata'): + def _call_api(self, path, video_id, headers={}, note='Downloading JSON metadata'): if self._auth_token: headers['Authorization'] = 'Bearer ' + self._auth_token - + result = self._download_json( self._API_BASE + path, video_id, headers=headers, note=note) - + self._handle_errors(result) return result['data'] - + def _real_initialize(self): email, password = self._get_login_info() if email is None: @@ -42,21 +42,19 @@ class ContarBaseIE(InfoExtractor): 'email': email, 'password': password, })) - + self._handle_errors(result) self._auth_token = result['token'] - - def _get_video_info(self, video, video_id, base = {}): - #print(json.dumps(video, indent=4, sort_keys=True)) - #print "id = %s S%sE%s" % (video.get('id'), season.get('name') , video.get('episode')) - + + def _get_video_info(self, video, video_id, base={}): + formats = self._get_formats(video.get('streams', []), video.get('id')) subtitles = self._get_subtitles(video['subtitles'].get('data', []), video.get('id')) - + serie_info = base.get('serie_info') or self._get_serie_info(video.get('serie')) - season_number = base.get('season_number') or self._get_season_number(serie_info, video.get('id')); + season_number = base.get('season_number') or self._get_season_number(serie_info, video.get('id')) episode_number = video.get('episode') - + info = { 'id': video.get('id'), 'title': video.get('name'), @@ -70,34 +68,33 @@ class ContarBaseIE(InfoExtractor): 'duration': int_or_none(video.get('length')), 'thumbnail': video.get('posterImage'), 'release_year': int_or_none(serie_info.get('year')), - #'timestamp': timestamp, + # 'timestamp': timestamp, 'formats': formats, 'subtitles': subtitles, } - + return info - + def _get_serie_info(self, serie_id, headers={}): serie = self._call_api('serie/' + serie_id, serie_id, headers=headers, note='Downloading Serie JSON metadata') return serie - + def _get_season_number(self, serie_info, video_id): for season in serie_info['seasons'].get('data', []): - #print(json.dumps(season, indent=4, sort_keys=True)) season_number = season.get('name') for episode in season['videos'].get('data', []): - if episode.get('id') == video_id: + if episode.get('id') == video_id: return season_number return None - + def _get_subtitles(self, subtitles, video_id): subs = {} for sub in subtitles: lang = sub.get('lang').lower() - subs[lang] = [{ 'url': sub.get('url'), 'ext': 'srt'}] - + subs[lang] = [{'url': sub.get('url'), 'ext': 'srt'}] + return subs - + def _get_formats(self, videos, video_id): formats = [] for stream in videos: @@ -105,18 +102,18 @@ class ContarBaseIE(InfoExtractor): type = stream.get('type') if (type == 'HLS'): formats.extend(self._extract_m3u8_formats(stream_url, - video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) + video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False)) elif (type == 'DASH'): formats.extend(self._extract_mpd_formats( stream_url, video_id, mpd_id='dash', fatal=False)) - + self._sort_formats(formats) return formats - - + + class ContarIE(ContarBaseIE): - + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _VALID_URL = r'https?://(?:www\.)?cont\.ar/watch/(?P%s)' % _UUID_RE _TEST = { @@ -142,17 +139,17 @@ class ContarIE(ContarBaseIE): 'format': 'hls-4755-1' } } - + def _real_extract(self, url): video_id = self._match_id(url) - + video = self._call_api('videos/' + video_id, video_id, headers={'Referer': url}) - info = self._get_video_info(video, video_id); + info = self._get_video_info(video, video_id) return info - - + + class ContarSerieIE(ContarBaseIE): - + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _VALID_URL = r'https?://(?:www\.)?cont\.ar/serie/(?P%s)' % _UUID_RE _TEST = { @@ -161,7 +158,7 @@ class ContarSerieIE(ContarBaseIE): 'id': '353247d5-da97-4cb6-8571-c4fbab28c643', 'title': 'Vidas de Radio', 'description': 'Ana Gerschenson conduce el ciclo que repasa historias de grandes personalidades que le dieron vida al medio; marcaron una época de la Argentina y de tu vida, esas voces amigas que estuvieron siempre y son Vidas De Radio.' - #'thumbnail': r're:^https?://.*\.jpg$', + # 'thumbnail': r're:^https?://.*\.jpg$', # TODO more properties, either as: # * A value # * MD5 checksum; start the string with md5: @@ -206,32 +203,30 @@ class ContarSerieIE(ContarBaseIE): 'format': 'bestvideo', } } - + def _real_extract(self, url): serie_id = self._match_id(url) - + serie_info = self._get_serie_info(serie_id, headers={'Referer': url}) - - seasons = [] + entries = [] - + base = {} base['serie_info'] = serie_info for season in serie_info['seasons'].get('data', []): - #print(json.dumps(season, indent=4, sort_keys=True)) base['season_number'] = season.get('name') for episode in season['videos'].get('data', []): - info = self._get_video_info(episode, serie_id, base); + info = self._get_video_info(episode, serie_id, base) entries.append(info) - + return self.playlist_result( entries, serie_id, - serie_info.get('name'), serie_info.get('story_large')) - + serie_info.get('name'), serie_info.get('story_large')) + class ContarChannelIE(ContarBaseIE): - + _UUID_RE = r'[\d]{1,}' _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P%s)' % _UUID_RE _TEST = { @@ -252,23 +247,24 @@ class ContarChannelIE(ContarBaseIE): 'skip_download': True } } - + def _real_extract(self, url): list_id = self._match_id(url) channel_info = self._call_api('channel/info/' + list_id, list_id, headers={'Referer': url}, note='Downloading Channel Info JSON metadata') list = self._call_api('channel/series/' + list_id, list_id, headers={'Referer': url}, note='Downloading Channel List JSON metadata') - entries = [] - + entries = [] + for video in list: if (video.get('type') == 'SERIE'): url = 'www.cont.ar/serie/%s' % video.get('uuid') entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) - + return self.playlist_result( - entries, list_id, channel_info.get('name'), channel_info.get('description')) + entries, list_id, channel_info.get('name'), channel_info.get('description')) + class ContarBrowseIE(ContarBaseIE): - + _UUID_RE = r'[\d]{1,}' _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P%s)' % _UUID_RE _TEST = { @@ -288,19 +284,18 @@ class ContarBrowseIE(ContarBaseIE): 'skip_download': True } } - + def _real_extract(self, url): list_id = self._match_id(url) - + list = self._call_api('full/section/' + list_id, list_id, headers={'Referer': url}) - entries = [] - + entries = [] + for video in list['videos'].get('data', []): if (video.get('type') == 'SERIE'): url = 'www.cont.ar/serie/%s' % video.get('uuid') entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) - + return self.playlist_result( entries, list_id, - list.get('title')) - + list.get('title')) From 11bb3caf98a1fae8a14bdce6b61b80e4c3c4bf93 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Sat, 2 Feb 2019 17:25:43 -0300 Subject: [PATCH 10/16] fixed regex id --- youtube_dl/extractor/contar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 3752b5e88..654bf39ff 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -227,7 +227,7 @@ class ContarSerieIE(ContarBaseIE): class ContarChannelIE(ContarBaseIE): - _UUID_RE = r'[\d]{1,}' + _UUID_RE = r'\d+' _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/channel/242', @@ -265,7 +265,7 @@ class ContarChannelIE(ContarBaseIE): class ContarBrowseIE(ContarBaseIE): - _UUID_RE = r'[\d]{1,}' + _UUID_RE = r'\d+' _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P%s)' % _UUID_RE _TEST = { 'url': 'https://www.cont.ar/browse/genre/46', From 7cb3e04d899bf250f88bb630297626af8107be69 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Sat, 2 Feb 2019 17:27:57 -0300 Subject: [PATCH 11/16] md5 checksum for all title and description --- youtube_dl/extractor/contar.py | 45 +++++++++------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 654bf39ff..3d6639924 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -122,17 +122,12 @@ class ContarIE(ContarBaseIE): 'info_dict': { 'id': 'd2815f05-f52f-499f-90d0-5671e9e71ce8', 'ext': 'mp4', - 'title': 'Matilde todos los d\u00edas', + 'title': 'md5:305bc22419c1f4c3ce596e03e725498b', 'duration': 648, 'release_year': 2016, - 'description': 'Matilde llega a la casa de su abuelo Barbacrespa y en ella accidentalmente rompe un coco m\u00e1gico que la dota de poder.', + 'description': 'md5:fb359bdf6ab3d4c01330b4d31d715403', 'season_number': 1, 'episode_number': 1, - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) }, 'params': { 'usenetrc': True, @@ -156,14 +151,8 @@ class ContarSerieIE(ContarBaseIE): 'url': 'https://www.cont.ar/serie/353247d5-da97-4cb6-8571-c4fbab28c643', 'info_dict': { 'id': '353247d5-da97-4cb6-8571-c4fbab28c643', - 'title': 'Vidas de Radio', - 'description': 'Ana Gerschenson conduce el ciclo que repasa historias de grandes personalidades que le dieron vida al medio; marcaron una época de la Argentina y de tu vida, esas voces amigas que estuvieron siempre y son Vidas De Radio.' - # 'thumbnail': r're:^https?://.*\.jpg$', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'title': 'md5:a387a67af353212f7499ce3a045a86de', + 'description': 'md5:fb14968784f8d6ba0a50a53218ad0538' }, 'playlist_count': 11, 'playlist': [{ @@ -171,10 +160,10 @@ class ContarSerieIE(ContarBaseIE): 'info_dict': { 'id': '3414c62f-7b40-439e-b74d-1dd9b0190808', 'ext': 'mp4', - 'title': 'Julio Lagos', + 'title': 'md5:d5c7d8adf4223d7856d0f8d959f6bff7', 'duration': 3185, 'release_year': 2018, - 'description': 'Ana Gerschenson conduce el ciclo que repasa historias de grandes personalidades que le dieron vida al medio. En esta emisi\u00f3n recibi\u00f3 a Julio Lagos para repasar su trayectoria y v\u00ednculo con la radio.', + 'description': 'md5:94ccc2f57721514ce04e514116e914fa', 'season_number': 1, 'episode_number': 11, }, @@ -187,10 +176,10 @@ class ContarSerieIE(ContarBaseIE): 'info_dict': { 'id': '5972ae9a-43fe-4056-81bc-ab963c057cc6', 'ext': 'mp4', - 'title': 'Cacho Fontana', + 'title': 'md5:64f243a753953726ddc39336e1c2f361', 'release_year': 2018, 'duration': 3052, - 'description': 'Cacho Fontana: \u201cMi primer amor fue la Radio\u201d', + 'description': 'md5:87e1bfbd5ed02808b73f63f2c9a0722d', 'season_number': 1, 'episode_number': 3 }, @@ -233,14 +222,9 @@ class ContarChannelIE(ContarBaseIE): 'url': 'https://www.cont.ar/channel/242', 'info_dict': { 'id': '242', - 'title': 'PAKAPAKA', - 'description': '\xa0PAKAPAKA' - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) - }, + 'title': 'md5:352d30d8fa7896eec02f65b2c7299d27', + 'description': 'md5:ac4e1f02201cffb86ac8ed4bcba4a593' + }, 'playlist_mincount': 68, 'params': { 'usenetrc': True, @@ -271,12 +255,7 @@ class ContarBrowseIE(ContarBaseIE): 'url': 'https://www.cont.ar/browse/genre/46', 'info_dict': { 'id': '46', - 'title': 'Infantil', - # TODO more properties, either as: - # * A value - # * MD5 checksum; start the string with md5: - # * A regular expression; start the string with re: - # * Any Python type (for example int or float) + 'title': 'md5:41dd5b7c28b8b53c32341151e750d367', }, 'playlist_mincount': 65, 'params': { From ae4ebb51792e7f24f12514be15c4d28014b49a48 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 7 Feb 2019 09:09:22 -0300 Subject: [PATCH 12/16] fix regex --- youtube_dl/extractor/contar.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 3d6639924..f7db60c3d 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -216,8 +216,7 @@ class ContarSerieIE(ContarBaseIE): class ContarChannelIE(ContarBaseIE): - _UUID_RE = r'\d+' - _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P%s)' % _UUID_RE + _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P\d+)' _TEST = { 'url': 'https://www.cont.ar/channel/242', 'info_dict': { @@ -249,8 +248,7 @@ class ContarChannelIE(ContarBaseIE): class ContarBrowseIE(ContarBaseIE): - _UUID_RE = r'\d+' - _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P%s)' % _UUID_RE + _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P\d+)' _TEST = { 'url': 'https://www.cont.ar/browse/genre/46', 'info_dict': { From c2f5138853019b6cec1f635a23fcc214f2a4edef Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Thu, 7 Feb 2019 09:12:10 -0300 Subject: [PATCH 13/16] DRY --- youtube_dl/extractor/contar.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index f7db60c3d..a6546f615 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -11,6 +11,7 @@ from ..utils import ( class ContarBaseIE(InfoExtractor): + _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _NETRC_MACHINE = 'contar' _API_BASE = 'https://api.cont.ar/api/v2/' @@ -114,8 +115,7 @@ class ContarBaseIE(InfoExtractor): class ContarIE(ContarBaseIE): - _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' - _VALID_URL = r'https?://(?:www\.)?cont\.ar/watch/(?P%s)' % _UUID_RE + _VALID_URL = r'https?://(?:www\.)?cont\.ar/watch/(?P%s)' % ContarBaseIE._UUID_RE _TEST = { 'url': 'https://www.cont.ar/watch/d2815f05-f52f-499f-90d0-5671e9e71ce8', 'md5': '72cfee8799d964291433004c557d0b2b', @@ -145,8 +145,7 @@ class ContarIE(ContarBaseIE): class ContarSerieIE(ContarBaseIE): - _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' - _VALID_URL = r'https?://(?:www\.)?cont\.ar/serie/(?P%s)' % _UUID_RE + _VALID_URL = r'https?://(?:www\.)?cont\.ar/serie/(?P%s)' % ContarBaseIE._UUID_RE _TEST = { 'url': 'https://www.cont.ar/serie/353247d5-da97-4cb6-8571-c4fbab28c643', 'info_dict': { From 3491b08454f4b6014c666d98cd6c5885eed95297 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Sat, 9 Feb 2019 11:27:43 -0300 Subject: [PATCH 14/16] replace usenetrc --- youtube_dl/extractor/contar.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index a6546f615..9ae8a1274 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -130,7 +130,8 @@ class ContarIE(ContarBaseIE): 'episode_number': 1, }, 'params': { - 'usenetrc': True, + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', 'format': 'hls-4755-1' } } @@ -167,7 +168,8 @@ class ContarSerieIE(ContarBaseIE): 'episode_number': 11, }, 'params': { - 'usenetrc': True, + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', 'format': 'bestvideo', } }, { @@ -187,7 +189,8 @@ class ContarSerieIE(ContarBaseIE): }, }], 'params': { - 'usenetrc': True, + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', 'format': 'bestvideo', } } @@ -225,7 +228,8 @@ class ContarChannelIE(ContarBaseIE): }, 'playlist_mincount': 68, 'params': { - 'usenetrc': True, + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', 'skip_download': True } } @@ -256,7 +260,8 @@ class ContarBrowseIE(ContarBaseIE): }, 'playlist_mincount': 65, 'params': { - 'usenetrc': True, + 'username': 'ytdl@yt-dl.org', + 'password': '(snip)', 'skip_download': True } } From fa9edae4ff14ea96bb5c0c2858d73242e3e747b2 Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Mon, 18 Feb 2019 15:02:19 -0300 Subject: [PATCH 15/16] better coding conventions --- youtube_dl/extractor/contar.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index 9ae8a1274..ef23a5c43 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -31,7 +31,7 @@ class ContarBaseIE(InfoExtractor): self._API_BASE + path, video_id, headers=headers, note=note) self._handle_errors(result) - return result['data'] + return result.get('data', []) def _real_initialize(self): email, password = self._get_login_info() @@ -45,12 +45,12 @@ class ContarBaseIE(InfoExtractor): })) self._handle_errors(result) - self._auth_token = result['token'] + self._auth_token = result.get('token') def _get_video_info(self, video, video_id, base={}): formats = self._get_formats(video.get('streams', []), video.get('id')) - subtitles = self._get_subtitles(video['subtitles'].get('data', []), video.get('id')) + subtitles = self._get_subtitles(video.get('subtitles', []).get('data', []), video.get('id')) serie_info = base.get('serie_info') or self._get_serie_info(video.get('serie')) season_number = base.get('season_number') or self._get_season_number(serie_info, video.get('id')) @@ -81,9 +81,9 @@ class ContarBaseIE(InfoExtractor): return serie def _get_season_number(self, serie_info, video_id): - for season in serie_info['seasons'].get('data', []): + for season in serie_info.get('seasons', []).get('data', []): season_number = season.get('name') - for episode in season['videos'].get('data', []): + for episode in season.get('videos',[]).get('data', []): if episode.get('id') == video_id: return season_number return None @@ -205,9 +205,9 @@ class ContarSerieIE(ContarBaseIE): base = {} base['serie_info'] = serie_info - for season in serie_info['seasons'].get('data', []): + for season in serie_info.get('seasons', []).get('data', []): base['season_number'] = season.get('name') - for episode in season['videos'].get('data', []): + for episode in season.get('videos', []).get('data', []): info = self._get_video_info(episode, serie_id, base) entries.append(info) @@ -272,7 +272,7 @@ class ContarBrowseIE(ContarBaseIE): list = self._call_api('full/section/' + list_id, list_id, headers={'Referer': url}) entries = [] - for video in list['videos'].get('data', []): + for video in list.get('videos', []).get('data', []): if (video.get('type') == 'SERIE'): url = 'www.cont.ar/serie/%s' % video.get('uuid') entries.append(self.url_result(url, video_id=video.get('uuid'), video_title=video.get('name'))) From c7db73fcd30937df2f0314184e3c61283f33b69c Mon Sep 17 00:00:00 2001 From: Pablo Castorino Date: Sun, 22 Mar 2020 12:45:48 -0300 Subject: [PATCH 16/16] fix syntax --- youtube_dl/extractor/contar.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/contar.py b/youtube_dl/extractor/contar.py index ef23a5c43..cbb739480 100644 --- a/youtube_dl/extractor/contar.py +++ b/youtube_dl/extractor/contar.py @@ -83,7 +83,7 @@ class ContarBaseIE(InfoExtractor): def _get_season_number(self, serie_info, video_id): for season in serie_info.get('seasons', []).get('data', []): season_number = season.get('name') - for episode in season.get('videos',[]).get('data', []): + for episode in season.get('videos', []).get('data', []): if episode.get('id') == video_id: return season_number return None @@ -218,18 +218,17 @@ class ContarSerieIE(ContarBaseIE): class ContarChannelIE(ContarBaseIE): - _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/channel/(?P\d+)' _TEST = { 'url': 'https://www.cont.ar/channel/242', 'info_dict': { 'id': '242', 'title': 'md5:352d30d8fa7896eec02f65b2c7299d27', 'description': 'md5:ac4e1f02201cffb86ac8ed4bcba4a593' - }, + }, 'playlist_mincount': 68, 'params': { - 'username': 'ytdl@yt-dl.org', - 'password': '(snip)', + 'usenetrc': True, 'skip_download': True } } @@ -251,7 +250,7 @@ class ContarChannelIE(ContarBaseIE): class ContarBrowseIE(ContarBaseIE): - _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?cont\.ar/browse/genre/(?P\d+)' _TEST = { 'url': 'https://www.cont.ar/browse/genre/46', 'info_dict': {