From adf916e044308ec24466c4bad03ce676819c4f14 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Sat, 17 Feb 2018 20:04:59 +0100 Subject: [PATCH 01/12] Fix error 500 --- youtube_dl/extractor/atresplayer.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 1a31ebe08..d5e80894f 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -144,8 +144,16 @@ class AtresPlayerIE(InfoExtractor): self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), headers={'User-Agent': self._USER_AGENT}) - fmt_json = self._download_json( - request, video_id, 'Downloading windows video JSON') + try: + fmt_json = self._download_json( + request, video_id, 'Downloading windows video JSON') + except ExtractorError as e: + fmt_json = {'resultObject': {}} + else: + result = fmt_json.get('resultDes') + if result.lower() != 'ok': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, result), expected=True) result = fmt_json.get('resultDes') if result.lower() != 'ok': From fd8c1111ab2261dc53ee2341ceab4b213cf6dea8 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Sun, 18 Feb 2018 01:39:18 +0100 Subject: [PATCH 02/12] Fixed code typo --- youtube_dl/extractor/atresplayer.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index d5e80894f..9a556ee70 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -155,11 +155,6 @@ class AtresPlayerIE(InfoExtractor): raise ExtractorError( '%s returned error: %s' % (self.IE_NAME, result), expected=True) - result = fmt_json.get('resultDes') - if result.lower() != 'ok': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, result), expected=True) - for format_id, video_url in fmt_json['resultObject'].items(): if format_id == 'token' or not video_url.startswith('http'): continue From 6ad850fe0f487a65d771e80d3fb12d10edec2a36 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Fri, 27 Apr 2018 18:40:08 +0200 Subject: [PATCH 03/12] Download video from new Atresplayer API. --- youtube_dl/extractor/atresplayer.py | 174 ++++++++++++++++------------ 1 file changed, 98 insertions(+), 76 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 9a556ee70..80e21abf9 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -18,10 +18,11 @@ from ..utils import ( class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P.+?)_\d+\.html' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { + # TODO: 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', 'md5': 'efd56753cda1bb64df52a3074f62e38a', 'info_dict': { @@ -35,6 +36,7 @@ class AtresPlayerIE(InfoExtractor): 'skip': 'This video is only available for registered users' }, { + # TODO: 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', 'md5': '6e52cbb513c405e403dbacb7aacf8747', 'info_dict': { @@ -58,7 +60,8 @@ class AtresPlayerIE(InfoExtractor): _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json' _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json' - _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' + # _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' + _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s' _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' @@ -101,8 +104,10 @@ class AtresPlayerIE(InfoExtractor): webpage = self._download_webpage(url, video_id) - episode_id = self._search_regex( - r'episode="([^"]+)"', webpage, 'episode id') + # episode_id = self._search_regex( + # r'episode="([^"]+)"', webpage, 'episode id') + + episode_id = video_id request = sanitized_Request( self._PLAYER_URL_TEMPLATE % episode_id, @@ -117,89 +122,106 @@ class AtresPlayerIE(InfoExtractor): formats = [] video_url = player.get('urlVideo') - if video_url: - format_info = { - 'url': video_url, - 'format_id': 'http', - } - mobj = re.search(r'(?P\d+)K_(?P\d+)x(?P\d+)', video_url) - if mobj: - format_info.update({ - 'width': int_or_none(mobj.group('width')), - 'height': int_or_none(mobj.group('height')), - 'tbr': int_or_none(mobj.group('bitrate')), - }) - formats.append(format_info) - - timestamp = int_or_none(self._download_webpage( - self._TIME_API_URL, - video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) - timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) - token = hmac.new( - self._MAGIC.encode('ascii'), - (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5 - ).hexdigest() request = sanitized_Request( - self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), + video_url, headers={'User-Agent': self._USER_AGENT}) + video_data = self._download_json(request, episode_id, 'Downloading video JSON') - try: - fmt_json = self._download_json( - request, video_id, 'Downloading windows video JSON') - except ExtractorError as e: - fmt_json = {'resultObject': {}} - else: - result = fmt_json.get('resultDes') - if result.lower() != 'ok': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, result), expected=True) + for source in video_data['sources']: + if source['type'] == "application/dash+xml": + formats.extend(self._extract_mpd_formats( + source['src'], video_id, mpd_id='dash', + fatal=False)) + elif source['type'] == "application/vnd.apple.mpegurl": + formats.extend(self._extract_m3u8_formats( + source['src'], video_id, + fatal=False)) - for format_id, video_url in fmt_json['resultObject'].items(): - if format_id == 'token' or not video_url.startswith('http'): - continue - if 'geodeswowsmpra3player' in video_url: - # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) - # this videos are protected by DRM, the f4m downloader doesn't support them - continue - video_url_hd = video_url.replace('free_es', 'es') - formats.extend(self._extract_f4m_formats( - video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', - fatal=False)) - formats.extend(self._extract_mpd_formats( - video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', - fatal=False)) + # if video_url: + # format_info = { + # 'url': video_url, + # 'format_id': 'http', + # } + # mobj = re.search(r'(?P\d+)K_(?P\d+)x(?P\d+)', video_url) + # if mobj: + # format_info.update({ + # 'width': int_or_none(mobj.group('width')), + # 'height': int_or_none(mobj.group('height')), + # 'tbr': int_or_none(mobj.group('bitrate')), + # }) + # formats.append(format_info) + + # timestamp = int_or_none(self._download_webpage( + # self._TIME_API_URL, + # video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) + # timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) + # token = hmac.new( + # self._MAGIC.encode('ascii'), + # (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5 + # ).hexdigest() + # + # request = sanitized_Request( + # self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), + # headers={'User-Agent': self._USER_AGENT}) + # + # try: + # fmt_json = self._download_json( + # request, video_id, 'Downloading windows video JSON') + # except ExtractorError as e: + # fmt_json = {'resultObject': {}} + # else: + # result = fmt_json.get('resultDes') + # if result.lower() != 'ok': + # raise ExtractorError( + # '%s returned error: %s' % (self.IE_NAME, result), expected=True) + + # for format_id, video_url in fmt_json['resultObject'].items(): + # if format_id == 'token' or not video_url.startswith('http'): + # continue + # if 'geodeswowsmpra3player' in video_url: + # # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] + # # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) + # # this videos are protected by DRM, the f4m downloader doesn't support them + # continue + # video_url_hd = video_url.replace('free_es', 'es') + # formats.extend(self._extract_f4m_formats( + # video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', + # fatal=False)) + # formats.extend(self._extract_mpd_formats( + # video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', + # fatal=False)) self._sort_formats(formats) path_data = player.get('pathData') - episode = self._download_xml( - self._EPISODE_URL_TEMPLATE % path_data, video_id, - 'Downloading episode XML') - - duration = float_or_none(xpath_text( - episode, './media/asset/info/technical/contentDuration', 'duration')) - - art = episode.find('./media/asset/info/art') - title = xpath_text(art, './name', 'title') - description = xpath_text(art, './description', 'description') - thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') - - subtitles = {} - subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') - if subtitle_url: - subtitles['es'] = [{ - 'ext': 'srt', - 'url': subtitle_url, - }] + # episode = self._download_xml( + # self._EPISODE_URL_TEMPLATE % path_data, video_id, + # 'Downloading episode XML') + # + # duration = float_or_none(xpath_text( + # episode, './media/asset/info/technical/contentDuration', 'duration')) + # + # art = episode.find('./media/asset/info/art') + # title = xpath_text(art, './name', 'title') + # description = xpath_text(art, './description', 'description') + # thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') + # + # subtitles = {} + # subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') + # if subtitle_url: + # subtitles['es'] = [{ + # 'ext': 'srt', + # 'url': subtitle_url, + # }] return { 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, + 'title': video_data['titulo'], + # 'title': title, + # 'description': description, + # 'thumbnail': thumbnail, + # 'duration': duration, 'formats': formats, - 'subtitles': subtitles, + # 'subtitles': subtitles, } From adc4b9a09d76f72add0e8c11c22b87c80c0a3aa6 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Mon, 4 Jun 2018 02:08:59 +0200 Subject: [PATCH 04/12] Extra data and test. --- youtube_dl/extractor/atresplayer.py | 48 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 80e21abf9..81c0dfaf4 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -21,31 +21,31 @@ class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' _TESTS = [ + # { + # # TODO: + # 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', + # 'md5': 'efd56753cda1bb64df52a3074f62e38a', + # 'info_dict': { + # 'id': 'capitulo-10-especial-solidario-nochebuena', + # 'ext': 'mp4', + # 'title': 'Especial Solidario de Nochebuena', + # 'description': 'md5:e2d52ff12214fa937107d21064075bf1', + # 'duration': 5527.6, + # 'thumbnail': r're:^https?://.*\.jpg$', + # }, + # 'skip': 'This video is only available for registered users' + # }, { # TODO: - 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', - 'md5': 'efd56753cda1bb64df52a3074f62e38a', + 'url': 'https://www.atresplayer.com/lasexta/series/navy-investigacion-criminal/temporada-12/capitulo-10-captulo_5ad6869b986b2866f89ebca0/', + 'md5': '3afa3d3cc155264374916f2a23d1d00c', 'info_dict': { - 'id': 'capitulo-10-especial-solidario-nochebuena', + 'id': '5ad6869b986b2866f89ebca0', 'ext': 'mp4', - 'title': 'Especial Solidario de Nochebuena', - 'description': 'md5:e2d52ff12214fa937107d21064075bf1', - 'duration': 5527.6, - 'thumbnail': r're:^https?://.*\.jpg$', - }, - 'skip': 'This video is only available for registered users' - }, - { - # TODO: - 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html', - 'md5': '6e52cbb513c405e403dbacb7aacf8747', - 'info_dict': { - 'id': 'capitulo-112-david-bustamante', - 'ext': 'flv', - 'title': 'David Bustamante', - 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', - 'duration': 1439.0, - 'thumbnail': r're:^https?://.*\.jpg$', + # 'title': 'David Bustamante', + # 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', + # 'duration': 1439.0, + # 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -219,9 +219,9 @@ class AtresPlayerIE(InfoExtractor): 'id': video_id, 'title': video_data['titulo'], # 'title': title, - # 'description': description, - # 'thumbnail': thumbnail, - # 'duration': duration, + 'description': video_data['descripcion'], + 'thumbnail': video_data['imgPoster'], + 'duration': video_data['duration'], 'formats': formats, # 'subtitles': subtitles, } From 8daa5ce546860a13a3f173710b307b4b5536910a Mon Sep 17 00:00:00 2001 From: Nekmo Date: Mon, 4 Jun 2018 02:11:34 +0200 Subject: [PATCH 05/12] Removed old variables. --- youtube_dl/extractor/atresplayer.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 81c0dfaf4..fc8b212cf 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -55,14 +55,8 @@ class AtresPlayerIE(InfoExtractor): ] _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)' - _TIMESTAMP_SHIFT = 30000 - _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json' - _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json' - # _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s' _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' - _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s' _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' @@ -193,19 +187,6 @@ class AtresPlayerIE(InfoExtractor): # fatal=False)) self._sort_formats(formats) - path_data = player.get('pathData') - - # episode = self._download_xml( - # self._EPISODE_URL_TEMPLATE % path_data, video_id, - # 'Downloading episode XML') - # - # duration = float_or_none(xpath_text( - # episode, './media/asset/info/technical/contentDuration', 'duration')) - # - # art = episode.find('./media/asset/info/art') - # title = xpath_text(art, './name', 'title') - # description = xpath_text(art, './description', 'description') - # thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') # # subtitles = {} # subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') @@ -223,5 +204,4 @@ class AtresPlayerIE(InfoExtractor): 'thumbnail': video_data['imgPoster'], 'duration': video_data['duration'], 'formats': formats, - # 'subtitles': subtitles, } From e88344baf932ce2af0d80ceeb8655591c77f75a8 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Mon, 4 Jun 2018 03:05:43 +0200 Subject: [PATCH 06/12] Test using skip_download and refactor. --- youtube_dl/extractor/atresplayer.py | 104 +++------------------------- 1 file changed, 11 insertions(+), 93 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index fc8b212cf..bfdbf34a0 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -21,31 +21,19 @@ class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' _TESTS = [ - # { - # # TODO: - # 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html', - # 'md5': 'efd56753cda1bb64df52a3074f62e38a', - # 'info_dict': { - # 'id': 'capitulo-10-especial-solidario-nochebuena', - # 'ext': 'mp4', - # 'title': 'Especial Solidario de Nochebuena', - # 'description': 'md5:e2d52ff12214fa937107d21064075bf1', - # 'duration': 5527.6, - # 'thumbnail': r're:^https?://.*\.jpg$', - # }, - # 'skip': 'This video is only available for registered users' - # }, { - # TODO: 'url': 'https://www.atresplayer.com/lasexta/series/navy-investigacion-criminal/temporada-12/capitulo-10-captulo_5ad6869b986b2866f89ebca0/', 'md5': '3afa3d3cc155264374916f2a23d1d00c', 'info_dict': { 'id': '5ad6869b986b2866f89ebca0', - 'ext': 'mp4', - # 'title': 'David Bustamante', - # 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6', - # 'duration': 1439.0, - # 'thumbnail': r're:^https?://.*\.jpg$', + 'ext': 'm3u8', + 'title': u'Capítulo 10: Reglas de casa', + 'description': 'md5:3ec43e9b7da2cd1280fa80adccdd09b0', + 'duration': 2500.0, + 'thumbnail': r're:^https://imagenes.atresplayer.com/.+$' + }, + 'params': { + 'skip_download': True, }, }, { @@ -96,17 +84,10 @@ class AtresPlayerIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - - # episode_id = self._search_regex( - # r'episode="([^"]+)"', webpage, 'episode id') - - episode_id = video_id - request = sanitized_Request( - self._PLAYER_URL_TEMPLATE % episode_id, + self._PLAYER_URL_TEMPLATE % video_id, headers={'User-Agent': self._USER_AGENT}) - player = self._download_json(request, episode_id, 'Downloading player JSON') + player = self._download_json(request, video_id, 'Downloading player JSON') episode_type = player.get('typeOfEpisode') error_message = self._ERRORS.get(episode_type) @@ -120,7 +101,7 @@ class AtresPlayerIE(InfoExtractor): request = sanitized_Request( video_url, headers={'User-Agent': self._USER_AGENT}) - video_data = self._download_json(request, episode_id, 'Downloading video JSON') + video_data = self._download_json(request, video_id, 'Downloading video JSON') for source in video_data['sources']: if source['type'] == "application/dash+xml": @@ -132,74 +113,11 @@ class AtresPlayerIE(InfoExtractor): source['src'], video_id, fatal=False)) - # if video_url: - # format_info = { - # 'url': video_url, - # 'format_id': 'http', - # } - # mobj = re.search(r'(?P\d+)K_(?P\d+)x(?P\d+)', video_url) - # if mobj: - # format_info.update({ - # 'width': int_or_none(mobj.group('width')), - # 'height': int_or_none(mobj.group('height')), - # 'tbr': int_or_none(mobj.group('bitrate')), - # }) - # formats.append(format_info) - - # timestamp = int_or_none(self._download_webpage( - # self._TIME_API_URL, - # video_id, 'Downloading timestamp', fatal=False), 1000, time.time()) - # timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT) - # token = hmac.new( - # self._MAGIC.encode('ascii'), - # (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5 - # ).hexdigest() - # - # request = sanitized_Request( - # self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token), - # headers={'User-Agent': self._USER_AGENT}) - # - # try: - # fmt_json = self._download_json( - # request, video_id, 'Downloading windows video JSON') - # except ExtractorError as e: - # fmt_json = {'resultObject': {}} - # else: - # result = fmt_json.get('resultDes') - # if result.lower() != 'ok': - # raise ExtractorError( - # '%s returned error: %s' % (self.IE_NAME, result), expected=True) - - # for format_id, video_url in fmt_json['resultObject'].items(): - # if format_id == 'token' or not video_url.startswith('http'): - # continue - # if 'geodeswowsmpra3player' in video_url: - # # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0] - # # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path) - # # this videos are protected by DRM, the f4m downloader doesn't support them - # continue - # video_url_hd = video_url.replace('free_es', 'es') - # formats.extend(self._extract_f4m_formats( - # video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds', - # fatal=False)) - # formats.extend(self._extract_mpd_formats( - # video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash', - # fatal=False)) self._sort_formats(formats) - # - # subtitles = {} - # subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') - # if subtitle_url: - # subtitles['es'] = [{ - # 'ext': 'srt', - # 'url': subtitle_url, - # }] - return { 'id': video_id, 'title': video_data['titulo'], - # 'title': title, 'description': video_data['descripcion'], 'thumbnail': video_data['imgPoster'], 'duration': video_data['duration'], From c734b064d0bbd6b801f386bf31b3899c07dd321d Mon Sep 17 00:00:00 2001 From: Nekmo Date: Mon, 4 Jun 2018 03:43:30 +0200 Subject: [PATCH 07/12] Catch server errors and refactor imports. --- youtube_dl/extractor/atresplayer.py | 43 +++++++++++++++-------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index bfdbf34a0..02bcc7816 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,21 +1,20 @@ from __future__ import unicode_literals -import time -import hmac -import hashlib -import re +import json + from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - float_or_none, - int_or_none, sanitized_Request, urlencode_postdata, - xpath_text, ) +try: + from json import JSONDecodeError +except ImportError: + JSONDecodeError = ValueError + class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' @@ -48,12 +47,6 @@ class AtresPlayerIE(InfoExtractor): _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' - _ERRORS = { - 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.', - 'DELETED': 'This video has expired and is no longer available for online streaming.', - 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.', - # 'PREMIUM': 'PREMIUM', - } def _real_initialize(self): self._login() @@ -89,19 +82,27 @@ class AtresPlayerIE(InfoExtractor): headers={'User-Agent': self._USER_AGENT}) player = self._download_json(request, video_id, 'Downloading player JSON') - episode_type = player.get('typeOfEpisode') - error_message = self._ERRORS.get(episode_type) - if error_message: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, error_message), expected=True) - formats = [] video_url = player.get('urlVideo') request = sanitized_Request( video_url, headers={'User-Agent': self._USER_AGENT}) - video_data = self._download_json(request, video_id, 'Downloading video JSON') + try: + video_data = self._download_json(request, video_id, 'Downloading video JSON', fatal=True) + except ExtractorError as e: + if len(e.exc_info) <= 1 or e.exc_info[1].code != 403: + raise + try: + data = json.loads(e.exc_info[1].file.read()) + except JSONDecodeError: + raise e + if isinstance(data, dict) and 'error' in data: + raise ExtractorError('{} returned error: {} ({})'.format( + self.IE_NAME, data['error'], data.get('error_description', 'There is no description') + ), expected=True) + else: + raise e for source in video_data['sources']: if source['type'] == "application/dash+xml": From 1a981adbdd9e6805eebd9b79e2f0c1f97dd9e078 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Mon, 4 Jun 2018 05:03:09 +0200 Subject: [PATCH 08/12] Atresplayer login. --- youtube_dl/extractor/atresplayer.py | 59 ++++++++++++++++++----------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 02bcc7816..52a39dc28 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals import json - +from urllib.error import HTTPError from .common import InfoExtractor from ..utils import ( @@ -16,6 +16,8 @@ except ImportError: JSONDecodeError = ValueError + + class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' @@ -45,34 +47,54 @@ class AtresPlayerIE(InfoExtractor): _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' - _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check' + _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2Fwww.atresplayer.com' + _LOGIN_ACCOUNT_URL = 'https://account.atresmedia.com/api/login' def _real_initialize(self): self._login() def _login(self): + (username, password) = self._get_login_info() if username is None: return login_form = { - 'j_username': username, - 'j_password': password, + 'username': username, + 'password': password, } + self._download_webpage(self._LOGIN_URL, None, 'get login page') request = sanitized_Request( - self._LOGIN_URL, urlencode_postdata(login_form)) + self._LOGIN_ACCOUNT_URL, + urlencode_postdata(login_form), + login_form, + method='post') request.add_header('Content-Type', 'application/x-www-form-urlencoded') - response = self._download_webpage( - request, None, 'Logging in') + # request.add_header('Content-Type', 'multipart/form-data') + try: + response = self._download_json( + request, None, 'post to login form') + except ExtractorError as e: + if isinstance(e.cause, HTTPError): + self._atres_player_error(e.cause.file.read(), e) + else: + raise + else: + self._download_webpage(response['targetUrl'], None, 'Set login session') - error = self._html_search_regex( - r'(?s)]+class="[^"]*\blist_error\b[^"]*">(.+?)', - response, 'error', default=None) - if error: - raise ExtractorError( - 'Unable to login: %s' % error, expected=True) + def _atres_player_error(self, body_response, original_exception): + try: + data = json.loads(body_response) + except JSONDecodeError: + raise original_exception + if isinstance(data, dict) and 'error' in data: + raise ExtractorError('{} returned error: {} ({})'.format( + self.IE_NAME, data['error'], data.get('error_description', 'There is no description') + ), expected=True) + else: + raise original_exception def _real_extract(self, url): video_id = self._match_id(url) @@ -93,16 +115,7 @@ class AtresPlayerIE(InfoExtractor): except ExtractorError as e: if len(e.exc_info) <= 1 or e.exc_info[1].code != 403: raise - try: - data = json.loads(e.exc_info[1].file.read()) - except JSONDecodeError: - raise e - if isinstance(data, dict) and 'error' in data: - raise ExtractorError('{} returned error: {} ({})'.format( - self.IE_NAME, data['error'], data.get('error_description', 'There is no description') - ), expected=True) - else: - raise e + self._atres_player_error(e.exc_info[1].file.read(), e) for source in video_data['sources']: if source['type'] == "application/dash+xml": From 72a6740a4f748afa9deef4e167adead6a2718861 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Tue, 12 Jun 2018 23:36:00 +0200 Subject: [PATCH 09/12] Refactor. --- youtube_dl/extractor/atresplayer.py | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index 52a39dc28..ff72b1552 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -16,26 +16,21 @@ except ImportError: JSONDecodeError = ValueError - - class AtresPlayerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { - 'url': 'https://www.atresplayer.com/lasexta/series/navy-investigacion-criminal/temporada-12/capitulo-10-captulo_5ad6869b986b2866f89ebca0/', + 'url': 'https://www.atresplayer.com/lasexta/programas/el-intermedio/temporada-12/el-intermedio-21-05-18_5b03068d7ed1a8a94b3faf29/', 'md5': '3afa3d3cc155264374916f2a23d1d00c', 'info_dict': { - 'id': '5ad6869b986b2866f89ebca0', + 'id': '5b03068d7ed1a8a94b3faf29', 'ext': 'm3u8', - 'title': u'Capítulo 10: Reglas de casa', - 'description': 'md5:3ec43e9b7da2cd1280fa80adccdd09b0', - 'duration': 2500.0, - 'thumbnail': r're:^https://imagenes.atresplayer.com/.+$' }, 'params': { 'skip_download': True, }, + 'skip': 'required_registered', }, { 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', @@ -44,13 +39,10 @@ class AtresPlayerIE(InfoExtractor): ] _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' - _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2Fwww.atresplayer.com' _LOGIN_ACCOUNT_URL = 'https://account.atresmedia.com/api/login' - def _real_initialize(self): self._login() @@ -72,13 +64,12 @@ class AtresPlayerIE(InfoExtractor): login_form, method='post') request.add_header('Content-Type', 'application/x-www-form-urlencoded') - # request.add_header('Content-Type', 'multipart/form-data') try: response = self._download_json( request, None, 'post to login form') except ExtractorError as e: if isinstance(e.cause, HTTPError): - self._atres_player_error(e.cause.file.read(), e) + raise self._atres_player_error(e.cause.file.read(), e) else: raise else: @@ -88,13 +79,13 @@ class AtresPlayerIE(InfoExtractor): try: data = json.loads(body_response) except JSONDecodeError: - raise original_exception + return original_exception if isinstance(data, dict) and 'error' in data: - raise ExtractorError('{} returned error: {} ({})'.format( + return ExtractorError('{} returned error: {} ({})'.format( self.IE_NAME, data['error'], data.get('error_description', 'There is no description') ), expected=True) else: - raise original_exception + return original_exception def _real_extract(self, url): video_id = self._match_id(url) @@ -115,7 +106,7 @@ class AtresPlayerIE(InfoExtractor): except ExtractorError as e: if len(e.exc_info) <= 1 or e.exc_info[1].code != 403: raise - self._atres_player_error(e.exc_info[1].file.read(), e) + raise self._atres_player_error(e.exc_info[1].file.read(), e) for source in video_data['sources']: if source['type'] == "application/dash+xml": From 11ba372e37784ffee920a0cbfed4a4fd6ecb36e0 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Tue, 12 Jun 2018 23:49:13 +0200 Subject: [PATCH 10/12] Flake8 --- youtube_dl/extractor/atresplayer.py | 31 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index ff72b1552..ff1083eb9 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -17,11 +17,14 @@ except ImportError: class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/' \ + r'[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { - 'url': 'https://www.atresplayer.com/lasexta/programas/el-intermedio/temporada-12/el-intermedio-21-05-18_5b03068d7ed1a8a94b3faf29/', + 'url': 'https://www.atresplayer.com/lasexta/programas/el-' + 'intermedio/temporada-12/el-intermedio-21-05-18_' + '5b03068d7ed1a8a94b3faf29/', 'md5': '3afa3d3cc155264374916f2a23d1d00c', 'info_dict': { 'id': '5b03068d7ed1a8a94b3faf29', @@ -33,14 +36,19 @@ class AtresPlayerIE(InfoExtractor): 'skip': 'required_registered', }, { - 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', + 'url': 'http://www.atresplayer.com/television/series/' + 'el-secreto-de-puente-viejo/el-chico-de-los-' + 'tres-lunares/capitulo-977-29-12-14_' + '2014122400174.html', 'only_matching': True, }, ] _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' - _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2Fwww.atresplayer.com' + _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/' \ + 'episode/%s' + _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2F' \ + 'www.atresplayer.com' _LOGIN_ACCOUNT_URL = 'https://account.atresmedia.com/api/login' def _real_initialize(self): @@ -73,7 +81,8 @@ class AtresPlayerIE(InfoExtractor): else: raise else: - self._download_webpage(response['targetUrl'], None, 'Set login session') + self._download_webpage(response['targetUrl'], None, + 'Set login session') def _atres_player_error(self, body_response, original_exception): try: @@ -82,7 +91,8 @@ class AtresPlayerIE(InfoExtractor): return original_exception if isinstance(data, dict) and 'error' in data: return ExtractorError('{} returned error: {} ({})'.format( - self.IE_NAME, data['error'], data.get('error_description', 'There is no description') + self.IE_NAME, data['error'], data.get( + 'error_description', 'There is no description') ), expected=True) else: return original_exception @@ -93,7 +103,8 @@ class AtresPlayerIE(InfoExtractor): request = sanitized_Request( self._PLAYER_URL_TEMPLATE % video_id, headers={'User-Agent': self._USER_AGENT}) - player = self._download_json(request, video_id, 'Downloading player JSON') + player = self._download_json(request, video_id, + 'Downloading player JSON') formats = [] video_url = player.get('urlVideo') @@ -102,7 +113,9 @@ class AtresPlayerIE(InfoExtractor): video_url, headers={'User-Agent': self._USER_AGENT}) try: - video_data = self._download_json(request, video_id, 'Downloading video JSON', fatal=True) + video_data = self._download_json(request, video_id, + 'Downloading video JSON', + fatal=True) except ExtractorError as e: if len(e.exc_info) <= 1 or e.exc_info[1].code != 403: raise From e9986ce0f3d2faa0582533cd9facd6016bfaa8b5 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Wed, 13 Jun 2018 00:12:31 +0200 Subject: [PATCH 11/12] Fixed Python2 support on Atresplayer. --- youtube_dl/extractor/atresplayer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index ff1083eb9..cf022b4aa 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,7 +1,11 @@ from __future__ import unicode_literals import json -from urllib.error import HTTPError + +try: + from urllib.error import HTTPError +except ImportError: + from urllib2 import HTTPError from .common import InfoExtractor from ..utils import ( From bbb857cc948d7ec88398d4b31383df77b69b3a53 Mon Sep 17 00:00:00 2001 From: Nekmo Date: Fri, 13 Jul 2018 20:07:31 +0200 Subject: [PATCH 12/12] Atresplayer PR changes. --- youtube_dl/extractor/atresplayer.py | 40 ++++++++++------------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py index b3f152f17..9be704e6b 100644 --- a/youtube_dl/extractor/atresplayer.py +++ b/youtube_dl/extractor/atresplayer.py @@ -1,11 +1,7 @@ from __future__ import unicode_literals import json - -try: - from urllib.error import HTTPError -except ImportError: - from urllib2 import HTTPError +from ..compat import compat_HTTPError from .common import InfoExtractor from ..utils import ( @@ -21,14 +17,11 @@ except ImportError: class AtresPlayerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/' \ - r'[^/]+/[^/]+/[^/_]+_(?P[A-z0-9]+)/?' + _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/[^/_]+_(?P[a-zA-Z0-9]+)' _NETRC_MACHINE = 'atresplayer' _TESTS = [ { - 'url': 'https://www.atresplayer.com/lasexta/programas/el-' - 'intermedio/temporada-12/el-intermedio-21-05-18_' - '5b03068d7ed1a8a94b3faf29/', + 'url': 'https://www.atresplayer.com/lasexta/programas/el-intermedio/temporada-12/el-intermedio-21-05-18_5b03068d7ed1a8a94b3faf29/', 'md5': '3afa3d3cc155264374916f2a23d1d00c', 'info_dict': { 'id': '5b03068d7ed1a8a94b3faf29', @@ -40,19 +33,14 @@ class AtresPlayerIE(InfoExtractor): 'skip': 'required_registered', }, { - 'url': 'http://www.atresplayer.com/television/series/' - 'el-secreto-de-puente-viejo/el-chico-de-los-' - 'tres-lunares/capitulo-977-29-12-14_' - '2014122400174.html', + 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html', 'only_matching': True, }, ] _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J' - _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/' \ - 'episode/%s' - _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2F' \ - 'www.atresplayer.com' + _PLAYER_URL_TEMPLATE = 'https://api.atresplayer.com/client/v1/page/episode/%s' + _LOGIN_URL = 'https://api.atresplayer.com/login?redirect=https%3A%2F%2Fwww.atresplayer.com' _LOGIN_ACCOUNT_URL = 'https://account.atresmedia.com/api/login' def _real_initialize(self): @@ -79,7 +67,7 @@ class AtresPlayerIE(InfoExtractor): response = self._download_json( request, None, 'post to login form') except ExtractorError as e: - if isinstance(e.cause, HTTPError): + if isinstance(e.cause, compat_HTTPError): raise self._atres_player_error(e.cause.file.read(), e) else: raise @@ -93,7 +81,7 @@ class AtresPlayerIE(InfoExtractor): except JSONDecodeError: return original_exception if isinstance(data, dict) and 'error' in data: - return ExtractorError('{} returned error: {} ({})'.format( + return ExtractorError('{0} returned error: {1} ({2})'.format( self.IE_NAME, data['error'], data.get( 'error_description', 'There is no description') ), expected=True) @@ -125,11 +113,11 @@ class AtresPlayerIE(InfoExtractor): raise self._atres_player_error(e.exc_info[1].file.read(), e) for source in video_data['sources']: - if source['type'] == "application/dash+xml": + if source.get('type') == 'application/dash+xml': formats.extend(self._extract_mpd_formats( source['src'], video_id, mpd_id='dash', fatal=False)) - elif source['type'] == "application/vnd.apple.mpegurl": + elif source.get('type') == 'application/vnd.apple.mpegurl': formats.extend(self._extract_m3u8_formats( source['src'], video_id, fatal=False)) @@ -138,9 +126,9 @@ class AtresPlayerIE(InfoExtractor): return { 'id': video_id, - 'title': video_data['titulo'], - 'description': video_data['descripcion'], - 'thumbnail': video_data['imgPoster'], - 'duration': video_data['duration'], + 'title': video_data.get('titulo'), + 'description': video_data.get('descripcion'), + 'thumbnail': video_data.get('imgPoster'), + 'duration': video_data.get('duration'), 'formats': formats, }