From 9266a718cb66d21a98eaf22b3b3bf8d064ca7813 Mon Sep 17 00:00:00 2001 From: vagnum08 Date: Sun, 18 Sep 2016 17:19:17 +0100 Subject: [PATCH 1/3] [Antenna] Add new extractor --- youtube_dl/extractor/antenna.py | 67 ++++++++++++++++++++++++++++++ youtube_dl/extractor/extractors.py | 1 + 2 files changed, 68 insertions(+) create mode 100644 youtube_dl/extractor/antenna.py diff --git a/youtube_dl/extractor/antenna.py b/youtube_dl/extractor/antenna.py new file mode 100644 index 000000000..25fc53b60 --- /dev/null +++ b/youtube_dl/extractor/antenna.py @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class AntennaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?antenna\.gr/webtv/watch\?cid=(?P<id>[\w_%]+)(&p=[0-9]+)?' + _TESTS = [ + { + 'url': 'http://www.antenna.gr/webtv/watch?cid=otn_f_jvi5_e_z_i%3d&p=1', + 'info_dict': { + 'id': 'otn_f_jvi5_e_z_i%3d', + 'ext': 'mp4', + 'title': 'Αραχτοί και λάιτ (επεισ.16)', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': '"Αραχτές και... λάιτ" καταστάσεις στον Ant1.', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Content not available outside Greece.' + }, + { + 'url': 'http://www.antenna.gr/webtv/watch?cid=%2fn_u4_x_n79i_d_i%3d', + 'info_dict': { + 'id': '%2fn_u4_x_n79i_d_i%3d', + 'ext': 'mp4', + 'title': 'Της Ελλάδος τα παιδιά (επεισ.38 - Ιφιγένεια εν Τατοϊω)', + 'thumbnail': 're:^https?://.*\.jpg$', + 'description': 'Τρείς σμηνίτες, η κυρία Μπoυμπoύ και o Σμήvαρχoς τoυ γραφείoυ Κάκαλoς, συνθέτουν μια ' + 'από τις πιο αστείες συντροφιές της ελληνικής τηλεόρασης.', + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'Content not available outside Greece.' + }] + + def _real_extract(self, url): + error = 'Content not available outside Greece.' 
+ desc = r'' + thumb = r'(.+?)' + formats = [] + link = 'http://www.antenna.gr/templates/data/jplayer?d=m&cid=' + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + rsspage = self._download_webpage(link + video_id, video_id) + title = self._og_search_title(webpage) or self._search_regex(r'(.+?)', rsspage, 'title')[9:-3] + video_url = self._search_regex(r'file="(.+?)"', rsspage, 'url') + if video_url == 'http://extranet.antenna.gr/flvsteaming/GR.flv': + raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error), expected=True) + thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(thumb, webpage, 'thumb', fatal=False) + description = self._og_search_description(webpage) or self._search_regex(desc, webpage, 'description', fatal=False) + formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + for video_format in formats: + if video_format.get('format_note') == 'Quality selection URL': + formats.remove(video_format) + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'thumbnail': thumbnail, + 'description': description, + } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8d9c2ae13..1098b72cd 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -38,6 +38,7 @@ from .alphaporno import AlphaPornoIE from .amcnetworks import AMCNetworksIE from .animeondemand import AnimeOnDemandIE from .anitube import AnitubeIE +from .antenna import AntennaIE from .anysex import AnySexIE from .aol import ( AolIE, From 21ddbcd7288b91dd594c6110603d0dc6b5aa3c9b Mon Sep 17 00:00:00 2001 From: vagnum08 Date: Mon, 19 Sep 2016 22:24:57 +0100 Subject: [PATCH 2/3] [Antenna] Added fallback methods for video extraction and error handling --- youtube_dl/extractor/antenna.py | 88 +++++++++++++++++++++++++-------- 1 file changed, 68 insertions(+), 20 deletions(-) diff 
--git a/youtube_dl/extractor/antenna.py b/youtube_dl/extractor/antenna.py index 25fc53b60..370c16ba5 100644 --- a/youtube_dl/extractor/antenna.py +++ b/youtube_dl/extractor/antenna.py @@ -6,16 +6,18 @@ from ..utils import ExtractorError class AntennaIE(InfoExtractor): + IE_NAME = 'antenna.gr' + IE_DESC = 'ANT1 WEB TV' _VALID_URL = r'https?://(?:www\.)?antenna\.gr/webtv/watch\?cid=(?P<id>[\w_%]+)(&p=[0-9]+)?' _TESTS = [ { - 'url': 'http://www.antenna.gr/webtv/watch?cid=otn_f_jvi5_e_z_i%3d&p=1', + 'url': 'http://www.antenna.gr/webtv/watch?cid=jbq_kgua8_jw_a%3d&p=1', 'info_dict': { - 'id': 'otn_f_jvi5_e_z_i%3d', + 'id': 'jbq_kgua8_jw_a%3d', 'ext': 'mp4', - 'title': 'Αραχτοί και λάιτ (επεισ.16)', + 'title': 'ANT1 News 19-09-2016 στις 19:00 \r\n', 'thumbnail': 're:^https?://.*\.jpg$', - 'description': '"Αραχτές και... λάιτ" καταστάσεις στον Ant1.', + 'description': 'Μετά από αλλεπάλληλες αναβολές ξεκίνησε η δίκη για την τραγωδία της Marfin.', }, 'params': { 'skip_download': True, @@ -38,30 +40,76 @@ class AntennaIE(InfoExtractor): 'skip': 'Content not available outside Greece.' }] - def _real_extract(self, url): - error = 'Content not available outside Greece.' 
- desc = r'' - thumb = r'(.+?)' + class MediaSelectionError(Exception): + def __init__(self, error_id): + self.id = error_id + + def _raise_extractor_error(self, media_selection_error): + raise ExtractorError('{0} returned error: {1}'.format(self.IE_NAME, + media_selection_error.id), expected=True) + + def _extract_info_rss(self, video_id, webpage): formats = [] - link = 'http://www.antenna.gr/templates/data/jplayer?d=m&cid=' - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - rsspage = self._download_webpage(link + video_id, video_id) - title = self._og_search_title(webpage) or self._search_regex(r'(.+?)', rsspage, 'title')[9:-3] - video_url = self._search_regex(r'file="(.+?)"', rsspage, 'url') + jplayer_url = 'http://www.antenna.gr/templates/data/jplayer?d=m&cid=' + desc_re = r'' + rss = self._download_webpage(jplayer_url + video_id, video_id) + video_url = self._search_regex(r'file="(.+?)"', rss, 'url') if video_url == 'http://extranet.antenna.gr/flvsteaming/GR.flv': - raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error), expected=True) - thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(thumb, webpage, 'thumb', fatal=False) - description = self._og_search_description(webpage) or self._search_regex(desc, webpage, 'description', fatal=False) - formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + raise AntennaIE.MediaSelectionError('Content not available outside Greece.') + formats.extend(self._extract_akamai_formats(video_url, video_id)) for video_format in formats: if video_format.get('format_note') == 'Quality selection URL': formats.remove(video_format) + if len(formats) == 0: + raise AntennaIE.MediaSelectionError('No formats available') + title = self._og_search_title(webpage).split(' | ')[-1] or self._search_regex(r'(.+?)', rss, + 'title')[9:-3] + thumbnail = self._og_search_thumbnail(webpage) + desc = 
self._og_search_description(webpage).split(' | ')[-1] or self._search_regex(desc_re, rss, 'description') + return { 'id': video_id, 'formats': formats, 'title': title, 'thumbnail': thumbnail, - 'description': description, + 'description': desc, } + + def _extract_info_json(self, video_id, webpage): + formats = [] + json_url = 'http://www.antenna.gr/templates/data/jsonPlayer?d=m&cid=' + meta = self._download_json(json_url + video_id, video_id) + manifest_url = meta.get('url') + if manifest_url == 'http://extranet.antenna.gr/flvsteaming/GR.mp4': + raise AntennaIE.MediaSelectionError('Content not available outside Greece.') + formats.extend(self._extract_akamai_formats(manifest_url, video_id)) + for video_format in formats: + if video_format.get('format_note') == 'Quality selection URL': + formats.remove(video_format) + if len(formats) == 0: + raise AntennaIE.MediaSelectionError('No formats available') + title = meta.get('title') or self._og_search_title(webpage) + thumbnail = meta.get('thumb') or self._og_search_thumbnail(webpage) + desc = self._html_search_meta('description', webpage) or self._og_search_description(webpage) + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'thumbnail': thumbnail, + 'description': desc.split(' | ')[-1], + } + + def _real_extract(self, url): + extractors = [self._extract_info_json, self._extract_info_rss] + last_exception = None + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + for extractor in extractors: + try: + return extractor(video_id, webpage) + except AntennaIE.MediaSelectionError as e: + if e.id in ('No formats available', 'Content not available outside Greece.'): + last_exception = e + continue + self._raise_extractor_error(e) + self._raise_extractor_error(last_exception) From 28e4a9e149a636023e287f823c9974603ac7bbf1 Mon Sep 17 00:00:00 2001 From: vagnum08 Date: Fri, 14 Oct 2016 15:37:49 +0100 Subject: [PATCH 3/3] pending-fixes Using `if not formats` instead of `if 
len(formats)==0` Not touching 'Quality Selection URL' --- youtube_dl/extractor/antenna.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/antenna.py b/youtube_dl/extractor/antenna.py index 370c16ba5..fb96f8af9 100644 --- a/youtube_dl/extractor/antenna.py +++ b/youtube_dl/extractor/antenna.py @@ -15,7 +15,7 @@ class AntennaIE(InfoExtractor): 'info_dict': { 'id': 'jbq_kgua8_jw_a%3d', 'ext': 'mp4', - 'title': 'ANT1 News 19-09-2016 στις 19:00 \r\n', + 'title': 'ANT1 News 19-09-2016 στις 19:00', 'thumbnail': 're:^https?://.*\.jpg$', 'description': 'Μετά από αλλεπάλληλες αναβολές ξεκίνησε η δίκη για την τραγωδία της Marfin.', }, @@ -57,20 +57,17 @@ class AntennaIE(InfoExtractor): if video_url == 'http://extranet.antenna.gr/flvsteaming/GR.flv': raise AntennaIE.MediaSelectionError('Content not available outside Greece.') formats.extend(self._extract_akamai_formats(video_url, video_id)) - for video_format in formats: - if video_format.get('format_note') == 'Quality selection URL': - formats.remove(video_format) - if len(formats) == 0: + if not formats: raise AntennaIE.MediaSelectionError('No formats available') title = self._og_search_title(webpage).split(' | ')[-1] or self._search_regex(r'(.+?)', rss, 'title')[9:-3] thumbnail = self._og_search_thumbnail(webpage) - desc = self._og_search_description(webpage).split(' | ')[-1] or self._search_regex(desc_re, rss, 'description') + desc = self._og_search_description(webpage).split(' | ')[-1] or self._search_regex(desc_re, rss, 'description', fatal=False) return { 'id': video_id, 'formats': formats, - 'title': title, + 'title': title.strip(), 'thumbnail': thumbnail, 'description': desc, } @@ -83,10 +80,7 @@ class AntennaIE(InfoExtractor): if manifest_url == 'http://extranet.antenna.gr/flvsteaming/GR.mp4': raise AntennaIE.MediaSelectionError('Content not available outside Greece.') formats.extend(self._extract_akamai_formats(manifest_url, video_id)) - for video_format in 
formats: - if video_format.get('format_note') == 'Quality selection URL': - formats.remove(video_format) - if len(formats) == 0: + if not formats: raise AntennaIE.MediaSelectionError('No formats available') title = meta.get('title') or self._og_search_title(webpage) thumbnail = meta.get('thumb') or self._og_search_thumbnail(webpage)