From 028cd9d1323bc196220f42eb652bc9d7a65476df Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 20 Feb 2019 22:42:48 +1100 Subject: [PATCH 01/26] Use default formats when it fails to find the video information. This allows us to get the thumbnails. --- youtube_dl/extractor/abc.py | 128 +++++++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 4ac323bf6..e238de73a 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -132,7 +132,6 @@ class ABCIViewIE(InfoExtractor): video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) - stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( @@ -143,30 +142,111 @@ class ABCIViewIE(InfoExtractor): token = self._download_webpage( 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) - def tokenize_url(url, token): - return update_url_query(url, { - 'hdnea': token, - }) + try: + stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) + + def tokenize_url(url, token): + return update_url_query(url, { + 'hdnea': token, + }) + + for sd in ('sd', 'sd-low'): + sd_url = try_get( + stream, lambda x: x['streams']['hls'][sd], compat_str) + if not sd_url: + continue + formats = self._extract_m3u8_formats( + tokenize_url(sd_url, token), video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + if formats: + break + self._sort_formats(formats) + + # import pprint + # pp = pprint.PrettyPrinter(indent=4) + # pp.pprint(formats) + + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if 
src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] - for sd in ('sd', 'sd-low'): - sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) - if not sd_url: - continue - formats = self._extract_m3u8_formats( - tokenize_url(sd_url, token), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - if formats: - break - self._sort_formats(formats) - - subtitles = {} - src_vtt = stream.get('captions', {}).get('src-vtt') - if src_vtt: - subtitles['en'] = [{ - 'url': src_vtt, - 'ext': 'vtt', - }] + except: + subtitles = {} + formats = [{ + u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-64', + u'fps': None, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 64.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_a.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'none'}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-234', + u'fps': None, + u'height': 180, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 234.0, + u'url': 
u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 320}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-508', + u'fps': None, + u'height': 288, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 508.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_3_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 512}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-630', + u'fps': None, + u'height': 360, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 630.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_2_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 640}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': 
u'hls-993', + u'fps': None, + u'height': 450, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 993.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_1_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.640028', + u'width': 800}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-1458', + u'fps': None, + u'height': 576, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 1458.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_0_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.640028', + u'width': 1024}] is_live = video_params.get('livestream') == '1' if is_live: From 9cd5eb4487f9053250fc5c15e9a8f837189b7cda Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 00:03:24 +1100 Subject: [PATCH 02/26] [abc.net.au:iview:show] Added extractor to get playlists from show urls. 
--- youtube_dl/extractor/abc.py | 190 +++++++++++++---------------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 84 insertions(+), 107 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e238de73a..02cd719cc 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -131,8 +131,11 @@ class ABCIViewIE(InfoExtractor): video_id = self._match_id(url) video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) - title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) + return self._extract_from_video_params(video_id, video_params) + def _extract_from_video_params(self, video_id, video_params): + title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) + stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( int(time.time()), house_number) @@ -142,116 +145,32 @@ class ABCIViewIE(InfoExtractor): token = self._download_webpage( 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) - try: - stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) - - def tokenize_url(url, token): - return update_url_query(url, { - 'hdnea': token, - }) - - for sd in ('sd', 'sd-low'): - sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) - if not sd_url: - continue - formats = self._extract_m3u8_formats( - tokenize_url(sd_url, token), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - if formats: - break - self._sort_formats(formats) - - # import pprint - # pp = pprint.PrettyPrinter(indent=4) - # pp.pprint(formats) - - subtitles = {} - src_vtt = stream.get('captions', {}).get('src-vtt') - if src_vtt: - subtitles['en'] = [{ - 'url': src_vtt, - 'ext': 'vtt', - }] - - except: 
- subtitles = {} - formats = [{ - u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-64', - u'fps': None, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 64.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_a.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'none'}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-234', - u'fps': None, - u'height': 180, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 234.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 320}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-508', - u'fps': None, - u'height': 288, - u'manifest_url': 
u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 508.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_3_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 512}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-630', - u'fps': None, - u'height': 360, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 630.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_2_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 640}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-993', - u'fps': None, - u'height': 450, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 993.0, - u'url': 
u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_1_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.640028', - u'width': 800}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-1458', - u'fps': None, - u'height': 576, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 1458.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_0_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.640028', - u'width': 1024}] + def tokenize_url(url, token): + return update_url_query(url, { + 'hdnea': token, + }) + for sd in ('sd', 'sd-low'): + sd_url = try_get( + stream, lambda x: x['streams']['hls'][sd], compat_str) + if not sd_url: + continue + formats = self._extract_m3u8_formats( + tokenize_url(sd_url, token), video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + if formats: + break + self._sort_formats(formats) + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] is_live = video_params.get('livestream') == '1' if is_live: title = self._live_title(title) - return { 'id': video_id, 'title': title, @@ -271,3 +190,60 @@ class ABCIViewIE(InfoExtractor): 'subtitles': subtitles, 'is_live': is_live, } + + +class ABCIViewShowIE(ABCIViewIE): + IE_NAME = 
'abc.net.au:iview:show' + _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/?#]+)' + + # ABC iview programs are normally available for 14 days only. + _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'info_dict': { + 'id': 'ZX9371A050S00', + 'ext': 'mp4', + 'title': "Gaston's Birthday", + 'series': "Ben And Holly's Little Kingdom", + 'description': 'md5:f9de914d02f226968f598ac76f105bcf', + 'upload_date': '20180604', + 'uploader_id': 'abc4kids', + 'timestamp': 1528140219, + }, + 'params': { + 'skip_download': True, + }, + }] + + + def _real_extract(self, url): + show_id = self._match_id(url) + # This ends up getting the video_params for the initial entry + # However just taking the next episode data keeps the Downloading JSON metadata, webpage, m3u8 information + # more consistent. + show_params = self._download_json( + 'https://iview.abc.net.au/api/programs/' + show_id, show_id) + + next_href = show_params.get('nextEpisode').get('href') + seen_hrefs = set() + entries = [] + + while next_href and next_href not in seen_hrefs: + seen_hrefs.add(next_href) + + video_id = next_href.rsplit('/', 1)[-1] + video_params = self._download_json( + 'https://iview.abc.net.au/api/' + next_href, video_id) + entries.append(self._extract_from_video_params(video_id, video_params)) + + next_href = video_params.get('nextEpisode').get('href') + + return { + '_type': 'playlist', + 'title': entries[0].get('series'), + 'description': entries[0].get('description'), + 'uploader_id': entries[0].get('uploader_id'), + 'entries': entries + } + + diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index d4583b8e4..1476a88d9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, + ABCIViewShowIE, ) from .abcnews import ( 
AbcNewsIE, From fb8f3bbdc506e969a98713366a7e57f6dadb6f0b Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 00:24:38 +1100 Subject: [PATCH 03/26] [abc.net.au:iview:show] Fixed the _TESTS to actually work properly. --- youtube_dl/extractor/abc.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 02cd719cc..8b3c9f0cb 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -198,21 +198,13 @@ class ABCIViewShowIE(ABCIViewIE): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', - 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { - 'id': 'ZX9371A050S00', - 'ext': 'mp4', - 'title': "Gaston's Birthday", - 'series': "Ben And Holly's Little Kingdom", - 'description': 'md5:f9de914d02f226968f598ac76f105bcf', - 'upload_date': '20180604', + 'title': "Play School Story Time", + 'description': 'md5:384ca6867e84e3aa2f5ef48e1b982e83', 'uploader_id': 'abc4kids', - 'timestamp': 1528140219, - }, - 'params': { - 'skip_download': True, }, + 'playlist_count': 17 }] From eb8a64e6b9d358a1655c6420d38ff4e69c91861b Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 22:52:56 +1100 Subject: [PATCH 04/26] Get proper series info from https://iview.abc.net.au/api/series/{series_id}. Renamed ABCIViewShowIE to ABCIViewSeriesIE to match this new behaviour. 
--- youtube_dl/extractor/abc.py | 108 +++++++++++++++++++++++------ youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 8b3c9f0cb..e48c7dae6 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -192,48 +192,114 @@ class ABCIViewIE(InfoExtractor): } -class ABCIViewShowIE(ABCIViewIE): +class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:show' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/?#]+)' - # ABC iview programs are normally available for 14 days only. _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/play-school-celebrity-covers', + 'info_dict': { + 'title': "Play School Celebrity Covers", + 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 31 + }, + { 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { 'title': "Play School Story Time", - 'description': 'md5:384ca6867e84e3aa2f5ef48e1b982e83', + 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', 'uploader_id': 'abc4kids', }, - 'playlist_count': 17 + 'playlist_count': 24 + }, + { + 'url': 'https://iview.abc.net.au/show/play-school-story-time-languages', + 'info_dict': { + 'title': "Play School Story Time: Languages", + 'description': 'md5:cca001fadcf1cb1508a9301c4fb0343a', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 5 + }, + { + 'url': 'https://iview.abc.net.au/show/big-teds-big-adventure', + 'info_dict': { + 'title': "Big Ted's Big Adventure", + 'description': 'md5:77f30f44f632f0f4d312e3b9af1869f6', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 20 + }, + { + 'url': 'https://iview.abc.net.au/show/humptys-big-adventure', + 'info_dict': { + 'title': "Humpty's Big Adventure", + 'description': 'md5:65c4335e1576ec92426f5d05a52c04f6', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 16 + }, + { + 'url': 
'https://iview.abc.net.au/show/jemimas-big-adventure', + 'info_dict': { + 'title': "Jemima's Big Adventure", + 'description': 'md5:be79641bb70f329ca40b924c25a7f293', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 10 + }, + { + 'url': 'https://iview.abc.net.au/show/joeys-big-adventure', + 'info_dict': { + 'title': "Joey's Big Adventure", + 'description': 'md5:e3529b28bc25de54bceb96f0f4dbee7a', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 13 + }, + { + 'url': 'https://iview.abc.net.au/show/little-teds-big-adventure', + 'info_dict': { + 'title': "Little Ted's Big Adventure", + 'description': 'md5:8d064998070bfafeec142547ab48982c', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 20 + }, + { + 'url': 'https://iview.abc.net.au/show/maurices-big-adventure', + 'info_dict': { + 'title': "Maurice's Big Adventure", + 'description': 'md5:a41d7b9b0c87ef610c117a679f3efd5e', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 12 }] def _real_extract(self, url): - show_id = self._match_id(url) - # This ends up getting the video_params for the initial entry - # However just taking the next episode data keeps the Downloading JSON metadata, webpage, m3u8 information - # more consistent. - show_params = self._download_json( - 'https://iview.abc.net.au/api/programs/' + show_id, show_id) + series_id = self._match_id(url) + series_params = self._download_json( + 'https://iview.abc.net.au/api/series/' + series_id, series_id) - next_href = show_params.get('nextEpisode').get('href') - seen_hrefs = set() + # Sometimes the episodes are listed in reverse order, with the most recently uploaded ones first. + # This is normally for time-limited series. 
Others appear oldest to newest + episodes = series_params.get('episodes') entries = [] - while next_href and next_href not in seen_hrefs: - seen_hrefs.add(next_href) - - video_id = next_href.rsplit('/', 1)[-1] + for episode in episodes: + href = episode.get('href') + video_id = episode.get('episodeHouseNumber') video_params = self._download_json( - 'https://iview.abc.net.au/api/' + next_href, video_id) + 'https://iview.abc.net.au/api/' + href, video_id) entries.append(self._extract_from_video_params(video_id, video_params)) - next_href = video_params.get('nextEpisode').get('href') - return { '_type': 'playlist', - 'title': entries[0].get('series'), - 'description': entries[0].get('description'), + 'title': series_params.get('seriesTitle'), + 'description': series_params.get('seriesDescription'), 'uploader_id': entries[0].get('uploader_id'), 'entries': entries } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1476a88d9..ad81c26e8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, - ABCIViewShowIE, + ABCIViewSeriesIE, ) from .abcnews import ( AbcNewsIE, From 9e2f67ee5fe2bd3d740c64d1e4493a63f5d80911 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 28 Feb 2019 23:20:42 +1100 Subject: [PATCH 05/26] [abc.net.au:iview:series]: Playlist type changed to 'url'. This fixes issues where the incorrect extractor ID was used in the archive, allowing for two separate downloads (if you used both abc.net.au:iview and abc.net.au:iview:series). [abc.net.au:iview]: Added support for alternate episode url: https://iview.abc.net.au/programs// (as used by the series episode list href). 
--- youtube_dl/extractor/abc.py | 38 +++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e48c7dae6..86171acc6 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/))(?P<id>[^/?#]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. @@ -131,11 +131,9 @@ class ABCIViewIE(InfoExtractor): video_id = self._match_id(url) video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) - return self._extract_from_video_params(video_id, video_params) - - def _extract_from_video_params(self, video_id, video_params): title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) + house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( int(time.time()), house_number) @@ -161,6 +159,7 @@ class ABCIViewIE(InfoExtractor): if formats: break self._sort_formats(formats) + subtitles = {} src_vtt = stream.get('captions', {}).get('src-vtt') if src_vtt: @@ -168,9 +167,11 @@ class ABCIViewIE(InfoExtractor): 'url': src_vtt, 'ext': 'vtt', }] + is_live = video_params.get('livestream') == '1' if is_live: title = self._live_title(title) + return { 'id': video_id, 'title': title, @@ -193,7 +194,7 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): - IE_NAME = 'abc.net.au:iview:show' + IE_NAME = 'abc.net.au:iview:series' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/?#]+)' _TESTS = [{ @@ -290,11 +291,28 @@ class 
ABCIViewSeriesIE(ABCIViewIE): entries = [] for episode in episodes: - href = episode.get('href') - video_id = episode.get('episodeHouseNumber') - video_params = self._download_json( - 'https://iview.abc.net.au/api/' + href, video_id) - entries.append(self._extract_from_video_params(video_id, video_params)) + video_id = house_number = episode.get('episodeHouseNumber') + title = unescapeHTML(episode.get('title') or episode['seriesTitle']) + entry = { + '_type': 'url', + 'url': 'https://iview.abc.net.au/' + episode.get('href'), + 'ie_key': ABCIViewIE.ie_key(), + 'id': video_id, + 'title': episode.get('title'), + 'description': episode.get('description'), + 'thumbnail': episode.get('thumbnail'), + 'duration': int_or_none(episode.get('duration')), + 'timestamp': parse_iso8601(episode.get('pubDate'), ' '), + 'series': unescapeHTML(episode.get('seriesTitle')), + 'series_id': episode.get('seriesHouseNumber') or video_id[:7], + 'season_number': int_or_none(self._search_regex( + r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), + 'episode_number': int_or_none(self._search_regex( + r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), + 'episode_id': house_number, + 'uploader_id': episode.get('channel'), + } + entries.append(entry) return { '_type': 'playlist', From 0f0b162de3ea216d7e934f7c0a52bc0b97d70870 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 1 Mar 2019 00:10:53 +1100 Subject: [PATCH 06/26] Cleanup _TESTS. Fixed flake8 complaints. 
--- youtube_dl/extractor/abc.py | 93 ++++--------------------------------- 1 file changed, 10 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 86171acc6..054d18108 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -197,88 +197,17 @@ class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' - _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/play-school-celebrity-covers', - 'info_dict': { - 'title': "Play School Celebrity Covers", - 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', - 'uploader_id': 'abc4kids', + _TESTS = [ + { + 'url': 'https://iview.abc.net.au/show/play-school-story-time', + 'info_dict': { + 'title': "Play School Story Time", + 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 24 }, - 'playlist_count': 31 - }, - { - 'url': 'https://iview.abc.net.au/show/play-school-story-time', - 'info_dict': { - 'title': "Play School Story Time", - 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 24 - }, - { - 'url': 'https://iview.abc.net.au/show/play-school-story-time-languages', - 'info_dict': { - 'title': "Play School Story Time: Languages", - 'description': 'md5:cca001fadcf1cb1508a9301c4fb0343a', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 5 - }, - { - 'url': 'https://iview.abc.net.au/show/big-teds-big-adventure', - 'info_dict': { - 'title': "Big Ted's Big Adventure", - 'description': 'md5:77f30f44f632f0f4d312e3b9af1869f6', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 20 - }, - { - 'url': 'https://iview.abc.net.au/show/humptys-big-adventure', - 'info_dict': { - 'title': "Humpty's Big Adventure", - 'description': 'md5:65c4335e1576ec92426f5d05a52c04f6', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 16 - }, - { - 'url': 
'https://iview.abc.net.au/show/jemimas-big-adventure', - 'info_dict': { - 'title': "Jemima's Big Adventure", - 'description': 'md5:be79641bb70f329ca40b924c25a7f293', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 10 - }, - { - 'url': 'https://iview.abc.net.au/show/joeys-big-adventure', - 'info_dict': { - 'title': "Joey's Big Adventure", - 'description': 'md5:e3529b28bc25de54bceb96f0f4dbee7a', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 13 - }, - { - 'url': 'https://iview.abc.net.au/show/little-teds-big-adventure', - 'info_dict': { - 'title': "Little Ted's Big Adventure", - 'description': 'md5:8d064998070bfafeec142547ab48982c', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 20 - }, - { - 'url': 'https://iview.abc.net.au/show/maurices-big-adventure', - 'info_dict': { - 'title': "Maurice's Big Adventure", - 'description': 'md5:a41d7b9b0c87ef610c117a679f3efd5e', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 12 - }] - + ] def _real_extract(self, url): series_id = self._match_id(url) @@ -321,5 +250,3 @@ class ABCIViewSeriesIE(ABCIViewIE): 'uploader_id': entries[0].get('uploader_id'), 'entries': entries } - - From 4ffdcbdccda96763a1102d03627d521cc41760fc Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 1 Mar 2019 23:45:40 +1100 Subject: [PATCH 07/26] Fixed up overlapping _VALID_URL between ABCIViewIE and ABCIViewSeriesIE. Added more _TESTS to exercise the additional _VALID_URL options. 
--- youtube_dl/extractor/abc.py | 58 +++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 054d18108..49e7848c9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,22 +105,53 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/))(?P<id>[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P<id>[A-Z0-9]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', - 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'url': 'https://iview.abc.net.au/show/ben-and-holly-s-little-kingdom/series/0/video/ZX9371A011S00', 'info_dict': { - 'id': 'ZX9371A050S00', + 'id': 'ZX9371A011S00', 'ext': 'mp4', - 'title': "Gaston's Birthday", + 'title': "Dolly Plum", 'series': "Ben And Holly's Little Kingdom", - 'description': 'md5:f9de914d02f226968f598ac76f105bcf', - 'upload_date': '20180604', + 'description': 'md5:7d5d426d28814ac97b7c98e33f37193c', + 'upload_date': '20190301', 'uploader_id': 'abc4kids', - 'timestamp': 1528140219, + 'timestamp': 1551466585, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://iview.abc.net.au/programs/CK1752H001S00', + 'info_dict': { + 'id': 'CK1752H001S00', + 'ext': 'mp4', + 'title': "Emma Alberici: Guess How Much I Love You (Italian)", + 'series': "Play School Story Time: Languages", + 'description': 'md5:b61ce34edc946e109e76c7432db5d91f', + 'upload_date': '20171204', + 'uploader_id': 'abc4kids', + 'timestamp': 1512414300, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://iview.abc.net.au/programs/play-school-story-time-languages/CK1752H001S00', + 'info_dict': { + 'id': 
'CK1752H001S00', + 'ext': 'mp4', + 'title': "Emma Alberici: Guess How Much I Love You (Italian)", + 'series': "Play School Story Time: Languages", + 'description': 'md5:b61ce34edc946e109e76c7432db5d91f', + 'upload_date': '20171204', + 'uploader_id': 'abc4kids', + 'timestamp': 1512414300, }, 'params': { 'skip_download': True, @@ -195,9 +226,18 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?$' _TESTS = [ + { + 'url': 'https://iview.abc.net.au/programs/play-school-celebrity-covers', + 'info_dict': { + 'title': "Play School Celebrity Covers", + 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 31 + }, { 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { From b9951a0b7131e981f7f84643ce850a63fa9d96b0 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 22:31:27 +1000 Subject: [PATCH 08/26] Override suitable so I can remove the trailing $ from _VALID_URL. --- youtube_dl/extractor/abc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 49e7848c9..48ece8966 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -226,7 +226,7 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?$' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?' 
_TESTS = [ { @@ -249,6 +249,10 @@ class ABCIViewSeriesIE(ABCIViewIE): }, ] + @classmethod + def suitable(cls, url): + return False if ABCIViewIE.suitable(url) else super(ABCIViewSeriesIE, cls).suitable(url) + def _real_extract(self, url): series_id = self._match_id(url) series_params = self._download_json( From 27d17e1f60ac057fe3298217c6c8f70145756779 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 23:16:09 +1000 Subject: [PATCH 09/26] Fixed all review comments. --- youtube_dl/extractor/abc.py | 72 ++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 48ece8966..e56a38503 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[A-Z0-9]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[^/?#]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. 
@@ -158,6 +158,26 @@ class ABCIViewIE(InfoExtractor): }, }] + def _make_result(self, video_id, title, house_number, video_params, **kwargs): + res = { + 'id': video_id, + 'title': title, + 'description': video_params.get('description'), + 'thumbnail': video_params.get('thumbnail'), + 'duration': int_or_none(video_params.get('duration')), + 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), + 'series': unescapeHTML(video_params.get('seriesTitle')), + 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], + 'season_number': int_or_none(self._search_regex( + r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), + 'episode_number': int_or_none(self._search_regex( + r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), + 'episode_id': house_number, + 'uploader_id': video_params.get('channel'), + } + res.update(**kwargs) + return res + def _real_extract(self, url): video_id = self._match_id(url) video_params = self._download_json( @@ -203,30 +223,15 @@ class ABCIViewIE(InfoExtractor): if is_live: title = self._live_title(title) - return { - 'id': video_id, - 'title': title, - 'description': video_params.get('description'), - 'thumbnail': video_params.get('thumbnail'), - 'duration': int_or_none(video_params.get('eventDuration')), - 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), - 'series': unescapeHTML(video_params.get('seriesTitle')), - 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'season_number': int_or_none(self._search_regex( - r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), - 'episode_number': int_or_none(self._search_regex( - r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), - 'episode_id': house_number, - 'uploader_id': video_params.get('channel'), - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - } + return self._make_result(video_id, title, house_number, video_params, + formats=formats, + subtitles=subtitles, + is_live=is_live) class 
ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[^/?#]+)/?' _TESTS = [ { @@ -266,31 +271,16 @@ class ABCIViewSeriesIE(ABCIViewIE): for episode in episodes: video_id = house_number = episode.get('episodeHouseNumber') title = unescapeHTML(episode.get('title') or episode['seriesTitle']) - entry = { - '_type': 'url', - 'url': 'https://iview.abc.net.au/' + episode.get('href'), - 'ie_key': ABCIViewIE.ie_key(), - 'id': video_id, - 'title': episode.get('title'), - 'description': episode.get('description'), - 'thumbnail': episode.get('thumbnail'), - 'duration': int_or_none(episode.get('duration')), - 'timestamp': parse_iso8601(episode.get('pubDate'), ' '), - 'series': unescapeHTML(episode.get('seriesTitle')), - 'series_id': episode.get('seriesHouseNumber') or video_id[:7], - 'season_number': int_or_none(self._search_regex( - r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), - 'episode_number': int_or_none(self._search_regex( - r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), - 'episode_id': house_number, - 'uploader_id': episode.get('channel'), - } + entry = self._make_result(video_id, episode.get('title'), house_number, episode, + _type='url', + url='https://iview.abc.net.au/' + episode.get('href'), + ie_key=ABCIViewIE.ie_key()) entries.append(entry) return { '_type': 'playlist', 'title': series_params.get('seriesTitle'), 'description': series_params.get('seriesDescription'), - 'uploader_id': entries[0].get('uploader_id'), + 'uploader_id': entries[0].get('uploader_id') if entries else None, 'entries': entries } From 9efefe2f4a278750e66366e16a25a3b293febe2d Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 23:38:04 +1000 Subject: [PATCH 10/26] Matched to Daniel Heath's earlier version of ABCIViewShowIE. 
--- youtube_dl/extractor/abc.py | 24 ++++++++++++++---------- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e56a38503..4c96941bf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -229,8 +229,8 @@ class ABCIViewIE(InfoExtractor): is_live=is_live) -class ABCIViewSeriesIE(ABCIViewIE): - IE_NAME = 'abc.net.au:iview:series' +class ABCIViewShowIE(ABCIViewIE): + IE_NAME = 'abc.net.au:iview:show' _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[^/?#]+)/?' _TESTS = [ @@ -256,16 +256,19 @@ class ABCIViewSeriesIE(ABCIViewIE): @classmethod def suitable(cls, url): - return False if ABCIViewIE.suitable(url) else super(ABCIViewSeriesIE, cls).suitable(url) + return False if ABCIViewIE.suitable(url) else super(ABCIViewShowIE, cls).suitable(url) def _real_extract(self, url): - series_id = self._match_id(url) - series_params = self._download_json( - 'https://iview.abc.net.au/api/series/' + series_id, series_id) + show_id = self._match_id(url) + show_data = self._download_json( + 'https://iview.abc.net.au/api/series/' + show_id, show_id) + + title = show_data.get('seriesDescription') or show_id + description = show_data.get('seriesDescription') # Sometimes the episodes are listed in reverse order, with the most recently uploaded ones first. # This is normally for time-limited series. 
Others appear oldest to newest - episodes = series_params.get('episodes') + episodes = show_data.get('episodes') entries = [] for episode in episodes: @@ -279,8 +282,9 @@ class ABCIViewSeriesIE(ABCIViewIE): return { '_type': 'playlist', - 'title': series_params.get('seriesTitle'), - 'description': series_params.get('seriesDescription'), + 'id': show_id, + 'title': title, + 'description': description, 'uploader_id': entries[0].get('uploader_id') if entries else None, 'entries': entries - } + } \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e91cf708a..326ccf828 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, - ABCIViewSeriesIE, + ABCIViewShowIE, ) from .abcnews import ( AbcNewsIE, From cfa689b1c967fcfe279dc4caafcf65d05e3ae4ae Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 20 Feb 2019 22:42:48 +1100 Subject: [PATCH 11/26] Use default formats when it fails to find the video information. This allows us to get the thumbnails. 
--- youtube_dl/extractor/abc.py | 128 +++++++++++++++++++++++++++++------- 1 file changed, 104 insertions(+), 24 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 4ac323bf6..e238de73a 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -132,7 +132,6 @@ class ABCIViewIE(InfoExtractor): video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) - stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( @@ -143,30 +142,111 @@ class ABCIViewIE(InfoExtractor): token = self._download_webpage( 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) - def tokenize_url(url, token): - return update_url_query(url, { - 'hdnea': token, - }) + try: + stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) + + def tokenize_url(url, token): + return update_url_query(url, { + 'hdnea': token, + }) + + for sd in ('sd', 'sd-low'): + sd_url = try_get( + stream, lambda x: x['streams']['hls'][sd], compat_str) + if not sd_url: + continue + formats = self._extract_m3u8_formats( + tokenize_url(sd_url, token), video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + if formats: + break + self._sort_formats(formats) + + # import pprint + # pp = pprint.PrettyPrinter(indent=4) + # pp.pprint(formats) + + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] - for sd in ('sd', 'sd-low'): - sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) - if not sd_url: - continue - formats = self._extract_m3u8_formats( - tokenize_url(sd_url, token), video_id, 'mp4', - 
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - if formats: - break - self._sort_formats(formats) - - subtitles = {} - src_vtt = stream.get('captions', {}).get('src-vtt') - if src_vtt: - subtitles['en'] = [{ - 'url': src_vtt, - 'ext': 'vtt', - }] + except: + subtitles = {} + formats = [{ + u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-64', + u'fps': None, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 64.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_a.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'none'}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-234', + u'fps': None, + u'height': 180, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 234.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 320}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-508', + u'fps': None, + u'height': 288, + 
u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 508.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_3_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 512}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-630', + u'fps': None, + u'height': 360, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 630.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_2_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.77.30', + u'width': 640}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-993', + u'fps': None, + u'height': 450, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 993.0, 
+ u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_1_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.640028', + u'width': 800}, + { u'acodec': u'mp4a.40.2', + u'ext': u'mp4', + u'format_id': u'hls-1458', + u'fps': None, + u'height': 576, + u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', + u'preference': None, + u'protocol': u'm3u8_native', + u'tbr': 1458.0, + u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_0_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', + u'vcodec': u'avc1.640028', + u'width': 1024}] is_live = video_params.get('livestream') == '1' if is_live: From 3959b31a4a1782d8778266064586d007e1a9a03a Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 00:03:24 +1100 Subject: [PATCH 12/26] [abc.net.au:iview:show] Added extractor to get playlists from show urls. 
--- youtube_dl/extractor/abc.py | 190 +++++++++++++---------------- youtube_dl/extractor/extractors.py | 1 + 2 files changed, 84 insertions(+), 107 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e238de73a..02cd719cc 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -131,8 +131,11 @@ class ABCIViewIE(InfoExtractor): video_id = self._match_id(url) video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) - title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) + return self._extract_from_video_params(video_id, video_params) + def _extract_from_video_params(self, video_id, video_params): + title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) + stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( int(time.time()), house_number) @@ -142,116 +145,32 @@ class ABCIViewIE(InfoExtractor): token = self._download_webpage( 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id) - try: - stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) - - def tokenize_url(url, token): - return update_url_query(url, { - 'hdnea': token, - }) - - for sd in ('sd', 'sd-low'): - sd_url = try_get( - stream, lambda x: x['streams']['hls'][sd], compat_str) - if not sd_url: - continue - formats = self._extract_m3u8_formats( - tokenize_url(sd_url, token), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - if formats: - break - self._sort_formats(formats) - - # import pprint - # pp = pprint.PrettyPrinter(indent=4) - # pp.pprint(formats) - - subtitles = {} - src_vtt = stream.get('captions', {}).get('src-vtt') - if src_vtt: - subtitles['en'] = [{ - 'url': src_vtt, - 'ext': 'vtt', - }] - - except: 
- subtitles = {} - formats = [{ - u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-64', - u'fps': None, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 64.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_a.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'none'}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-234', - u'fps': None, - u'height': 180, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 234.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_4_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 320}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-508', - u'fps': None, - u'height': 288, - u'manifest_url': 
u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 508.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_3_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 512}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-630', - u'fps': None, - u'height': 360, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 630.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_2_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.77.30', - u'width': 640}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-993', - u'fps': None, - u'height': 450, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 993.0, - u'url': 
u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_1_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.640028', - u'width': 800}, - { u'acodec': u'mp4a.40.2', - u'ext': u'mp4', - u'format_id': u'hls-1458', - u'fps': None, - u'height': 576, - u'manifest_url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/master.m3u8?hdnea=st%3D1528898229%7Eexp%3D1528905429%7Eacl%3D%2F%2A%7Ehmac%3D7c0049dda233b54c3b960b3f56a00809756fe3d7cc69f53befcd1eca7a5eb44f', - u'preference': None, - u'protocol': u'm3u8_native', - u'tbr': 1458.0, - u'url': u'http://iviewhls-i.akamaihd.net/i/playback/_definst_/_video/ch1/CH1612H002S00MA1D1_20171215125703_,1500000,1000000,650000,500000,220000,.mp4.csmil/index_0_av.m3u8?null=0&id=AgDKn6iplmWlDfIiIVsJw%2fXW7PIsPqRgUMPQ978Sc8JvT18NlaqB9baSiasj4ERXPiwUGmBe0ROqCQ%3d%3d', - u'vcodec': u'avc1.640028', - u'width': 1024}] + def tokenize_url(url, token): + return update_url_query(url, { + 'hdnea': token, + }) + for sd in ('sd', 'sd-low'): + sd_url = try_get( + stream, lambda x: x['streams']['hls'][sd], compat_str) + if not sd_url: + continue + formats = self._extract_m3u8_formats( + tokenize_url(sd_url, token), video_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + if formats: + break + self._sort_formats(formats) + subtitles = {} + src_vtt = stream.get('captions', {}).get('src-vtt') + if src_vtt: + subtitles['en'] = [{ + 'url': src_vtt, + 'ext': 'vtt', + }] is_live = video_params.get('livestream') == '1' if is_live: title = self._live_title(title) - return { 'id': video_id, 'title': title, @@ -271,3 +190,60 @@ class ABCIViewIE(InfoExtractor): 'subtitles': subtitles, 'is_live': is_live, } + + +class ABCIViewShowIE(ABCIViewIE): + IE_NAME = 
'abc.net.au:iview:show' + _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' + + # ABC iview programs are normally available for 14 days only. + _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', + 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'info_dict': { + 'id': 'ZX9371A050S00', + 'ext': 'mp4', + 'title': "Gaston's Birthday", + 'series': "Ben And Holly's Little Kingdom", + 'description': 'md5:f9de914d02f226968f598ac76f105bcf', + 'upload_date': '20180604', + 'uploader_id': 'abc4kids', + 'timestamp': 1528140219, + }, + 'params': { + 'skip_download': True, + }, + }] + + + def _real_extract(self, url): + show_id = self._match_id(url) + # This ends up getting the video_params for the initial entry + # However just taking the next episode data keeps the Downloading JSON metadata, webpage, m3u8 information + # more consistent. + show_params = self._download_json( + 'https://iview.abc.net.au/api/programs/' + show_id, show_id) + + next_href = show_params.get('nextEpisode').get('href') + seen_hrefs = set() + entries = [] + + while next_href and next_href not in seen_hrefs: + seen_hrefs.add(next_href) + + video_id = next_href.rsplit('/', 1)[-1] + video_params = self._download_json( + 'https://iview.abc.net.au/api/' + next_href, video_id) + entries.append(self._extract_from_video_params(video_id, video_params)) + + next_href = video_params.get('nextEpisode').get('href') + + return { + '_type': 'playlist', + 'title': entries[0].get('series'), + 'description': entries[0].get('description'), + 'uploader_id': entries[0].get('uploader_id'), + 'entries': entries + } + + diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 0e3ccb82d..5af39ac36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, + ABCIViewShowIE, ) from .abcnews import ( 
AbcNewsIE, From cd6f026f8bbb84ec3c896acfe2c18a73cff2a5de Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 00:24:38 +1100 Subject: [PATCH 13/26] [abc.net.au:iview:show] Fixed the _TESTS to actually work properly. --- youtube_dl/extractor/abc.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 02cd719cc..8b3c9f0cb 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -198,21 +198,13 @@ class ABCIViewShowIE(ABCIViewIE): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', - 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { - 'id': 'ZX9371A050S00', - 'ext': 'mp4', - 'title': "Gaston's Birthday", - 'series': "Ben And Holly's Little Kingdom", - 'description': 'md5:f9de914d02f226968f598ac76f105bcf', - 'upload_date': '20180604', + 'title': "Play School Story Time", + 'description': 'md5:384ca6867e84e3aa2f5ef48e1b982e83', 'uploader_id': 'abc4kids', - 'timestamp': 1528140219, - }, - 'params': { - 'skip_download': True, }, + 'playlist_count': 17 }] From 78e9d5e873852f285ecfdeb1e949ac8b29cae272 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Wed, 27 Feb 2019 22:52:56 +1100 Subject: [PATCH 14/26] Get proper series info from https://iview.abc.net.au/api/series/{series_id}. Renamed ABCIViewShowIE to ABCIViewSeriesIE to match this new behaviour. 
--- youtube_dl/extractor/abc.py | 108 +++++++++++++++++++++++------ youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 8b3c9f0cb..e48c7dae6 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -192,48 +192,114 @@ class ABCIViewIE(InfoExtractor): } -class ABCIViewShowIE(ABCIViewIE): +class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:show' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' - # ABC iview programs are normally available for 14 days only. _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/play-school-celebrity-covers', + 'info_dict': { + 'title': "Play School Celebrity Covers", + 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 31 + }, + { 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { 'title': "Play School Story Time", - 'description': 'md5:384ca6867e84e3aa2f5ef48e1b982e83', + 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', 'uploader_id': 'abc4kids', }, - 'playlist_count': 17 + 'playlist_count': 24 + }, + { + 'url': 'https://iview.abc.net.au/show/play-school-story-time-languages', + 'info_dict': { + 'title': "Play School Story Time: Languages", + 'description': 'md5:cca001fadcf1cb1508a9301c4fb0343a', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 5 + }, + { + 'url': 'https://iview.abc.net.au/show/big-teds-big-adventure', + 'info_dict': { + 'title': "Big Ted's Big Adventure", + 'description': 'md5:77f30f44f632f0f4d312e3b9af1869f6', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 20 + }, + { + 'url': 'https://iview.abc.net.au/show/humptys-big-adventure', + 'info_dict': { + 'title': "Humpty's Big Adventure", + 'description': 'md5:65c4335e1576ec92426f5d05a52c04f6', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 16 + }, + { + 'url': 
'https://iview.abc.net.au/show/jemimas-big-adventure', + 'info_dict': { + 'title': "Jemima's Big Adventure", + 'description': 'md5:be79641bb70f329ca40b924c25a7f293', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 10 + }, + { + 'url': 'https://iview.abc.net.au/show/joeys-big-adventure', + 'info_dict': { + 'title': "Joey's Big Adventure", + 'description': 'md5:e3529b28bc25de54bceb96f0f4dbee7a', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 13 + }, + { + 'url': 'https://iview.abc.net.au/show/little-teds-big-adventure', + 'info_dict': { + 'title': "Little Ted's Big Adventure", + 'description': 'md5:8d064998070bfafeec142547ab48982c', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 20 + }, + { + 'url': 'https://iview.abc.net.au/show/maurices-big-adventure', + 'info_dict': { + 'title': "Maurice's Big Adventure", + 'description': 'md5:a41d7b9b0c87ef610c117a679f3efd5e', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 12 }] def _real_extract(self, url): - show_id = self._match_id(url) - # This ends up getting the video_params for the initial entry - # However just taking the next episode data keeps the Downloading JSON metadata, webpage, m3u8 information - # more consistent. - show_params = self._download_json( - 'https://iview.abc.net.au/api/programs/' + show_id, show_id) + series_id = self._match_id(url) + series_params = self._download_json( + 'https://iview.abc.net.au/api/series/' + series_id, series_id) - next_href = show_params.get('nextEpisode').get('href') - seen_hrefs = set() + # Sometimes the episodes are listed in reverse order, with the most recently uploaded ones first. + # This is normally for time-limited series. 
Others appear oldest to newest + episodes = series_params.get('episodes') entries = [] - while next_href and next_href not in seen_hrefs: - seen_hrefs.add(next_href) - - video_id = next_href.rsplit('/', 1)[-1] + for episode in episodes: + href = episode.get('href') + video_id = episode.get('episodeHouseNumber') video_params = self._download_json( - 'https://iview.abc.net.au/api/' + next_href, video_id) + 'https://iview.abc.net.au/api/' + href, video_id) entries.append(self._extract_from_video_params(video_id, video_params)) - next_href = video_params.get('nextEpisode').get('href') - return { '_type': 'playlist', - 'title': entries[0].get('series'), - 'description': entries[0].get('description'), + 'title': series_params.get('seriesTitle'), + 'description': series_params.get('seriesDescription'), 'uploader_id': entries[0].get('uploader_id'), 'entries': entries } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 5af39ac36..dbc0e3436 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, - ABCIViewShowIE, + ABCIViewSeriesIE, ) from .abcnews import ( AbcNewsIE, From eaec61aa92c26be11ebe4a1c13400fa1e9441312 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 28 Feb 2019 23:20:42 +1100 Subject: [PATCH 15/26] [abc.net.au:iview:series]: Playlist type changed to 'url'. This fixes issues where the incorrect extractor ID was used in the archive, allowing for two separate downloads (if you used both abc.net.au:iview and abc.net.au:iview:series). [abc.net.au:iview]: Added support for alternate episode url: https://iview.abc.net.au/programs// (as used by the series episode list href). 
--- youtube_dl/extractor/abc.py | 38 +++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e48c7dae6..86171acc6 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/))(?P[^/?#]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. @@ -131,11 +131,9 @@ class ABCIViewIE(InfoExtractor): video_id = self._match_id(url) video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) - return self._extract_from_video_params(video_id, video_params) - - def _extract_from_video_params(self, video_id, video_params): title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) + house_number = video_params.get('episodeHouseNumber') or video_id path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format( int(time.time()), house_number) @@ -161,6 +159,7 @@ class ABCIViewIE(InfoExtractor): if formats: break self._sort_formats(formats) + subtitles = {} src_vtt = stream.get('captions', {}).get('src-vtt') if src_vtt: @@ -168,9 +167,11 @@ class ABCIViewIE(InfoExtractor): 'url': src_vtt, 'ext': 'vtt', }] + is_live = video_params.get('livestream') == '1' if is_live: title = self._live_title(title) + return { 'id': video_id, 'title': title, @@ -193,7 +194,7 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): - IE_NAME = 'abc.net.au:iview:show' + IE_NAME = 'abc.net.au:iview:series' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' _TESTS = [{ @@ -290,11 +291,28 @@ class 
ABCIViewSeriesIE(ABCIViewIE): entries = [] for episode in episodes: - href = episode.get('href') - video_id = episode.get('episodeHouseNumber') - video_params = self._download_json( - 'https://iview.abc.net.au/api/' + href, video_id) - entries.append(self._extract_from_video_params(video_id, video_params)) + video_id = house_number = episode.get('episodeHouseNumber') + title = unescapeHTML(episode.get('title') or episode['seriesTitle']) + entry = { + '_type': 'url', + 'url': 'https://iview.abc.net.au/' + episode.get('href'), + 'ie_key': ABCIViewIE.ie_key(), + 'id': video_id, + 'title': episode.get('title'), + 'description': episode.get('description'), + 'thumbnail': episode.get('thumbnail'), + 'duration': int_or_none(episode.get('duration')), + 'timestamp': parse_iso8601(episode.get('pubDate'), ' '), + 'series': unescapeHTML(episode.get('seriesTitle')), + 'series_id': episode.get('seriesHouseNumber') or video_id[:7], + 'season_number': int_or_none(self._search_regex( + r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), + 'episode_number': int_or_none(self._search_regex( + r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), + 'episode_id': house_number, + 'uploader_id': episode.get('channel'), + } + entries.append(entry) return { '_type': 'playlist', From 4d6404e65401f269af8aee7661687b30320db82e Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 1 Mar 2019 00:10:53 +1100 Subject: [PATCH 16/26] Cleanup _TESTS. Fixed flake8 complaints. 
--- youtube_dl/extractor/abc.py | 93 ++++--------------------------------- 1 file changed, 10 insertions(+), 83 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 86171acc6..054d18108 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -197,88 +197,17 @@ class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' - _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/play-school-celebrity-covers', - 'info_dict': { - 'title': "Play School Celebrity Covers", - 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', - 'uploader_id': 'abc4kids', + _TESTS = [ + { + 'url': 'https://iview.abc.net.au/show/play-school-story-time', + 'info_dict': { + 'title': "Play School Story Time", + 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 24 }, - 'playlist_count': 31 - }, - { - 'url': 'https://iview.abc.net.au/show/play-school-story-time', - 'info_dict': { - 'title': "Play School Story Time", - 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 24 - }, - { - 'url': 'https://iview.abc.net.au/show/play-school-story-time-languages', - 'info_dict': { - 'title': "Play School Story Time: Languages", - 'description': 'md5:cca001fadcf1cb1508a9301c4fb0343a', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 5 - }, - { - 'url': 'https://iview.abc.net.au/show/big-teds-big-adventure', - 'info_dict': { - 'title': "Big Ted's Big Adventure", - 'description': 'md5:77f30f44f632f0f4d312e3b9af1869f6', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 20 - }, - { - 'url': 'https://iview.abc.net.au/show/humptys-big-adventure', - 'info_dict': { - 'title': "Humpty's Big Adventure", - 'description': 'md5:65c4335e1576ec92426f5d05a52c04f6', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 16 - }, - { - 'url': 
'https://iview.abc.net.au/show/jemimas-big-adventure', - 'info_dict': { - 'title': "Jemima's Big Adventure", - 'description': 'md5:be79641bb70f329ca40b924c25a7f293', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 10 - }, - { - 'url': 'https://iview.abc.net.au/show/joeys-big-adventure', - 'info_dict': { - 'title': "Joey's Big Adventure", - 'description': 'md5:e3529b28bc25de54bceb96f0f4dbee7a', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 13 - }, - { - 'url': 'https://iview.abc.net.au/show/little-teds-big-adventure', - 'info_dict': { - 'title': "Little Ted's Big Adventure", - 'description': 'md5:8d064998070bfafeec142547ab48982c', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 20 - }, - { - 'url': 'https://iview.abc.net.au/show/maurices-big-adventure', - 'info_dict': { - 'title': "Maurice's Big Adventure", - 'description': 'md5:a41d7b9b0c87ef610c117a679f3efd5e', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 12 - }] - + ] def _real_extract(self, url): series_id = self._match_id(url) @@ -321,5 +250,3 @@ class ABCIViewSeriesIE(ABCIViewIE): 'uploader_id': entries[0].get('uploader_id'), 'entries': entries } - - From c322a6f62e8cb63efe2cb7c2690cd14b0ab681b3 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 1 Mar 2019 23:45:40 +1100 Subject: [PATCH 17/26] Fixed up overlapping _VALID_URL between ABCIViewIE and ABCIViewSeriesIE. Added more _TESTS to exercise the additional _VALID_URL options. 
--- youtube_dl/extractor/abc.py | 58 +++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 054d18108..49e7848c9 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,22 +105,53 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/))(?P[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[A-Z0-9]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00', - 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc', + 'url': 'https://iview.abc.net.au/show/ben-and-holly-s-little-kingdom/series/0/video/ZX9371A011S00', 'info_dict': { - 'id': 'ZX9371A050S00', + 'id': 'ZX9371A011S00', 'ext': 'mp4', - 'title': "Gaston's Birthday", + 'title': "Dolly Plum", 'series': "Ben And Holly's Little Kingdom", - 'description': 'md5:f9de914d02f226968f598ac76f105bcf', - 'upload_date': '20180604', + 'description': 'md5:7d5d426d28814ac97b7c98e33f37193c', + 'upload_date': '20190301', 'uploader_id': 'abc4kids', - 'timestamp': 1528140219, + 'timestamp': 1551466585, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://iview.abc.net.au/programs/CK1752H001S00', + 'info_dict': { + 'id': 'CK1752H001S00', + 'ext': 'mp4', + 'title': "Emma Alberici: Guess How Much I Love You (Italian)", + 'series': "Play School Story Time: Languages", + 'description': 'md5:b61ce34edc946e109e76c7432db5d91f', + 'upload_date': '20171204', + 'uploader_id': 'abc4kids', + 'timestamp': 1512414300, + }, + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://iview.abc.net.au/programs/play-school-story-time-languages/CK1752H001S00', + 'info_dict': { + 'id': 
'CK1752H001S00', + 'ext': 'mp4', + 'title': "Emma Alberici: Guess How Much I Love You (Italian)", + 'series': "Play School Story Time: Languages", + 'description': 'md5:b61ce34edc946e109e76c7432db5d91f', + 'upload_date': '20171204', + 'uploader_id': 'abc4kids', + 'timestamp': 1512414300, }, 'params': { 'skip_download': True, @@ -195,9 +226,18 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?$' _TESTS = [ + { + 'url': 'https://iview.abc.net.au/programs/play-school-celebrity-covers', + 'info_dict': { + 'title': "Play School Celebrity Covers", + 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', + 'uploader_id': 'abc4kids', + }, + 'playlist_count': 31 + }, { 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { From 786079f4d2720a9931b004db85123df196762c3e Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 22:31:27 +1000 Subject: [PATCH 18/26] Override suitable so I can remove the trailing $ from _VALID_URL. --- youtube_dl/extractor/abc.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 49e7848c9..48ece8966 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -226,7 +226,7 @@ class ABCIViewIE(InfoExtractor): class ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?$' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?' 
_TESTS = [ { @@ -249,6 +249,10 @@ class ABCIViewSeriesIE(ABCIViewIE): }, ] + @classmethod + def suitable(cls, url): + return False if ABCIViewIE.suitable(url) else super(ABCIViewSeriesIE, cls).suitable(url) + def _real_extract(self, url): series_id = self._match_id(url) series_params = self._download_json( From 5e8d882b1930c04b0691cc2e7a287ffaa525e314 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 23:16:09 +1000 Subject: [PATCH 19/26] Fixed all review comments. --- youtube_dl/extractor/abc.py | 72 ++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 41 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 48ece8966..e56a38503 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/((?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[A-Z0-9]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[^/?#]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. 
@@ -158,6 +158,26 @@ class ABCIViewIE(InfoExtractor): }, }] + def _make_result(self, video_id, title, house_number, video_params, **kwargs): + res = { + 'id': video_id, + 'title': title, + 'description': video_params.get('description'), + 'thumbnail': video_params.get('thumbnail'), + 'duration': int_or_none(video_params.get('duration')), + 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), + 'series': unescapeHTML(video_params.get('seriesTitle')), + 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], + 'season_number': int_or_none(self._search_regex( + r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), + 'episode_number': int_or_none(self._search_regex( + r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), + 'episode_id': house_number, + 'uploader_id': video_params.get('channel'), + } + res.update(**kwargs) + return res + def _real_extract(self, url): video_id = self._match_id(url) video_params = self._download_json( @@ -203,30 +223,15 @@ class ABCIViewIE(InfoExtractor): if is_live: title = self._live_title(title) - return { - 'id': video_id, - 'title': title, - 'description': video_params.get('description'), - 'thumbnail': video_params.get('thumbnail'), - 'duration': int_or_none(video_params.get('eventDuration')), - 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '), - 'series': unescapeHTML(video_params.get('seriesTitle')), - 'series_id': video_params.get('seriesHouseNumber') or video_id[:7], - 'season_number': int_or_none(self._search_regex( - r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), - 'episode_number': int_or_none(self._search_regex( - r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), - 'episode_id': house_number, - 'uploader_id': video_params.get('channel'), - 'formats': formats, - 'subtitles': subtitles, - 'is_live': is_live, - } + return self._make_result(video_id, title, house_number, video_params, + formats=formats, + subtitles=subtitles, + is_live=is_live) class 
ABCIViewSeriesIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:series' - _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[a-z0-9\-]+)/?' + _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[^/?#]+)/?' _TESTS = [ { @@ -266,31 +271,16 @@ class ABCIViewSeriesIE(ABCIViewIE): for episode in episodes: video_id = house_number = episode.get('episodeHouseNumber') title = unescapeHTML(episode.get('title') or episode['seriesTitle']) - entry = { - '_type': 'url', - 'url': 'https://iview.abc.net.au/' + episode.get('href'), - 'ie_key': ABCIViewIE.ie_key(), - 'id': video_id, - 'title': episode.get('title'), - 'description': episode.get('description'), - 'thumbnail': episode.get('thumbnail'), - 'duration': int_or_none(episode.get('duration')), - 'timestamp': parse_iso8601(episode.get('pubDate'), ' '), - 'series': unescapeHTML(episode.get('seriesTitle')), - 'series_id': episode.get('seriesHouseNumber') or video_id[:7], - 'season_number': int_or_none(self._search_regex( - r'\bSeries\s+(\d+)\b', title, 'season number', default=None)), - 'episode_number': int_or_none(self._search_regex( - r'\bEp\s+(\d+)\b', title, 'episode number', default=None)), - 'episode_id': house_number, - 'uploader_id': episode.get('channel'), - } + entry = self._make_result(video_id, episode.get('title'), house_number, episode, + _type='url', + url='https://iview.abc.net.au/' + episode.get('href'), + ie_key=ABCIViewIE.ie_key()) entries.append(entry) return { '_type': 'playlist', 'title': series_params.get('seriesTitle'), 'description': series_params.get('seriesDescription'), - 'uploader_id': entries[0].get('uploader_id'), + 'uploader_id': entries[0].get('uploader_id') if entries else None, 'entries': entries } From 2b90c649bbdf100b53732767ea096819d76f3ad9 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 23:38:04 +1000 Subject: [PATCH 20/26] Matched to Daniel Heath's earlier version of ABCIViewShowIE. 
--- youtube_dl/extractor/abc.py | 24 ++++++++++++++---------- youtube_dl/extractor/extractors.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index e56a38503..4c96941bf 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -229,8 +229,8 @@ class ABCIViewIE(InfoExtractor): is_live=is_live) -class ABCIViewSeriesIE(ABCIViewIE): - IE_NAME = 'abc.net.au:iview:series' +class ABCIViewShowIE(ABCIViewIE): + IE_NAME = 'abc.net.au:iview:show' _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[^/?#]+)/?' _TESTS = [ @@ -256,16 +256,19 @@ class ABCIViewSeriesIE(ABCIViewIE): @classmethod def suitable(cls, url): - return False if ABCIViewIE.suitable(url) else super(ABCIViewSeriesIE, cls).suitable(url) + return False if ABCIViewIE.suitable(url) else super(ABCIViewShowIE, cls).suitable(url) def _real_extract(self, url): - series_id = self._match_id(url) - series_params = self._download_json( - 'https://iview.abc.net.au/api/series/' + series_id, series_id) + show_id = self._match_id(url) + show_data = self._download_json( + 'https://iview.abc.net.au/api/series/' + show_id, show_id) + + title = show_data.get('seriesDescription') or show_id + description = show_data.get('seriesDescription') # Sometimes the episodes are listed in reverse order, with the most recently uploaded ones first. # This is normally for time-limited series. 
Others appear oldest to newest - episodes = series_params.get('episodes') + episodes = show_data.get('episodes') entries = [] for episode in episodes: @@ -279,8 +282,9 @@ class ABCIViewSeriesIE(ABCIViewIE): return { '_type': 'playlist', - 'title': series_params.get('seriesTitle'), - 'description': series_params.get('seriesDescription'), + 'id': show_id, + 'title': title, + 'description': description, 'uploader_id': entries[0].get('uploader_id') if entries else None, 'entries': entries - } + } \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index dbc0e3436..5af39ac36 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, - ABCIViewSeriesIE, + ABCIViewShowIE, ) from .abcnews import ( AbcNewsIE, From ccf7ed236f86a187dbda3994b885fe64300ca7d4 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Thu, 25 Apr 2019 23:56:06 +1000 Subject: [PATCH 21/26] Re-added restrictions to the id expression until the following can be otherwise distinguished: https://iview.abc.net.au/programs/play-school-celebrity-covers https://iview.abc.net.au/programs/CK1752H001S00 --- youtube_dl/extractor/abc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 4c96941bf..458388e65 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[^/?#]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[A-Z0-9]+)' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. 
@@ -231,7 +231,7 @@ class ABCIViewIE(InfoExtractor): class ABCIViewShowIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:show' - _VALID_URL = r'https?://iview\.abc\.net\.au/(show|programs)/(?P[^/?#]+)/?' + _VALID_URL = r'https?://iview\.abc\.net\.au/(?:show|programs)/(?P[a-z0-9\-]+)/?' _TESTS = [ { From 5305e1ece505a8d7d0fe3665455196bcf6d9340b Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 26 Apr 2019 00:21:14 +1000 Subject: [PATCH 22/26] Fixed mixup between title and episode_title. --- youtube_dl/extractor/abc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 458388e65..f5f5f7bea 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -273,8 +273,8 @@ class ABCIViewShowIE(ABCIViewIE): for episode in episodes: video_id = house_number = episode.get('episodeHouseNumber') - title = unescapeHTML(episode.get('title') or episode['seriesTitle']) - entry = self._make_result(video_id, episode.get('title'), house_number, episode, + episode_title = unescapeHTML(episode.get('title') or episode['seriesTitle']) + entry = self._make_result(video_id, episode_title, house_number, episode, _type='url', url='https://iview.abc.net.au/' + episode.get('href'), ie_key=ABCIViewIE.ie_key()) From c43eb6453982a0336279dc4c6c3d2e61ee67fbe3 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 26 Apr 2019 00:30:51 +1000 Subject: [PATCH 23/26] Fixed title to use 'seriesTitle' instead of incorrectly using 'seriesDescription' --- youtube_dl/extractor/abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index f5f5f7bea..22bb4ba8e 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -263,7 +263,7 @@ class ABCIViewShowIE(ABCIViewIE): show_data = self._download_json( 
'https://iview.abc.net.au/api/series/' + show_id, show_id) - title = show_data.get('seriesDescription') or show_id + title = show_data.get('seriesTitle') or show_id description = show_data.get('seriesDescription') # Sometimes the episodes are listed in reverse order, with the most recently uploaded ones first. From 2e0f5efcf145ab32d6a2848c7f385b76b136c974 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Sat, 27 Apr 2019 22:30:54 +1000 Subject: [PATCH 24/26] Improved error message when 'playlist' is missing from video_params. Error message will use the 'statusMessage', if available. This will give messages like: * "This episode is no longer available in iview." * "This episode is not yet available in iview." These additional lines are removed from the 'statusMessage': "Programs are normally available for 14 days." "You may enjoy similar programs to this, shown below." "Learn more about program availability in the FAQs, under Support." --- youtube_dl/extractor/abc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 22bb4ba8e..1542e3503 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -182,6 +182,12 @@ class ABCIViewIE(InfoExtractor): video_id = self._match_id(url) video_params = self._download_json( 'https://iview.abc.net.au/api/programs/' + video_id, video_id) + if 'playlist' not in video_params: + error_message = video_params.get('statusMessage') + if error_message: + error_message = error_message.splitlines()[0] + raise ExtractorError(error_message, expected=True) + raise ExtractorError('no playlist information found in video params') title = unescapeHTML(video_params.get('title') or video_params['seriesTitle']) stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream')) From 21221d0d923bde5796a2b715fa0987511410226c Mon Sep 17 00:00:00 2001 From: David Hood
<30076632+davidphood@users.noreply.github.com> Date: Thu, 2 May 2019 23:22:37 +1000 Subject: [PATCH 25/26] Only match on unambiguous urls - this removes the overly restrictive . urls containing programs// are still used since they are returned by the series json info. --- youtube_dl/extractor/abc.py | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 1542e3503..29d284086 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -105,7 +105,7 @@ class ABCIE(InfoExtractor): class ABCIViewIE(InfoExtractor): IE_NAME = 'abc.net.au:iview' - _VALID_URL = r'https?://iview\.abc\.net\.au/(?:(?:[^/]+/)*video/|programs/(?:[^/]+/)*)(?P[A-Z0-9]+)' + _VALID_URL = r'https?://iview\.abc\.net\.au/(?:programs/[^/]+/|(?:[^/]+/)*video/)(?P[^/?#]+)/?' _GEO_COUNTRIES = ['AU'] # ABC iview programs are normally available for 14 days only. @@ -125,22 +125,6 @@ class ABCIViewIE(InfoExtractor): 'skip_download': True, }, }, - { - 'url': 'https://iview.abc.net.au/programs/CK1752H001S00', - 'info_dict': { - 'id': 'CK1752H001S00', - 'ext': 'mp4', - 'title': "Emma Alberici: Guess How Much I Love You (Italian)", - 'series': "Play School Story Time: Languages", - 'description': 'md5:b61ce34edc946e109e76c7432db5d91f', - 'upload_date': '20171204', - 'uploader_id': 'abc4kids', - 'timestamp': 1512414300, - }, - 'params': { - 'skip_download': True, - }, - }, { 'url': 'https://iview.abc.net.au/programs/play-school-story-time-languages/CK1752H001S00', 'info_dict': { @@ -237,18 +221,9 @@ class ABCIViewIE(InfoExtractor): class ABCIViewShowIE(ABCIViewIE): IE_NAME = 'abc.net.au:iview:show' - _VALID_URL = r'https?://iview\.abc\.net\.au/(?:show|programs)/(?P[a-z0-9\-]+)/?' + _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P[^/?#]+)/?' 
_TESTS = [ - { - 'url': 'https://iview.abc.net.au/programs/play-school-celebrity-covers', - 'info_dict': { - 'title': "Play School Celebrity Covers", - 'description': 'md5:5cf7b4e466b72ee1b930fc95b2a80ed7', - 'uploader_id': 'abc4kids', - }, - 'playlist_count': 31 - }, { 'url': 'https://iview.abc.net.au/show/play-school-story-time', 'info_dict': { From 70b46b7605b523323986244ca88c1fb72f75d267 Mon Sep 17 00:00:00 2001 From: David Hood <30076632+davidphood@users.noreply.github.com> Date: Fri, 3 May 2019 00:12:54 +1000 Subject: [PATCH 26/26] Fixed failing tests for ABCIView and ABCIViewShow --- youtube_dl/extractor/abc.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py index 29d284086..cf6cfa593 100644 --- a/youtube_dl/extractor/abc.py +++ b/youtube_dl/extractor/abc.py @@ -110,16 +110,16 @@ class ABCIViewIE(InfoExtractor): # ABC iview programs are normally available for 14 days only. _TESTS = [{ - 'url': 'https://iview.abc.net.au/show/ben-and-holly-s-little-kingdom/series/0/video/ZX9371A011S00', + 'url': 'https://iview.abc.net.au/show/big-ted-s-big-adventure/series/0/video/CH1307H001S00', 'info_dict': { - 'id': 'ZX9371A011S00', + 'id': 'CH1307H001S00', 'ext': 'mp4', - 'title': "Dolly Plum", - 'series': "Ben And Holly's Little Kingdom", - 'description': 'md5:7d5d426d28814ac97b7c98e33f37193c', - 'upload_date': '20190301', + 'title': "Otters", + 'series': "Big Ted's Big Adventure", + 'description': 'md5:74b89063ffb40eaf0ac95cd9486e645c', + 'upload_date': '20180923', 'uploader_id': 'abc4kids', - 'timestamp': 1551466585, + 'timestamp': 1537682400, }, 'params': { 'skip_download': True, @@ -230,6 +230,7 @@ class ABCIViewShowIE(ABCIViewIE): 'title': "Play School Story Time", 'description': 'md5:2763b35f418d334d72e3d7f7fc7afb82', 'uploader_id': 'abc4kids', + 'id': 'play-school-story-time' }, 'playlist_count': 24 },