From d71bc476a88a5b5bbaeefd0e51bee303d58260f1 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Sat, 20 Jun 2020 03:33:44 +0200 Subject: [PATCH 1/7] [nrk:episode] Fixes KeyError Get content of meta tag Inspired by, and closes #25594 --- youtube_dl/extractor/nrk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 94115534b..dc3d18490 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -427,7 +427,8 @@ class NRKTVEpisodeIE(InfoExtractor): nrk_id = self._parse_json( self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'), - display_id)['@id'] + display_id).get('@id') or \ + self._html_search_meta('nrk:program-id', webpage) assert re.match(NRKTVIE._EPISODE_RE, nrk_id) return self.url_result( From cbd871084c7965e1931625a4eb14f3f8910eed82 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Sat, 20 Jun 2020 03:40:02 +0200 Subject: [PATCH 2/7] [nrk:base] Secondary method to fetch streams Can't see why the original method doesn't work for some streams. 
Bonus: provides 1080p quality Closes #24221 --- youtube_dl/extractor/nrk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index dc3d18490..c0f5eedb3 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -62,6 +62,14 @@ class NRKBaseIE(InfoExtractor): if not asset_url: continue formats = self._extract_akamai_formats(asset_url, video_id) + + playback_manifest = self._download_json( + 'http://%s/playback/manifest/program/%s' % (self._api_host, video_id), + video_id, 'Downloading manifest JSON') + streamurl = playback_manifest.get('statistics').get('conviva').get('streamUrl') + formats.extend(self._extract_m3u8_formats( + streamurl, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False, + errnote='Alternate extractor failed')) if not formats: continue self._sort_formats(formats) From 2be9bcaae6497ccc869c7ab32b28edd28b91af26 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Sat, 20 Jun 2020 05:03:17 +0200 Subject: [PATCH 3/7] [nrk:base] Add alt_title This is what I consider the actual title --- youtube_dl/extractor/nrk.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index c0f5eedb3..94973ab3f 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -41,6 +41,7 @@ class NRKBaseIE(InfoExtractor): break title = data.get('fullTitle') or data.get('mainTitle') or data['title'] + alt_title = data.get('mainTitle') video_id = data.get('id') or video_id entries = [] @@ -170,6 +171,7 @@ class NRKBaseIE(InfoExtractor): 'categories': [category] if category else None, 'age_limit': parse_age_limit(data.get('legalAge')), 'thumbnails': thumbnails, + 'alt_title':alt_title, } vcodec = 'none' if data.get('mediaType') == 'Audio' else None From 5063afc5df99f7fc5212ca2ac2358e14df8d7f23 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Sun, 21 Jun 2020 21:25:05 +0200 Subject: [PATCH 4/7] [nrk:base] Fall back to another 
endpoint for season and episode number Prevents some long-running series from returning `NA` --- youtube_dl/extractor/nrk.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 94973ab3f..4087ba103 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -148,6 +148,13 @@ class NRKBaseIE(InfoExtractor): EPISODENUM_RE, _season_episode, 'episode number', default=None, group='episode')) + if not season_number or episode_number: + programs = self._download_json( + 'http://%s/programs/%s' % (self._api_host, video_id), + video_id, 'Downloading programs manifest JSON') + season_number = int_or_none(programs.get('seasonNumber')) + episode_number = int_or_none(programs.get('episodeNumber')) + thumbnails = None images = data.get('images') if images and isinstance(images, dict): From 64ea58e49c08a3d645adf5da3aa9ee8cbdcf4d74 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Sun, 21 Jun 2020 21:50:36 +0200 Subject: [PATCH 5/7] [nrk:base] Add tests for alt_title, flake8 and convention --- youtube_dl/extractor/nrk.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 4087ba103..87428b397 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -66,7 +66,7 @@ class NRKBaseIE(InfoExtractor): playback_manifest = self._download_json( 'http://%s/playback/manifest/program/%s' % (self._api_host, video_id), - video_id, 'Downloading manifest JSON') + video_id, 'Downloading manifest JSON', fatal=False) streamurl = playback_manifest.get('statistics').get('conviva').get('streamUrl') formats.extend(self._extract_m3u8_formats( streamurl, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False, @@ -151,7 +151,7 @@ class NRKBaseIE(InfoExtractor): if not season_number or episode_number: programs = self._download_json( 'http://%s/programs/%s' % (self._api_host, video_id), - video_id, 'Downloading 
programs manifest JSON') + video_id, 'Downloading programs manifest JSON', fatal=False) season_number = int_or_none(programs.get('seasonNumber')) episode_number = int_or_none(programs.get('episodeNumber')) @@ -178,7 +178,7 @@ class NRKBaseIE(InfoExtractor): 'categories': [category] if category else None, 'age_limit': parse_age_limit(data.get('legalAge')), 'thumbnails': thumbnails, - 'alt_title':alt_title, + 'alt_title': alt_title, } vcodec = 'none' if data.get('mediaType') == 'Audio' else None @@ -366,7 +366,8 @@ class NRKTVIE(NRKBaseIE): 'id': 'KMTE50001317AA', 'ext': 'mp4', 'title': 'Anno 13:30', - 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa', + 'alt_title': 'Anno', + 'description': 'md5:13735a46076f1ed9310ed13dfd69789f', 'duration': 2340, 'series': 'Anno', 'episode': '13:30', @@ -407,6 +408,7 @@ class NRKTVEpisodeIE(InfoExtractor): 'id': 'MUHH36005220BA', 'ext': 'mp4', 'title': 'Kro, krig og kjærlighet 2:6', + 'alt_title': 'Kro, krig og kjærlighet', 'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350', 'duration': 1563, 'series': 'Hellums kro', From 10664e43be202362635bbbc85893b68751381ed8 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Wed, 24 Jun 2020 14:28:34 +0200 Subject: [PATCH 6/7] [nrk:base] Redo playback extractor It does not need to be in the loop, introduces some duplicated parts of code --- youtube_dl/extractor/nrk.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 87428b397..54173f430 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py @@ -40,6 +40,10 @@ class NRKBaseIE(InfoExtractor): self._api_host = api_host break + playback_manifest = self._download_json( + 'http://%s/playback/manifest/program/%s' % (self._api_host, video_id), + video_id, 'Downloading manifest JSON', fatal=False) + title = data.get('fullTitle') or data.get('mainTitle') or data['title'] alt_title = data.get('mainTitle') video_id = 
data.get('id') or video_id @@ -53,6 +57,28 @@ class NRKBaseIE(InfoExtractor): def make_title(t): return self._live_title(t) if live else t + playback_convia = playback_manifest.get('statistics').get('conviva') + if playback_convia: + streamurl = playback_convia.get('streamUrl', None) + stream = self._extract_m3u8_formats(streamurl, video_id, 'mp4', + 'm3u8_native', m3u8_id='hls', fatal=False) + custom = playback_convia.get('custom') + + dur = parse_duration(playback_convia.get('duration')) + subs = {} + for sub_title in playback_manifest.get('playable').get('subtitles'): + subs.setdefault('no', []).append({ + 'url': sub_title.get('webVtt') + }) + self._sort_formats(stream) + entries.append({ + 'id': custom.get('contentId'), + 'title': custom.get('title'), + 'duration': dur, + 'subtitles': subs, + 'formats': stream, + }) + media_assets = data.get('mediaAssets') if media_assets and isinstance(media_assets, list): def video_id_and_title(idx): @@ -63,14 +89,6 @@ class NRKBaseIE(InfoExtractor): if not asset_url: continue formats = self._extract_akamai_formats(asset_url, video_id) - - playback_manifest = self._download_json( - 'http://%s/playback/manifest/program/%s' % (self._api_host, video_id), - video_id, 'Downloading manifest JSON', fatal=False) - streamurl = playback_manifest.get('statistics').get('conviva').get('streamUrl') - formats.extend(self._extract_m3u8_formats( - streamurl, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False, - errnote='Alternate extractor failed')) if not formats: continue self._sort_formats(formats) From da499b84da8e9928b5f6538dba3dfe527e132d17 Mon Sep 17 00:00:00 2001 From: Roxedus Date: Wed, 1 Jul 2020 15:22:01 +0200 Subject: [PATCH 7/7] Fix break when request fails --- youtube_dl/extractor/nrk.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py index 54173f430..9833e8502 100644 --- a/youtube_dl/extractor/nrk.py +++ b/youtube_dl/extractor/nrk.py 
@@ -57,11 +57,12 @@ class NRKBaseIE(InfoExtractor): def make_title(t): return self._live_title(t) if live else t - playback_convia = playback_manifest.get('statistics').get('conviva') - if playback_convia: + playback_convia = (playback_manifest.get('statistics', {}).get('conviva') + if playback_manifest else None) + if isinstance(playback_convia, dict): streamurl = playback_convia.get('streamUrl', None) stream = self._extract_m3u8_formats(streamurl, video_id, 'mp4', - 'm3u8_native', m3u8_id='hls', fatal=False) + 'm3u8_native', m3u8_id='hls', fatal=False) custom = playback_convia.get('custom') dur = parse_duration(playback_convia.get('duration')) @@ -170,8 +171,8 @@ class NRKBaseIE(InfoExtractor): programs = self._download_json( 'http://%s/programs/%s' % (self._api_host, video_id), video_id, 'Downloading programs manifest JSON', fatal=False) - season_number = int_or_none(programs.get('seasonNumber')) - episode_number = int_or_none(programs.get('episodeNumber')) + season_number = int_or_none(programs.get('seasonNumber')) if programs else None + episode_number = int_or_none(programs.get('episodeNumber')) if programs else None thumbnails = None images = data.get('images')