mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Fixed flake8 and now using json data for extraction
This commit is contained in:
parent
5279d2f4e1
commit
cd0f150ce9
@ -10,7 +10,6 @@ from .common import InfoExtractor
|
|||||||
from ..aes import aes_cbc_decrypt
|
from ..aes import aes_cbc_decrypt
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
base_url,
|
|
||||||
bytes_to_intlist,
|
bytes_to_intlist,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
@ -21,7 +20,6 @@ from ..utils import (
|
|||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_basename,
|
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin
|
urljoin
|
||||||
)
|
)
|
||||||
@ -312,7 +310,7 @@ class DRTVPlaylistIE(InfoExtractor):
|
|||||||
'id': 'tv-avisen-21_00_160258',
|
'id': 'tv-avisen-21_00_160258',
|
||||||
'title': 'TV AVISEN 21:00'
|
'title': 'TV AVISEN 21:00'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 20,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dr.dk/drtv/serie/spise-med-price_43537',
|
'url': 'https://www.dr.dk/drtv/serie/spise-med-price_43537',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -320,6 +318,13 @@ class DRTVPlaylistIE(InfoExtractor):
|
|||||||
'title': 'Spise med Price'
|
'title': 'Spise med Price'
|
||||||
},
|
},
|
||||||
'playlist_mincount': 2,
|
'playlist_mincount': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/saeson/spise-med-price_163641',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'spise-med-price_163641',
|
||||||
|
'title': 'Spise med Price'
|
||||||
|
},
|
||||||
|
'playlist_mincount': 2,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -328,46 +333,15 @@ class DRTVPlaylistIE(InfoExtractor):
|
|||||||
return False if DRTVIE.suitable(url) else super(
|
return False if DRTVIE.suitable(url) else super(
|
||||||
DRTVPlaylistIE, cls).suitable(url)
|
DRTVPlaylistIE, cls).suitable(url)
|
||||||
|
|
||||||
def _extract_series(self, url):
|
def _extract_json_data(self, webpage):
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
episodes = []
|
|
||||||
for season in re.finditer(r'href="(?P<url>/drtv/saeson/.+?)"', webpage):
|
|
||||||
season_url = urljoin(base_url(url), season.group('url'))
|
|
||||||
episodes = episodes + self._extract_episode_from_season(season_url)
|
|
||||||
|
|
||||||
if len(episodes) == 0:
|
|
||||||
episodes = episodes + self._extract_episode_from_season(url)
|
|
||||||
|
|
||||||
return episodes
|
|
||||||
|
|
||||||
def _extract_episode_from_season(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
episodes = []
|
|
||||||
|
|
||||||
for episode in re.finditer(r'href="(?P<url>/drtv/se/.+?)"', webpage):
|
|
||||||
episode_url = urljoin(base_url(url), episode.group('url'))
|
|
||||||
episodes.append(episode_url)
|
|
||||||
|
|
||||||
return episodes
|
|
||||||
|
|
||||||
def _extract_json_data(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
return json.loads(re.search(r'(?P<json>{"app":.*?})<\/', webpage).group('json'))
|
return json.loads(re.search(r'(?P<json>{"app":.*?})<\/', webpage).group('json'))
|
||||||
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
base = re.search(r'(?P<url>.*?/drtv/)', url).group()
|
||||||
json = self._extract_json_data(url)
|
|
||||||
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
json = self._extract_json_data(webpage)
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'<h1 class=".*?hero__title".*?>(.+?)</h1>', webpage,
|
r'<h1 class=".*?hero__title".*?>(.+?)</h1>', webpage,
|
||||||
'title', default=None)
|
'title', default=None)
|
||||||
@ -375,26 +349,58 @@ class DRTVPlaylistIE(InfoExtractor):
|
|||||||
if title:
|
if title:
|
||||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||||
|
|
||||||
seasons = []
|
def iterate_all(iterable, returned="key"):
|
||||||
episodes = []
|
"""Returns an iterator that returns all keys or values
|
||||||
base = re.search(r'(?P<url>.*?/drtv)', url).group()
|
of a (nested) iterable.
|
||||||
|
|
||||||
if 'serie' in url:
|
Arguments:
|
||||||
series_item = re.search(r'(?P<item>/serie/[\da-z_-]+)', url).group('item')
|
- iterable: <list> or <dictionary>
|
||||||
seasons = [ i['path'] for i in json.get('cache', {}).get('page', {}).get(series_item, {}).get('item', {}).get('show', {}).get('seasons', {}).get('items', {}) ]
|
- returned: <string> "key" or "value"
|
||||||
elif 'saeson' in url:
|
Returns:
|
||||||
seasons = [url]
|
- <iterator>
|
||||||
|
"""
|
||||||
|
|
||||||
|
if isinstance(iterable, dict):
|
||||||
|
for key, value in iterable.items():
|
||||||
|
if returned == "key":
|
||||||
|
yield key
|
||||||
|
elif returned == "value":
|
||||||
|
if not (isinstance(value, dict) or isinstance(value, list)):
|
||||||
|
yield value
|
||||||
|
else:
|
||||||
|
raise ValueError("'returned' keyword only accepts 'key' or 'value'.")
|
||||||
|
for ret in iterate_all(value, returned=returned):
|
||||||
|
yield ret
|
||||||
|
elif isinstance(iterable, list):
|
||||||
|
for el in iterable:
|
||||||
|
for ret in iterate_all(el, returned=returned):
|
||||||
|
yield ret
|
||||||
|
|
||||||
|
seasons = [url]
|
||||||
|
if 'saeson' not in url:
|
||||||
|
seasons = list(dict.fromkeys([
|
||||||
|
re.search(r'/(?P<season>saeson/[\da-z_-]+)', str(i)).group('season') for i in list(iterate_all(json, "value"))
|
||||||
|
if re.search(r'/(saeson/[\da-z_-]+)', str(i))
|
||||||
|
and i != re.search(r'drtv(?P<item>/.+)', url).group('item')
|
||||||
|
]))
|
||||||
|
|
||||||
episodes = []
|
episodes = []
|
||||||
|
for season in seasons:
|
||||||
ep = self._extract_json_data(base + seasons[0])
|
if season == url:
|
||||||
items = ep.get('cache', {}).get('page', {}).get(seasons[0], {}).get('item', {}).get('episodes', {}).get('items', {})
|
season_data = json
|
||||||
|
else:
|
||||||
episodes = [
|
season_url = urljoin(base, season)
|
||||||
base + i['watchPath'] for i in items
|
season_display_id = self._match_id(season_url)
|
||||||
]
|
season_webpage = self._download_webpage(season_url, season_display_id)
|
||||||
|
season_data = self._extract_json_data(season_webpage)
|
||||||
|
|
||||||
entries = [self.url_result(ep, ie=DRTVIE.ie_key()) for ep in episodes]
|
episodes.extend([
|
||||||
|
re.search(r'/(?P<item>se/[\da-z_-]+)', str(i)).group('item') for i in list(iterate_all(season_data, "value"))
|
||||||
|
if re.search(r'/(se/[\da-z_-]+)', str(i))
|
||||||
|
])
|
||||||
|
episodes = list(dict.fromkeys(episodes))
|
||||||
|
|
||||||
|
entries = [self.url_result(urljoin(base, ep), ie=DRTVIE.ie_key()) for ep in episodes]
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title)
|
return self.playlist_result(entries, playlist_id, title)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user