From 9b9d90214a2d9e1745b5a0202068b2180423c051 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan Date: Thu, 14 Jan 2016 14:11:21 +0800 Subject: [PATCH] [WIP] Use YoutubeIE._formats as fallback --- youtube_dl/extractor/common.py | 4 ++-- youtube_dl/extractor/youtube.py | 28 ++++++++++++++++++---------- youtube_dl/utils.py | 2 ++ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b05b22a94..b9f38cda0 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1021,9 +1021,9 @@ class InfoExtractor(object): # TODO: looks like video codec is not always necessarily goes first va_codecs = codecs.split(',') if va_codecs[0]: - f['vcodec'] = va_codecs[0].partition('.')[0] + f['vcodec'] = va_codecs[0] if len(va_codecs) > 1 and va_codecs[1]: - f['acodec'] = va_codecs[1].partition('.')[0] + f['acodec'] = va_codecs[1] resolution = last_info.get('RESOLUTION') if resolution: width_str, height_str = resolution.split('x') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e4f227f19..b3bd91132 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -32,6 +32,7 @@ from ..utils import ( get_element_by_attribute, get_element_by_id, int_or_none, + mimetype2ext, orderedSet, parse_duration, remove_quotes, @@ -1083,9 +1084,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): full_info.update(f) codecs = r.attrib.get('codecs') if codecs: - if full_info.get('acodec') == 'none' and 'vcodec' not in full_info: + if full_info.get('acodec') == 'none': full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info: + elif full_info.get('vcodec') == 'none': full_info['acodec'] = codecs formats.append(full_info) else: @@ -1454,15 +1455,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'ratebypass' not in url: url += '&ratebypass=yes' + dct = { + 'format_id': format_id, + 'url': url, + 'player_url': 
player_url, + } + if format_id in self._formats: + dct.update(self._formats[format_id]) + # Some itags are not included in DASH manifest thus corresponding formats will # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) - dct = { - 'format_id': format_id, - 'url': url, - 'player_url': player_url, + + more_fields = { 'filesize': int_or_none(url_data.get('clen', [None])[0]), 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000), 'width': width, @@ -1470,13 +1477,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(url_data.get('fps', [None])[0]), 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0], } + for key, value in more_fields.items(): + if value: + dct[key] = value type_ = url_data.get('type', [None])[0] if type_: type_split = type_.split(';') kind_ext = type_split[0].split('/') if len(kind_ext) == 2: - kind, ext = kind_ext - dct['ext'] = ext + kind, _ = kind_ext + dct['ext'] = mimetype2ext(type_split[0]) if kind in ('audio', 'video'): codecs = None for mobj in re.finditer( @@ -1494,8 +1504,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'acodec': acodec, 'vcodec': vcodec, }) - if format_id in self._formats: - dct.update(self._formats[format_id]) formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9c1c0e0bd..178d1dcb3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1831,6 +1831,8 @@ def mimetype2ext(mt): 'x-ms-wmv': 'wmv', 'x-mp4-fragmented': 'mp4', 'ttml+xml': 'ttml', + '3gpp': '3gp', + 'x-flv': 'flv', }.get(res, res)