1
0
mirror of https://github.com/l1ving/youtube-dl synced 2020-11-18 19:53:54 -08:00

Merge remote-tracking branch 'upstream/master' into myversion

This commit is contained in:
Andrew Udvare 2018-08-16 04:15:22 -04:00
commit 131e1647f6
No known key found for this signature in database
GPG Key ID: 1AFD9AFC120C26DD
7 changed files with 196 additions and 65 deletions

View File

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -11,7 +13,13 @@ from ..utils import (
class ExpressenIE(InfoExtractor): class ExpressenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': '2fbbe3ca14392a6b1b36941858d33a45', 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
}, { }, {
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)

View File

@ -899,7 +899,10 @@ from .rai import (
RaiPlayPlaylistIE, RaiPlayPlaylistIE,
RaiIE, RaiIE,
) )
from .raywenderlich import RayWenderlichIE from .raywenderlich import (
RayWenderlichIE,
RayWenderlichCourseIE,
)
from .rbmaradio import RBMARadioIE from .rbmaradio import RBMARadioIE
from .rds import RDSIE from .rds import RDSIE
from .redbulltv import RedBullTVIE from .redbulltv import RedBullTVIE

View File

@ -114,6 +114,7 @@ from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .expressen import ExpressenIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -3109,6 +3110,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key()) viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
expressen_urls = ExpressenIE._extract_urls(webpage)
if expressen_urls:
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View File

@ -6,7 +6,9 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
js_to_json,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -111,8 +113,21 @@ class NovaIE(InfoExtractor):
webpage, 'video id') webpage, 'video id')
config_url = self._search_regex( config_url = self._search_regex(
r'src="(http://tn\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"', r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
webpage, 'config url', default=None) webpage, 'config url', default=None)
config_params = {}
if not config_url:
player = self._parse_json(
self._search_regex(
r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
'player', default='{}'),
video_id, transform_source=js_to_json, fatal=False)
if player:
config_url = url_or_none(player.get('configUrl'))
params = player.get('configParams')
if isinstance(params, dict):
config_params = params
if not config_url: if not config_url:
DEFAULT_SITE_ID = '23000' DEFAULT_SITE_ID = '23000'
@ -127,14 +142,20 @@ class NovaIE(InfoExtractor):
} }
site_id = self._search_regex( site_id = self._search_regex(
r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(site, DEFAULT_SITE_ID) r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
site, DEFAULT_SITE_ID)
config_url = ('http://tn.nova.cz/bin/player/videojs/config.php?site=%s&media=%s&jsVar=vjsconfig' config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
% (site_id, video_id)) config_params = {
'site': site_id,
'media': video_id,
'quality': 3,
'version': 1,
}
config = self._download_json( config = self._download_json(
config_url, display_id, config_url, display_id,
'Downloading config JSON', 'Downloading config JSON', query=config_params,
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1]) transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
mediafile = config['mediafile'] mediafile = config['mediafile']

View File

@ -4,24 +4,37 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .vimeo import VimeoIE from .vimeo import VimeoIE
from ..compat import compat_str
from ..utils import ( from ..utils import (
extract_attributes,
ExtractorError, ExtractorError,
smuggle_url, int_or_none,
unsmuggle_url, merge_dicts,
try_get,
unescapeHTML,
unified_timestamp,
urljoin, urljoin,
) )
class RayWenderlichIE(InfoExtractor): class RayWenderlichIE(InfoExtractor):
_VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)' _VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<course_id>[^/]+)/lessons/(?P<id>\d+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
'info_dict': { 'info_dict': {
'id': '248377018', 'id': '248377018',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Testing In iOS Episode 1: Introduction', 'title': 'Introduction',
'description': 'md5:804d031b3efa9fcb49777d512d74f722',
'timestamp': 1513906277,
'upload_date': '20171222',
'duration': 133, 'duration': 133,
'uploader': 'Ray Wenderlich', 'uploader': 'Ray Wenderlich',
'uploader_id': 'user3304672', 'uploader_id': 'user3304672',
@ -34,69 +47,133 @@ class RayWenderlichIE(InfoExtractor):
'expected_warnings': ['HTTP Error 403: Forbidden'], 'expected_warnings': ['HTTP Error 403: Forbidden'],
}, { }, {
'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
'only_matching': True,
}]
@staticmethod
def _extract_video_id(data, lesson_id):
if not data:
return
groups = try_get(data, lambda x: x['groups'], list) or []
if not groups:
return
for group in groups:
if not isinstance(group, dict):
continue
contents = try_get(data, lambda x: x['contents'], list) or []
for content in contents:
if not isinstance(content, dict):
continue
ordinal = int_or_none(content.get('ordinal'))
if ordinal != lesson_id:
continue
video_id = content.get('identifier')
if video_id:
return compat_str(video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
course_id, lesson_id = mobj.group('course_id', 'id')
display_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, display_id)
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._html_search_meta(
'twitter:image', webpage, 'thumbnail')
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
info = {
'thumbnail': thumbnail,
}
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)
if not vimeo_id:
data = self._parse_json(
self._search_regex(
r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
'data collection', default='{}', group='data'),
display_id, transform_source=unescapeHTML, fatal=False)
video_id = self._extract_video_id(
data, lesson_id) or self._search_regex(
r'/videos/(\d+)/', thumbnail, 'video id')
headers = {
'Referer': url,
'X-Requested-With': 'XMLHttpRequest',
}
csrf_token = self._html_search_meta(
'csrf-token', webpage, 'csrf token', default=None)
if csrf_token:
headers['X-CSRF-Token'] = csrf_token
video = self._download_json(
'https://videos.raywenderlich.com/api/v1/videos/%s.json'
% video_id, display_id, headers=headers)['video']
vimeo_id = video['clips'][0]['provider_id']
info.update({
'_type': 'url_transparent',
'title': video.get('name'),
'description': video.get('description') or video.get(
'meta_description'),
'duration': int_or_none(video.get('duration')),
'timestamp': unified_timestamp(video.get('created_at')),
})
return merge_dicts(info, self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id))
class RayWenderlichCourseIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
(?:
videos\.raywenderlich\.com/courses|
(?:www\.)?raywenderlich\.com
)/
(?P<id>[^/]+)
'''
_TEST = {
'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
'info_dict': { 'info_dict': {
'title': 'Testing in iOS', 'title': 'Testing in iOS',
'id': '105-testing-in-ios', 'id': '3530-testing-in-ios',
}, },
'params': { 'params': {
'noplaylist': False, 'noplaylist': False,
}, },
'playlist_count': 29, 'playlist_count': 29,
}] }
@classmethod
def suitable(cls, url):
return False if RayWenderlichIE.suitable(url) else super(
RayWenderlichCourseIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) course_id = self._match_id(url)
mobj = re.match(self._VALID_URL, url) webpage = self._download_webpage(url, course_id)
course_id, lesson_id = mobj.group('course_id', 'id')
video_id = '%s/%s' % (course_id, lesson_id)
webpage = self._download_webpage(url, video_id)
no_playlist = self._downloader.params.get('noplaylist')
if no_playlist or smuggled_data.get('force_video', False):
if no_playlist:
self.to_screen(
'Downloading just video %s because of --no-playlist'
% video_id)
if '>Subscribe to unlock' in webpage:
raise ExtractorError(
'This content is only available for subscribers',
expected=True)
vimeo_id = self._search_regex(
r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
return self.url_result(
VimeoIE._smuggle_referrer(
'https://player.vimeo.com/video/%s' % vimeo_id, url),
ie=VimeoIE.ie_key(), video_id=vimeo_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% course_id)
lesson_ids = set((lesson_id, ))
for lesson in re.findall(
r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
attrs = extract_attributes(lesson)
if not attrs:
continue
lesson_url = attrs.get('href')
if not lesson_url:
continue
lesson_id = self._search_regex(
r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
if not lesson_id:
continue
lesson_ids.add(lesson_id)
entries = [] entries = []
for lesson_id in sorted(lesson_ids): lesson_urls = set()
for lesson_url in re.findall(
r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % course_id, webpage):
if lesson_url in lesson_urls:
continue
lesson_urls.add(lesson_url)
entries.append(self.url_result( entries.append(self.url_result(
smuggle_url(urljoin(url, lesson_id), {'force_video': True}), urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
ie=RayWenderlichIE.ie_key()))
title = self._search_regex( title = self._og_search_title(
r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title', webpage, default=None) or self._html_search_meta(
default=None) 'twitter:title', webpage, 'title', default=None)
return self.playlist_result(entries, course_id, title) return self.playlist_result(entries, course_id, title)

View File

@ -10,7 +10,7 @@ from ..utils import (
class RedBullTVIE(InfoExtractor): class RedBullTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?redbull\.tv/video/(?P<id>AP-\w+)' _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com/(?:[^/]+/)?tv)/video/(?P<id>AP-\w+)'
_TESTS = [{ _TESTS = [{
# film # film
'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11',
@ -35,6 +35,9 @@ class RedBullTVIE(InfoExtractor):
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

View File

@ -82,7 +82,7 @@ def register_socks_protocols():
compiled_regex_type = type(re.compile('')) compiled_regex_type = type(re.compile(''))
std_headers = { std_headers = {
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Encoding': 'gzip, deflate', 'Accept-Encoding': 'gzip, deflate',