1
0
mirror of https://github.com/l1ving/youtube-dl synced 2020-11-18 19:53:54 -08:00

Merge branch 'master' into PornHub-issue-16078

This commit is contained in:
Parmjit Virk 2018-11-17 14:45:19 -06:00
commit 8481dbce73
32 changed files with 808 additions and 394 deletions

View File

@ -6,8 +6,8 @@
--- ---
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29** - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18**
### Before submitting an *issue* make sure you have: ### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections - [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: [] [debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
[debug] youtube-dl version 2018.10.29 [debug] youtube-dl version 2018.11.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {} [debug] Proxy map: {}

View File

@ -296,5 +296,26 @@ title = self._search_regex(
### Use safe conversion functions ### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
Use `try_get` for safe metadata extraction from parsed JSON.
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```
##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

View File

@ -1,3 +1,54 @@
version 2018.11.18
Extractors
+ [wwe] Extract subtitles
+ [wwe] Add support for playlists (#14781)
+ [wwe] Add support for wwe.com (#14781, #17450)
* [vk] Detect geo restriction (#17767)
* [openload] Use original host during extraction (#18211)
* [atvat] Fix extraction (#18041)
+ [rte] Add support for new API endpoint (#18206)
* [tnaflixnetwork:embed] Fix extraction (#18205)
* [picarto] Use API and add token support (#16518)
+ [zype] Add support for player.zype.com (#18143)
* [vivo] Fix extraction (#18139)
* [ruutu] Update API endpoint (#18138)
version 2018.11.07
Extractors
+ [youtube] Add another JS signature function name regex (#18091, #18093,
#18094)
* [facebook] Fix tahoe request (#17171)
* [cliphunter] Fix extraction (#18083)
+ [youtube:playlist] Add support for invidio.us (#18077)
* [zattoo] Arrange API hosts for derived extractors (#18035)
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
version 2018.11.03
Core
* [extractor/common] Ensure response handle is not prematurely closed before
it can be read if it matches expected_status (#17195, #17846, #17447)
Extractors
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
+ [ehftv] Add support for ehftv.com (#15408)
* [azmedien] Adapt to major site redesign (#17745, #17746)
+ [twitcasting] Add support for twitcasting.tv (#17981)
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
+ [openload] Add support for oload.fun (#18045)
* [njpwworld] Fix authentication (#17427)
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
* [theplatform] Improve error detection (#13222)
* [cnbc] Simplify extraction (#14280, #17110)
+ [cnbc] Add support for new URL schema (#14193)
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
* [aparat] Fix extraction
version 2018.10.29 version 2018.10.29
Core Core

View File

@ -1168,7 +1168,28 @@ title = self._search_regex(
### Use safe conversion functions ### Use safe conversion functions
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
Use `url_or_none` for safe URL processing.
Use `try_get` for safe metadata extraction from parsed JSON.
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
#### More examples
##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```
##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
# EMBEDDING YOUTUBE-DL # EMBEDDING YOUTUBE-DL

View File

@ -84,8 +84,6 @@
- **awaan:season** - **awaan:season**
- **awaan:video** - **awaan:video**
- **AZMedien**: AZ Medien videos - **AZMedien**: AZ Medien videos
- **AZMedienPlaylist**: AZ Medien playlists
- **AZMedienShowPlaylist**: AZ Medien show playlists
- **BaiduVideo**: 百度视频 - **BaiduVideo**: 百度视频
- **bambuser** - **bambuser**
- **bambuser:channel** - **bambuser:channel**
@ -178,6 +176,7 @@
- **Clyp** - **Clyp**
- **cmt.com** - **cmt.com**
- **CNBC** - **CNBC**
- **CNBCVideo**
- **CNN** - **CNN**
- **CNNArticle** - **CNNArticle**
- **CNNBlogs** - **CNNBlogs**
@ -251,6 +250,7 @@
- **EchoMsk** - **EchoMsk**
- **egghead:course**: egghead.io course - **egghead:course**: egghead.io course
- **egghead:lesson**: egghead.io lesson - **egghead:lesson**: egghead.io lesson
- **ehftv**
- **eHow** - **eHow**
- **EinsUndEinsTV** - **EinsUndEinsTV**
- **Einthusan** - **Einthusan**
@ -445,6 +445,8 @@
- **limelight:channel** - **limelight:channel**
- **limelight:channel_list** - **limelight:channel_list**
- **LineTV** - **LineTV**
- **linkedin:learning**
- **linkedin:learning:course**
- **LiTV** - **LiTV**
- **LiveLeak** - **LiveLeak**
- **LiveLeakEmbed** - **LiveLeakEmbed**
@ -930,6 +932,7 @@
- **TVPlayer** - **TVPlayer**
- **TVPlayHome** - **TVPlayHome**
- **Tweakers** - **Tweakers**
- **TwitCasting**
- **twitch:chapter** - **twitch:chapter**
- **twitch:clips** - **twitch:clips**
- **twitch:profile** - **twitch:profile**
@ -1077,6 +1080,7 @@
- **wrzuta.pl:playlist** - **wrzuta.pl:playlist**
- **WSJ**: Wall Street Journal - **WSJ**: Wall Street Journal
- **WSJArticle** - **WSJArticle**
- **WWE**
- **XBef** - **XBef**
- **XboxClips** - **XboxClips**
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
@ -1136,3 +1140,4 @@
- **ZDF** - **ZDF**
- **ZDFChannel** - **ZDFChannel**
- **zingmp3**: mp3.zing.vn - **zingmp3**: mp3.zing.vn
- **Zype**

View File

@ -7,6 +7,7 @@ import json
import os.path import os.path
import re import re
import types import types
import ssl
import sys import sys
import youtube_dl.extractor import youtube_dl.extractor
@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w) real_warning(w)
ydl.report_warning = _report_warning ydl.report_warning = _report_warning
def http_server_port(httpd):
    """Return the port number the given test HTTP(S) server is bound to.

    The port is the second item of ``getsockname()`` on the server's
    listening socket.
    """
    # In Jython SSLSocket is not a subclass of socket.socket, so the
    # wrapped socket is reached through its ``sock`` attribute instead.
    is_jython_ssl = os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket)
    listener = httpd.socket.sock if is_jython_ssl else httpd.socket
    return listener.getsockname()[1]

View File

@ -9,11 +9,30 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, expect_dict, expect_value from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
from youtube_dl.compat import compat_etree_fromstring from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
import threading
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
    """Minimal HTTP handler backing the InfoExtractor download tests.

    ``GET /teapot`` is answered with ``TEAPOT_RESPONSE_STATUS`` and
    ``TEAPOT_RESPONSE_BODY``; every other path is answered with 404.
    """

    def log_message(self, format, *args):
        # Deliberately a no-op so the handler emits no per-request log output.
        pass

    def do_GET(self):
        if self.path == '/teapot':
            self.send_response(TEAPOT_RESPONSE_STATUS)
            self.send_header('Content-Type', 'text/html; charset=utf-8')
            self.end_headers()
            self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
        else:
            # Reply explicitly rather than `assert False`: an assert is
            # stripped under `python -O` (leaving the request unanswered)
            # and otherwise raises inside the server thread, where the
            # failure is not reported to the test that made the request.
            self.send_error(404)
class TestIE(InfoExtractor): class TestIE(InfoExtractor):
@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)): for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i]) expect_dict(self, entries[i], expected_entries[i])
def test_response_with_expected_status_returns_content(self):
    # Checks for mitigations against the effects of
    # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
    # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
    # or the underlying `_download_webpage_handle` returning no content
    # when a response matches `expected_status`.
    httpd = compat_http_server.HTTPServer(
        ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
    port = http_server_port(httpd)
    server_thread = threading.Thread(target=httpd.serve_forever)
    server_thread.daemon = True
    server_thread.start()
    # The serving thread is started before entering `try` so that
    # `shutdown()` in `finally` always has a running `serve_forever` loop
    # to stop; the server and its socket are then released even when the
    # download or the assertion fails.
    try:
        (content, urlh) = self.ie._download_webpage_handle(
            'http://127.0.0.1:%d/teapot' % port, None,
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)
    finally:
        httpd.shutdown()
        httpd.server_close()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -9,26 +9,16 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import try_rm from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename from youtube_dl.utils import encodeFilename
import ssl
import threading import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
TEST_SIZE = 10 * 1024 TEST_SIZE = 10 * 1024

View File

@ -8,6 +8,7 @@ import sys
import unittest import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import http_server_port
from youtube_dl import YoutubeDL from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl import ssl
@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TEST_DIR = os.path.dirname(os.path.abspath(__file__))
def http_server_port(httpd):
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
# In Jython SSLSocket is not a subclass of socket.socket
sock = httpd.socket.sock
else:
sock = httpd.socket
return sock.getsockname()[1]
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args): def log_message(self, format, *args):
pass pass

View File

@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
display_id = self._match_id(url) display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_data = self._parse_json(unescapeHTML(self._search_regex( video_data = self._parse_json(unescapeHTML(self._search_regex(
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"', [r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
webpage, 'player data')), display_id)['config']['initial_video'] r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
webpage, 'player data', group='json')),
display_id)['config']['initial_video']
video_id = video_data['id'] video_id = video_data['id']
video_title = video_data['title'] video_title = video_data['title']

View File

@ -1,213 +1,90 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .kaltura import KalturaIE from .kaltura import KalturaIE
from ..utils import (
get_element_by_class,
get_element_by_id,
strip_or_none,
urljoin,
)
class AZMedienBaseIE(InfoExtractor): class AZMedienIE(InfoExtractor):
def _kaltura_video(self, partner_id, entry_id):
return self.url_result(
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
video_id=entry_id)
class AZMedienIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien videos' IE_DESC = 'AZ Medien videos'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?:www\.)? (?:www\.)?
(?: (?P<host>
telezueri\.ch| telezueri\.ch|
telebaern\.tv| telebaern\.tv|
telem1\.ch telem1\.ch
)/ )/
[0-9]+-show-[^/\#]+ [^/]+/
(?: (?P<id>
/[0-9]+-episode-[^/\#]+ [^/]+-(?P<article_id>\d+)
(?:
/[0-9]+-segment-(?:[^/\#]+\#)?|
\#
)|
\#
) )
(?P<id>[^\#]+) (?:
\#video=
(?P<kaltura_id>
[_0-9a-z]+
)
)?
''' '''
_TESTS = [{ _TESTS = [{
# URL with 'segment' 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
'info_dict': { 'info_dict': {
'id': '1_2444peh4', 'id': '1_anruz3wy',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom', 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8', 'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
'uploader_id': 'TeleZ?ri', 'uploader_id': 'TVOnline',
'upload_date': '20161218', 'upload_date': '20180930',
'timestamp': 1482084490, 'timestamp': 1538328802,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
}, },
}, { }, {
# URL with 'segment' and fragment: 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
'only_matching': True
}, {
# URL with 'episode' and fragment:
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
'only_matching': True
}, {
# URL with 'show' and fragment:
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
'only_matching': True 'only_matching': True
}] }]
def _real_extract(self, url): _PARTNER_ID = '1719221'
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
partner_id = self._search_regex(
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
webpage, 'kaltura partner id')
entry_id = self._html_search_regex(
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
return self._kaltura_video(partner_id, entry_id)
class AZMedienPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?P<id>[0-9]+-
(?:
show|
topic|
themen
)-[^/\#]+
(?:
/[0-9]+-episode-[^/\#]+
)?
)$
'''
_TESTS = [{
# URL with 'episode'
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
'info_dict': {
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
'title': 'News - Donnerstag, 15. Dezember 2016',
},
'playlist_count': 9,
}, {
# URL with 'themen'
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
'info_dict': {
'id': '258-themen-tele-m1-classics',
'title': 'Tele M1 Classics',
},
'playlist_mincount': 15,
}, {
# URL with 'topic', contains nested playlists
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
'only_matching': True,
}, {
# URL with 'show' only
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
'only_matching': True
}]
def _real_extract(self, url): def _real_extract(self, url):
show_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
webpage = self._download_webpage(url, show_id) video_id = mobj.group('id')
entry_id = mobj.group('kaltura_id')
entries = [] if not entry_id:
webpage = self._download_webpage(url, video_id)
api_path = self._search_regex(
r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
webpage, 'api path')
api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
payload = {
'query': '''query VideoContext($articleId: ID!) {
article: node(id: $articleId) {
... on Article {
mainAssetRelation {
asset {
... on VideoAsset {
kalturaId
}
}
}
}
}
}''',
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
}
json_data = self._download_json(
api_url, video_id, headers={
'Content-Type': 'application/json',
},
data=json.dumps(payload).encode())
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
partner_id = self._search_regex( return self.url_result(
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)', 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
webpage, 'kaltura partner id', default=None) ie=KalturaIE.ie_key(), video_id=entry_id)
if partner_id:
entries = [
self._kaltura_video(partner_id, m.group('id'))
for m in re.finditer(
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
if not entries:
entries = [
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
for m in re.finditer(
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
if not entries:
entries = [
# May contain nested playlists (e.g. [1]) thus no explicit
# ie_key
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
self.url_result(urljoin(url, m.group('url')))
for m in re.finditer(
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
title = self._search_regex(
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
webpage, 'title',
default=strip_or_none(get_element_by_id(
'video-title', webpage)), group='title')
return self.playlist_result(entries, show_id, title)
class AZMedienShowPlaylistIE(AZMedienBaseIE):
IE_DESC = 'AZ Medien show playlists'
_VALID_URL = r'''(?x)
https?://
(?:www\.)?
(?:
telezueri\.ch|
telebaern\.tv|
telem1\.ch
)/
(?:
all-episodes|
alle-episoden
)/
(?P<id>[^/?#&]+)
'''
_TEST = {
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
'info_dict': {
'id': 'astrotalk',
'title': 'TeleZüri: AstroTalk - alle episoden',
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
},
'playlist_mincount': 13,
}
def _real_extract(self, url):
playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
episodes = get_element_by_class('search-mobile-box', webpage)
entries = [self.url_result(
urljoin(url, m.group('url'))) for m in re.finditer(
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
title = self._og_search_title(webpage, fatal=False)
description = self._og_search_description(webpage)
return self.playlist_result(entries, playlist_id, title, description)

View File

@ -1,19 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none from ..utils import (
int_or_none,
url_or_none,
_translation_table = { )
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
}
def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
class CliphunterIE(InfoExtractor): class CliphunterIE(InfoExtractor):
@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
formats = [] formats = []
for format_id, f in gexo_files.items(): for format_id, f in gexo_files.items():
video_url = f.get('url') video_url = url_or_none(f.get('url'))
if not video_url: if not video_url:
continue continue
fmt = f.get('fmt') fmt = f.get('fmt')
height = f.get('h') height = f.get('h')
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
formats.append({ formats.append({
'url': _decode(video_url), 'url': video_url,
'format_id': format_id, 'format_id': format_id,
'width': int_or_none(f.get('w')), 'width': int_or_none(f.get('w')),
'height': int_or_none(height), 'height': int_or_none(height),

View File

@ -606,6 +606,11 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if isinstance(err, compat_urllib_error.HTTPError): if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status): if self.__can_accept_status_code(err, expected_status):
# Retain reference to error to prevent file object from
# being closed before it can be read. Works around the
# effects of <https://bugs.python.org/issue15002>
# introduced in Python 3.4.1.
err.fp._error = err
return err.fp return err.fp
if errnote is False: if errnote is False:

View File

@ -88,11 +88,7 @@ from .awaan import (
AWAANLiveIE, AWAANLiveIE,
AWAANSeasonIE, AWAANSeasonIE,
) )
from .azmedien import ( from .azmedien import AZMedienIE
AZMedienIE,
AZMedienPlaylistIE,
AZMedienShowPlaylistIE,
)
from .baidu import BaiduVideoIE from .baidu import BaiduVideoIE
from .bambuser import BambuserIE, BambuserChannelIE from .bambuser import BambuserIE, BambuserChannelIE
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
@ -543,6 +539,7 @@ from .la7 import LA7IE
from .laola1tv import ( from .laola1tv import (
Laola1TvEmbedIE, Laola1TvEmbedIE,
Laola1TvIE, Laola1TvIE,
EHFTVIE,
ITTFIE, ITTFIE,
) )
from .lci import LCIIE from .lci import LCIIE
@ -1196,6 +1193,7 @@ from .tweakers import TweakersIE
from .twentyfourvideo import TwentyFourVideoIE from .twentyfourvideo import TwentyFourVideoIE
from .twentymin import TwentyMinutenIE from .twentymin import TwentyMinutenIE
from .twentythreevideo import TwentyThreeVideoIE from .twentythreevideo import TwentyThreeVideoIE
from .twitcasting import TwitCastingIE
from .twitch import ( from .twitch import (
TwitchVideoIE, TwitchVideoIE,
TwitchChapterIE, TwitchChapterIE,
@ -1388,6 +1386,7 @@ from .wsj import (
WSJIE, WSJIE,
WSJArticleIE, WSJArticleIE,
) )
from .wwe import WWEIE
from .xbef import XBefIE from .xbef import XBefIE
from .xboxclips import XboxClipsIE from .xboxclips import XboxClipsIE
from .xfileshare import XFileShareIE from .xfileshare import XFileShareIE
@ -1480,3 +1479,4 @@ from .zattoo import (
) )
from .zdf import ZDFIE, ZDFChannelIE from .zdf import ZDFIE, ZDFChannelIE
from .zingmp3 import ZingMp3IE from .zingmp3 import ZingMp3IE
from .zype import ZypeIE

View File

@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36' _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s' _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true' _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
_TESTS = [{ _TESTS = [{
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf', 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',

View File

@ -114,6 +114,7 @@ from .apa import APAIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .expressen import ExpressenIE from .expressen import ExpressenIE
from .zype import ZypeIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2070,6 +2071,20 @@ class GenericIE(InfoExtractor):
}, },
'playlist_count': 6, 'playlist_count': 6,
}, },
{
# Zype embed
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
'info_dict': {
'id': '5b400b834b32992a310622b9',
'ext': 'mp4',
'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g',
},
'add_ie': [ZypeIE.ie_key()],
'params': {
'skip_download': True,
},
},
{ {
# videojs embed # videojs embed
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904', 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
@ -3129,6 +3144,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key()) expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
zype_urls = ZypeIE._extract_urls(webpage)
if zype_urls:
return self.playlist_from_matches(
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries:

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
def _extract_token_url(self, stream_access_url, video_id, data): def _extract_token_url(self, stream_access_url, video_id, data):
return self._download_json( return self._download_json(
stream_access_url, video_id, headers={ self._proto_relative_url(stream_access_url, 'https:'), video_id,
headers={
'Content-Type': 'application/json', 'Content-Type': 'application/json',
}, data=json.dumps(data).encode())['data']['stream-access'][0] }, data=json.dumps(data).encode())['data']['stream-access'][0]
@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
} }
class Laola1TvIE(Laola1TvEmbedIE): class Laola1TvBaseIE(Laola1TvEmbedIE):
def _extract_video(self, url):
    """Extract a single video from the page at ``url``.

    Downloads the page, parses the ``conf`` JavaScript object embedded in
    it, fetches the player configuration from ``conf['configUrl']`` and
    resolves the stream formats through the stream access token URL.

    Returns an info dict with ``id``, ``display_id``, ``title``,
    ``description``, ``thumbnail``, ``categories``, ``formats`` and
    ``is_live``.

    Raises ExtractorError (expected) when the page states that the live
    stream has already finished or when the player configuration carries
    an ``error`` value.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    if 'Dieser Livestream ist bereits beendet.' in webpage:
        raise ExtractorError('This live stream has already finished.', expected=True)

    # The `shareurl` entry is stripped from the object before conversion
    # (presumably because js_to_json cannot handle its value - confirm).
    conf = self._parse_json(self._search_regex(
        r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
        display_id,
        transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
    video_id = conf['videoid']

    config = self._download_json(conf['configUrl'], video_id, query={
        'videoid': video_id,
        'partnerid': conf['partnerid'],
        'language': conf.get('language', ''),
        'portal': conf.get('portalid', ''),
    })
    error = config.get('error')
    if error:
        raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

    video_data = config['video']
    title = video_data['title']
    is_live = video_data.get('isLivestream') and video_data.get('isLive')
    # 'metaInformation' is read with .get(), i.e. treated as optional, so
    # fall back to an empty dict instead of calling .get() on None.
    meta = video_data.get('metaInformation') or {}
    sports = meta.get('sports')
    categories = sports.split(',') if sports else []

    token_url = self._extract_token_url(
        video_data['streamAccess'], video_id,
        video_data['abo']['required'])
    formats = self._extract_formats(token_url, video_id)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': self._live_title(title) if is_live else title,
        'description': video_data.get('description'),
        'thumbnail': video_data.get('image'),
        'categories': categories,
        'formats': formats,
        'is_live': is_live,
    }
class Laola1TvIE(Laola1TvBaseIE):
IE_NAME = 'laola1tv' IE_NAME = 'laola1tv'
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', 'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
'info_dict': { 'info_dict': {
@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) return self._extract_video(url)
webpage = self._download_webpage(url, display_id)
if 'Dieser Livestream ist bereits beendet.' in webpage: class EHFTVIE(Laola1TvBaseIE):
raise ExtractorError('This live stream has already finished.', expected=True) IE_NAME = 'ehftv'
_VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
conf = self._parse_json(self._search_regex( _TESTS = [{
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'), 'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
display_id, js_to_json) 'info_dict': {
'id': '1166761',
'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
'ext': 'mp4',
'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
'is_live': False,
'categories': ['Handball'],
},
'params': {
'skip_download': True,
},
}]
video_id = conf['videoid'] def _real_extract(self, url):
return self._extract_video(url)
config = self._download_json(conf['configUrl'], video_id, query={
'videoid': video_id,
'partnerid': conf['partnerid'],
'language': conf.get('language', ''),
'portal': conf.get('portalid', ''),
})
error = config.get('error')
if error:
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
video_data = config['video']
title = video_data['title']
is_live = video_data.get('isLivestream') and video_data.get('isLive')
meta = video_data.get('metaInformation')
sports = meta.get('sports')
categories = sports.split(',') if sports else []
token_url = self._extract_token_url(
video_data['streamAccess'], video_id,
video_data['abo']['required'])
formats = self._extract_formats(token_url, video_id)
return {
'id': video_id,
'display_id': display_id,
'title': self._live_title(title) if is_live else title,
'description': video_data.get('description'),
'thumbnail': video_data.get('image'),
'categories': categories,
'formats': formats,
'is_live': is_live,
}
class ITTFIE(InfoExtractor): class ITTFIE(InfoExtractor):

View File

@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
bitrates = self._parse_json( bitrates = self._parse_json(
self._search_regex( self._search_regex(
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'), r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json) video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd') QUALITIES = ('lq', 'mq', 'hq', 'hd')

View File

@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
class OpenloadIE(InfoExtractor): class OpenloadIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' _VALID_URL = r'''(?x)
https?://
(?P<host>
(?:www\.)?
(?:
openload\.(?:co|io|link)|
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
)
)/
(?:f|embed)/
(?P<id>[a-zA-Z0-9-_]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://openload.co/f/kUEfGclsU9o', 'url': 'https://openload.co/f/kUEfGclsU9o',
@ -319,7 +330,10 @@ class OpenloadIE(InfoExtractor):
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'https://oload.icu/f/-_i4y_F_Hs8', 'url': 'https://oload.icu/f/-_i4y_F_Hs8',
'only_matching': True 'only_matching': True,
}, {
'url': 'https://oload.fun/f/gb6G1H4sHXY',
'only_matching': True,
}] }]
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
@ -331,8 +345,11 @@ class OpenloadIE(InfoExtractor):
webpage) webpage)
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
url_pattern = 'https://openload.co/%%s/%s/' % video_id host = mobj.group('host')
video_id = mobj.group('id')
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
headers = { headers = {
'User-Agent': self._USER_AGENT, 'User-Agent': self._USER_AGENT,
} }
@ -365,7 +382,7 @@ class OpenloadIE(InfoExtractor):
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage, r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
'stream URL')) 'stream URL'))
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
title = self._og_search_title(webpage, default=None) or self._search_regex( title = self._og_search_title(webpage, default=None) or self._search_regex(
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage, r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
@ -376,7 +393,7 @@ class OpenloadIE(InfoExtractor):
entry = entries[0] if entries else {} entry = entries[0] if entries else {}
subtitles = entry.get('subtitles') subtitles = entry.get('subtitles')
info_dict = { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None), 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
@ -385,4 +402,3 @@ class OpenloadIE(InfoExtractor):
'subtitles': subtitles, 'subtitles': subtitles,
'http_headers': headers, 'http_headers': headers,
} }
return info_dict

View File

@ -15,6 +15,7 @@ from ..utils import (
strip_jsonp, strip_jsonp,
unescapeHTML, unescapeHTML,
unified_strdate, unified_strdate,
url_or_none,
) )
@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor):
webpage, 'playlist', group='json'), webpage, 'playlist', group='json'),
playlist_id, transform_source=unescapeHTML)['playlist']['videos'] playlist_id, transform_source=unescapeHTML)['playlist']['videos']
def quality_to_int(s):
m = re.search('([0-9]+)', s)
if m is None:
return -1
return int(m.group(1))
entries = [] entries = []
for sd in data_jsb: for sd in data_jsb:
video_id, title = sd.get('id'), sd.get('title') video_id, title = sd.get('id'), sd.get('title')
if not video_id or not title: if not video_id or not title:
continue continue
video_id = compat_str(video_id) video_id = compat_str(video_id)
formats = [{ formats = []
'preference': -10 if fd['delivery'] == 'hls' else None, for fd in sd['sources']:
'format_id': '%s-%s-%s' % ( src = url_or_none(fd.get('src'))
fd['delivery'], fd['quality'], fd['quality_string']), if not src:
'url': fd['src'], continue
'protocol': fd['protocol'], format_id_list = []
'quality': quality_to_int(fd['quality']), for key in ('delivery', 'quality', 'quality_string'):
} for fd in sd['sources']] value = fd.get(key)
if value:
format_id_list.append(value)
format_id = '-'.join(format_id_list)
if determine_ext(fd['src']) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
fd['src'], video_id, 'mp4', m3u8_id=format_id))
elif determine_ext(fd['src']) == 'f4m':
formats.extend(self._extract_f4m_formats(
fd['src'], video_id, f4m_id=format_id))
else:
formats.append({
'format_id': format_id,
'url': src,
'protocol': fd.get('protocol'),
})
# Check for geoblocking. # Check for geoblocking.
# There is a property is_geoprotection, but that's always false # There is a property is_geoprotection, but that's always false

View File

@ -1,6 +1,7 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time import time
from .common import InfoExtractor from .common import InfoExtractor
@ -15,7 +16,7 @@ from ..utils import (
class PicartoIE(InfoExtractor): class PicartoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)' _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
_TEST = { _TEST = {
'url': 'https://picarto.tv/Setz', 'url': 'https://picarto.tv/Setz',
'info_dict': { 'info_dict': {
@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url) return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
def _real_extract(self, url): def _real_extract(self, url):
channel_id = self._match_id(url) mobj = re.match(self._VALID_URL, url)
stream_page = self._download_webpage(url, channel_id) channel_id = mobj.group('id')
if '>This channel does not exist' in stream_page: metadata = self._download_json(
raise ExtractorError( 'https://api.picarto.tv/v1/channel/name/' + channel_id,
'Channel %s does not exist' % channel_id, expected=True) channel_id)
player = self._parse_json( if metadata.get('online') is False:
self._search_regex(
r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
'player settings'),
channel_id, transform_source=js_to_json)
if player.get('online') is False:
raise ExtractorError('Stream is offline', expected=True) raise ExtractorError('Stream is offline', expected=True)
cdn_data = self._download_json( cdn_data = self._download_json(
@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
data=urlencode_postdata({'loadbalancinginfo': channel_id}), data=urlencode_postdata({'loadbalancinginfo': channel_id}),
note='Downloading load balancing info') note='Downloading load balancing info')
def get_event(key): token = mobj.group('token') or 'public'
return try_get(player, lambda x: x['event'][key], compat_str) or ''
params = { params = {
'token': player.get('token') or '',
'ticket': get_event('ticket'),
'con': int(time.time() * 1000), 'con': int(time.time() * 1000),
'type': get_event('ticket'), 'token': token,
'scope': get_event('scope'),
} }
prefered_edge = cdn_data.get('preferedEdge') prefered_edge = cdn_data.get('preferedEdge')
default_tech = player.get('defaultTech')
formats = [] formats = []
for edge in cdn_data['edges']: for edge in cdn_data['edges']:
@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
preference = 0 preference = 0
if edge_id == prefered_edge: if edge_id == prefered_edge:
preference += 1 preference += 1
if tech_type == default_tech:
preference += 1
format_id = [] format_id = []
if edge_id: if edge_id:
format_id.append(edge_id) format_id.append(edge_id)
@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
continue continue
self._sort_formats(formats) self._sort_formats(formats)
mature = player.get('mature') mature = metadata.get('adult')
if mature is None: if mature is None:
age_limit = None age_limit = None
else: else:
@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
return { return {
'id': channel_id, 'id': channel_id,
'title': self._live_title(channel_id), 'title': self._live_title(metadata.get('title') or channel_id),
'is_live': True, 'is_live': True,
'thumbnail': player.get('vodThumb'), 'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
'channel': channel_id,
'channel_url': 'https://picarto.tv/%s' % channel_id,
'age_limit': age_limit, 'age_limit': age_limit,
'formats': formats, 'formats': formats,
} }

View File

@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
from ..utils import ( from ..utils import (
float_or_none, float_or_none,
parse_iso8601, parse_iso8601,
str_or_none,
try_get,
unescapeHTML, unescapeHTML,
url_or_none,
ExtractorError, ExtractorError,
) )
@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
item_id = self._match_id(url) item_id = self._match_id(url)
try: info_dict = {}
json_string = self._download_json(
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
item_id)
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
if error_info:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error_info['message']),
expected=True)
raise
# NB the string values in the JSON are stored using XML escaping(!)
show = json_string['shows'][0]
title = unescapeHTML(show['title'])
description = unescapeHTML(show.get('description'))
thumbnail = show.get('thumbnail')
duration = float_or_none(show.get('duration'), 1000)
timestamp = parse_iso8601(show.get('published'))
mg = show['media:group'][0]
formats = [] formats = []
if mg.get('url'): ENDPOINTS = (
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url']) 'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
if m: 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
m = m.groupdict() )
formats.append({
'url': m['url'] + '/' + m['app'],
'app': m['app'],
'play_path': m['playpath'],
'player_url': url,
'ext': 'flv',
'format_id': 'rtmp',
})
if mg.get('hls_server') and mg.get('hls_url'): for num, ep_url in enumerate(ENDPOINTS, start=1):
formats.extend(self._extract_m3u8_formats( try:
mg['hls_server'] + mg['hls_url'], item_id, 'mp4', data = self._download_json(ep_url + item_id, item_id)
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) except ExtractorError as ee:
if num < len(ENDPOINTS) or formats:
continue
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
if error_info:
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error_info['message']),
expected=True)
raise
if mg.get('hds_server') and mg.get('hds_url'): # NB the string values in the JSON are stored using XML escaping(!)
formats.extend(self._extract_f4m_formats( show = try_get(data, lambda x: x['shows'][0], dict)
mg['hds_server'] + mg['hds_url'], item_id, if not show:
f4m_id='hds', fatal=False)) continue
if not info_dict:
title = unescapeHTML(show['title'])
description = unescapeHTML(show.get('description'))
thumbnail = show.get('thumbnail')
duration = float_or_none(show.get('duration'), 1000)
timestamp = parse_iso8601(show.get('published'))
info_dict = {
'id': item_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
}
mg = try_get(show, lambda x: x['media:group'][0], dict)
if not mg:
continue
if mg.get('url'):
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
if m:
m = m.groupdict()
formats.append({
'url': m['url'] + '/' + m['app'],
'app': m['app'],
'play_path': m['playpath'],
'player_url': url,
'ext': 'flv',
'format_id': 'rtmp',
})
if mg.get('hls_server') and mg.get('hls_url'):
formats.extend(self._extract_m3u8_formats(
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
if mg.get('hds_server') and mg.get('hds_url'):
formats.extend(self._extract_f4m_formats(
mg['hds_server'] + mg['hds_url'], item_id,
f4m_id='hds', fatal=False))
mg_rte_server = str_or_none(mg.get('rte:server'))
mg_url = str_or_none(mg.get('url'))
if mg_rte_server and mg_url:
hds_url = url_or_none(mg_rte_server + mg_url)
if hds_url:
formats.extend(self._extract_f4m_formats(
hds_url, item_id, f4m_id='hds', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return { info_dict['formats'] = formats
'id': item_id, return info_dict
'title': title,
'description': description,
'thumbnail': thumbnail,
'timestamp': timestamp,
'duration': duration,
'formats': formats,
}
class RteIE(RteBaseIE): class RteIE(RteBaseIE):

View File

@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
video_id = self._match_id(url) video_id = self._match_id(url)
video_xml = self._download_xml( video_xml = self._download_xml(
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id) 'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
query={'id': video_id})
formats = [] formats = []
processed_urls = [] processed_urls = []

View File

@ -5,6 +5,7 @@ from ..compat import compat_b64decode
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
url_or_none,
urlencode_postdata, urlencode_postdata,
) )
@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
} }
def _extract_video_url(self, webpage, video_id, *args): def _extract_video_url(self, webpage, video_id, *args):
def decode_url(encoded_url):
return compat_b64decode(encoded_url).decode('utf-8')
stream_url = url_or_none(decode_url(self._search_regex(
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
'stream url', default=None, group='url')))
if stream_url:
return stream_url
return self._parse_json( return self._parse_json(
self._search_regex( self._search_regex(
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
webpage, 'stream', group='url'), webpage, 'stream', group='url'),
video_id, video_id, transform_source=decode_url)[0]
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]

View File

@ -18,8 +18,9 @@ from ..utils import (
class TNAFlixNetworkBaseIE(InfoExtractor): class TNAFlixNetworkBaseIE(InfoExtractor):
# May be overridden in descendants if necessary # May be overridden in descendants if necessary
_CONFIG_REGEX = [ _CONFIG_REGEX = [
r'flashvars\.config\s*=\s*escape\("([^"]+)"', r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
r'<input[^>]+name="config\d?" value="([^"]+)"', r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
] ]
_HOST = 'tna' _HOST = 'tna'
_VKEY_SUFFIX = '' _VKEY_SUFFIX = ''
@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
cfg_url = self._proto_relative_url(self._html_search_regex( cfg_url = self._proto_relative_url(self._html_search_regex(
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:') self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
group='url'), 'http:')
if not cfg_url: if not cfg_url:
inputs = self._hidden_inputs(webpage) inputs = self._hidden_inputs(webpage)

View File

@ -0,0 +1,60 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
import re
class TwitCastingIE(InfoExtractor):
    """Extractor for recorded TwitCasting movies (``/<uploader>/movie/<id>``)."""

    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
    _TEST = {
        'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
        'md5': '745243cad58c4681dc752490f7540d7f',
        'info_dict': {
            'id': '2357609',
            'ext': 'mp4',
            'title': 'Recorded Live #2357609',
            'uploader_id': 'ivetesangalo',
            'description': "Moi! I'm live on TwitCasting from my iPhone.",
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Download the movie page and build the info dict from it."""
        video_id, uploader_id = re.match(
            self._VALID_URL, url).group('id', 'uploader_id')

        webpage = self._download_webpage(url, video_id)

        # Prefer the on-page movie title element; fall back to the
        # twitter:title meta tag, which is then mandatory.
        title = self._html_search_regex(
            r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
            webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'twitter:title', webpage, fatal=True)

        # The playlist URL is taken from a data-movie-url attribute or,
        # failing that, from any quoted http...m3u8 string on the page.
        m3u8_patterns = (
            r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
            r'(["\'])(?P<url>http.+?\.m3u8.*?)\1',
        )
        m3u8_url = self._search_regex(
            m3u8_patterns, webpage, 'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        thumbnail = self._og_search_thumbnail(webpage)

        description = self._og_search_description(webpage, default=None)
        if not description:
            description = self._html_search_meta(
                'twitter:description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader_id,
            'formats': formats,
        }

View File

@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
# This video is no longer available, because its author has been blocked. # This video is no longer available, because its author has been blocked.
'url': 'https://vk.com/video-10639516_456240611', 'url': 'https://vk.com/video-10639516_456240611',
'only_matching': True, 'only_matching': True,
} },
] {
# The video is not available in your region.
'url': 'https://vk.com/video-51812607_171445436',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
r'<!>This video is no longer available, because it has been deleted.': r'<!>This video is no longer available, because it has been deleted.':
'Video %s is no longer available, because it has been deleted.', 'Video %s is no longer available, because it has been deleted.',
r'<!>The video .+? is not available in your region.':
'Video %s is not available in your region.',
} }
for error_re, error_msg in ERRORS.items(): for error_re, error_msg in ERRORS.items():

140
youtube_dl/extractor/wwe.py Normal file
View File

@ -0,0 +1,140 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
try_get,
unescapeHTML,
url_or_none,
urljoin,
)
class WWEBaseIE(InfoExtractor):
    """Shared logic for WWE extractors: turns one playlist item into an info dict."""

    # Maps caption track labels to language codes; labels not listed here
    # are used as the language key unchanged.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
    }

    def _extract_entry(self, data, url, video_id=None):
        """Build an info dict from a single playlist item.

        data     -- playlist item dict; 'title' and 'file' are required, and
                    'nid' is required when video_id is not supplied
        url      -- page URL, used as the base for resolving the thumbnail
        video_id -- optional explicit id; takes precedence over data['nid']
        """
        video_id = compat_str(video_id or data['nid'])
        title = data['title']

        formats = self._extract_m3u8_formats(
            data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        # Only well-formed 'captions' tracks with a valid URL are kept.
        subtitles = {}
        tracks = data.get('tracks')
        for track in (tracks if isinstance(tracks, list) else []):
            if not isinstance(track, dict) or track.get('kind') != 'captions':
                continue
            track_file = url_or_none(track.get('file'))
            if not track_file:
                continue
            label = track.get('label')
            # Unlabelled tracks are filed under 'en'.
            lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
            subtitles.setdefault(lang, []).append({'url': track_file})

        return {
            'id': video_id,
            'title': title,
            'description': data.get('description'),
            'thumbnail': urljoin(url, data.get('image')),
            'series': data.get('show_name'),
            'episode': data.get('episode_name'),
            'formats': formats,
            'subtitles': subtitles,
        }
class WWEIE(WWEBaseIE):
    """Extractor for single wwe.com video pages (``.../videos/<slug>``)."""

    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
        'md5': '92811c6a14bfc206f7a6a9c5d9140184',
        'info_dict': {
            'id': '40048199',
            'ext': 'mp4',
            'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
            'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the Drupal settings blob from the page and extract its initial video."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        settings_json = self._html_search_regex(
            r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
            webpage, 'drupal settings')
        drupal_settings = self._parse_json(settings_json, display_id)
        landing = drupal_settings['WWEVideoLanding']

        # The first playlist item of the initial video is the one extracted;
        # 'initialVideoId' (when present) overrides the item's own 'nid'.
        info = self._extract_entry(
            landing['initialVideo']['playlist'][0], url,
            landing.get('initialVideoId'))
        info['display_id'] = display_id
        return info
class WWEPlaylistIE(WWEBaseIE):
    """Extractor for wwe.com pages that embed several videos (shows, articles)."""

    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wwe.com/shows/raw/2018-11-12',
        'info_dict': {
            'id': '2018-11-12',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
        'only_matching': True,
    }, {
        'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that WWEIE already accepts; otherwise match normally."""
        if WWEIE.suitable(url):
            return False
        return super(WWEPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect every embedded ``data-video`` item on the page into a playlist."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        entries = []
        embed_re = r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1'
        for embed in re.finditer(embed_re, webpage):
            # The attribute value is HTML-escaped JSON; unparsable items
            # are skipped rather than aborting the playlist.
            video = self._parse_json(
                embed.group('data'), display_id,
                transform_source=unescapeHTML, fatal=False)
            if not video:
                continue
            data = try_get(video, lambda x: x['playlist'][0], dict)
            if not data:
                continue
            try:
                entry = self._extract_entry(data, url)
            except Exception:
                # Best effort: one broken item must not fail the others.
                continue
            else:
                entry['extractor_key'] = WWEIE.ie_key()
                entries.append(entry)

        return self.playlist_result(entries, display_id)

View File

@ -41,6 +41,7 @@ from ..utils import (
remove_quotes, remove_quotes,
remove_start, remove_start,
smuggle_url, smuggle_url,
str_or_none,
str_to_int, str_to_int,
try_get, try_get,
unescapeHTML, unescapeHTML,
@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Science & Technology'], 'categories': ['Science & Technology'],
'tags': ['youtube-dl'], 'tags': ['youtube-dl'],
'duration': 10, 'duration': 10,
'view_count': int,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
'start_time': 1, 'start_time': 1,
@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Science & Technology'], 'categories': ['Science & Technology'],
'tags': ['youtube-dl'], 'tags': ['youtube-dl'],
'duration': 10, 'duration': 10,
'view_count': int,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
}, },
@ -1189,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(', r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(', r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('), r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig') jscode, 'Initial JS player signature function name', group='sig')
jsi = JSInterpreter(jscode) jsi = JSInterpreter(jscode)
@ -1538,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info): def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0])) return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
player_response = {}
# Get video info # Get video info
embed_webpage = None embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None: if re.search(r'player-age-gate-content">', video_webpage) is not None:
@ -1580,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if args.get('livestream') == '1' or args.get('live_playback') == 1: if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True is_live = True
sts = ytplayer_config.get('sts') sts = ytplayer_config.get('sts')
if not player_response:
pl_response = str_or_none(args.get('player_response'))
if pl_response:
pl_response = self._parse_json(pl_response, video_id, fatal=False)
if isinstance(pl_response, dict):
player_response = pl_response
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd # We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags # URL that points to a DASH manifest with possibly different itag set (some itags
@ -1608,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info_webpage: if not video_info_webpage:
continue continue
get_video_info = compat_parse_qs(video_info_webpage) get_video_info = compat_parse_qs(video_info_webpage)
if not player_response:
pl_response = get_video_info.get('player_response', [None])[0]
if isinstance(pl_response, dict):
player_response = pl_response
add_dash_mpd(get_video_info) add_dash_mpd(get_video_info)
if view_count is None: if view_count is None:
view_count = extract_view_count(get_video_info) view_count = extract_view_count(get_video_info)
@ -1653,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason', '"token" parameter not in video info for unknown reason',
video_id=video_id) video_id=video_id)
video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {}
# title # title
if 'title' in video_info: if 'title' in video_info:
video_title = video_info['title'][0] video_title = video_info['title'][0]
elif 'title' in player_response:
video_title = video_details['title']
else: else:
self._downloader.report_warning('Unable to extract video title') self._downloader.report_warning('Unable to extract video title')
video_title = '_' video_title = '_'
@ -1718,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if view_count is None: if view_count is None:
view_count = extract_view_count(video_info) view_count = extract_view_count(video_info)
if view_count is None and video_details:
view_count = int_or_none(video_details.get('viewCount'))
# Check for "rental" videos # Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@ -1898,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# uploader # uploader
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str) video_uploader = try_get(
video_info, lambda x: x['author'][0],
compat_str) or str_or_none(video_details.get('author'))
if video_uploader: if video_uploader:
video_uploader = compat_urllib_parse_unquote_plus(video_uploader) video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
else: else:
@ -2011,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
like_count = _extract_count('like') like_count = _extract_count('like')
dislike_count = _extract_count('dislike') dislike_count = _extract_count('dislike')
if view_count is None:
view_count = str_to_int(self._search_regex(
r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
'view count', default=None))
# subtitles # subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage) video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage) automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
video_duration = try_get( video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0])) video_info, lambda x: int_or_none(x['length_seconds'][0]))
if not video_duration:
video_duration = int_or_none(video_details.get('lengthSeconds'))
if not video_duration: if not video_duration:
video_duration = parse_duration(self._html_search_meta( video_duration = parse_duration(self._html_search_meta(
'duration', video_webpage, 'video duration')) 'duration', video_webpage, 'video duration'))
@ -2131,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:https?://)? (?:https?://)?
(?:\w+\.)? (?:\w+\.)?
(?: (?:
youtube\.com/ (?:
youtube\.com|
invidio\.us
)
/
(?: (?:
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11})) (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)= \? (?:.*?[&;])*? (?:p|a|list)=
@ -2244,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'description': 'md5:507cdcb5a49ac0da37a920ece610be80', 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
'categories': ['People & Blogs'], 'categories': ['People & Blogs'],
'tags': list, 'tags': list,
'view_count': int,
'like_count': int, 'like_count': int,
'dislike_count': int, 'dislike_count': int,
}, },
@ -2282,6 +2319,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
# music album playlist # music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM', 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
'only_matching': True,
}] }]
def _real_initialize(self): def _real_initialize(self):

View File

@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
_power_guide_hash = None _power_guide_hash = None
def _host_url(self): def _host_url(self):
return 'https://%s' % self._HOST return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
def _login(self): def _login(self):
username, password = self._get_login_info() username, password = self._get_login_info()
@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE):
class NetPlusIE(ZattooIE): class NetPlusIE(ZattooIE):
_NETRC_MACHINE = 'netplus' _NETRC_MACHINE = 'netplus'
_HOST = 'netplus.tv' _HOST = 'netplus.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc', 'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
'only_matching': True, 'only_matching': True,
}] }]
@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
'url': 'https://www.player.waly.tv/watch/abc/123-abc', 'url': 'https://player.waly.tv/watch/abc/123-abc',
'only_matching': True, 'only_matching': True,
}] }]
@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE):
class BBVTVIE(ZattooIE): class BBVTVIE(ZattooIE):
_NETRC_MACHINE = 'bbvtv' _NETRC_MACHINE = 'bbvtv'
_HOST = 'bbv-tv.net' _HOST = 'bbv-tv.net'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE):
class VTXTVIE(ZattooIE): class VTXTVIE(ZattooIE):
_NETRC_MACHINE = 'vtxtv' _NETRC_MACHINE = 'vtxtv'
_HOST = 'vtxtv.ch' _HOST = 'vtxtv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE):
class MyVisionTVIE(ZattooIE): class MyVisionTVIE(ZattooIE):
_NETRC_MACHINE = 'myvisiontv' _NETRC_MACHINE = 'myvisiontv'
_HOST = 'myvisiontv.ch' _HOST = 'myvisiontv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc', 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
'only_matching': True, 'only_matching': True,
}] }]
@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE):
class SAKTVIE(ZattooIE): class SAKTVIE(ZattooIE):
_NETRC_MACHINE = 'saktv' _NETRC_MACHINE = 'saktv'
_HOST = 'saktv.ch' _HOST = 'saktv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -377,7 +382,7 @@ class EWETVIE(ZattooIE):
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc', 'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
'only_matching': True, 'only_matching': True,
}] }]
@ -385,6 +390,7 @@ class EWETVIE(ZattooIE):
class QuantumTVIE(ZattooIE): class QuantumTVIE(ZattooIE):
_NETRC_MACHINE = 'quantumtv' _NETRC_MACHINE = 'quantumtv'
_HOST = 'quantum-tv.com' _HOST = 'quantum-tv.com'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
@ -395,11 +401,11 @@ class QuantumTVIE(ZattooIE):
class OsnatelTVIE(ZattooIE): class OsnatelTVIE(ZattooIE):
_NETRC_MACHINE = 'osnateltv' _NETRC_MACHINE = 'osnateltv'
_HOST = 'onlinetv.osnatel.de' _HOST = 'tvonline.osnatel.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{
'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc', 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
'only_matching': True, 'only_matching': True,
}] }]
@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE):
class EinsUndEinsTVIE(ZattooIE): class EinsUndEinsTVIE(ZattooIE):
_NETRC_MACHINE = '1und1tv' _NETRC_MACHINE = '1und1tv'
_HOST = '1und1.tv' _HOST = '1und1.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
_TESTS = [{ _TESTS = [{

View File

@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
class ZypeIE(InfoExtractor):
    """Extractor for Zype player embed scripts (player.zype.com/embed/<id>.js)."""

    # The video id is the hexadecimal token before ".js"; the query string
    # must contain an api_key parameter.
    _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
    _TEST = {
        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
        'md5': 'eaee31d474c76a955bdaba02a505c595',
        'info_dict': {
            'id': '5b400b834b32992a310622b9',
            'ext': 'mp4',
            'title': 'Smoky Barbecue Favorites',
            'thumbnail': r're:^https?://.*\.jpe?g',
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the Zype embed script URLs found in <script src=...> tags.

        The pattern accepts scheme-less (``//player.zype.com/...``) sources,
        so returned URLs may be protocol-relative.
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
                webpage)]

    def _real_extract(self, url):
        """Build the info dict (id, title, thumbnail, formats) for an embed URL."""
        video_id = self._match_id(url)

        # The embed "page" is the player script itself; title, stream URL and
        # poster are all scraped out of its text.
        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
            'title', group='value')

        # First quoted string that contains ".m3u8" is taken as the manifest.
        m3u8_url = self._search_regex(
            r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
            'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        # The poster is optional. Fall back to None (not False) so the info
        # dict never carries a boolean where a URL string is expected.
        thumbnail = self._search_regex(
            r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
            default=None, group='url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }

View File

@ -1,3 +1,3 @@
from __future__ import unicode_literals from __future__ import unicode_literals
__version__ = '2018.10.29' __version__ = '2018.11.18'