mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Merge branch 'master' into PornHub-issue-16078
This commit is contained in:
commit
8481dbce73
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
||||
|
||||
---
|
||||
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.29*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.29**
|
||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18**
|
||||
|
||||
### Before submitting an *issue* make sure you have:
|
||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
||||
[debug] User config: []
|
||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||
[debug] youtube-dl version 2018.10.29
|
||||
[debug] youtube-dl version 2018.11.18
|
||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||
[debug] Proxy map: {}
|
||||
|
@ -296,5 +296,26 @@ title = self._search_regex(
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
|
51
ChangeLog
51
ChangeLog
@ -1,3 +1,54 @@
|
||||
version 2018.11.18
|
||||
|
||||
Extractors
|
||||
+ [wwe] Extract subtitles
|
||||
+ [wwe] Add support for playlistst (#14781)
|
||||
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||
* [vk] Detect geo restriction (#17767)
|
||||
* [openload] Use original host during extraction (#18211)
|
||||
* [atvat] Fix extraction (#18041)
|
||||
+ [rte] Add support for new API endpoint (#18206)
|
||||
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||
* [picarto] Use API and add token support (#16518)
|
||||
+ [zype] Add support for player.zype.com (#18143)
|
||||
* [vivo] Fix extraction (#18139)
|
||||
* [ruutu] Update API endpoint (#18138)
|
||||
|
||||
|
||||
version 2018.11.07
|
||||
|
||||
Extractors
|
||||
+ [youtube] Add another JS signature function name regex (#18091, #18093,
|
||||
#18094)
|
||||
* [facebook] Fix tahoe request (#17171)
|
||||
* [cliphunter] Fix extraction (#18083)
|
||||
+ [youtube:playlist] Add support for invidio.us (#18077)
|
||||
* [zattoo] Arrange API hosts for derived extractors (#18035)
|
||||
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
|
||||
|
||||
|
||||
version 2018.11.03
|
||||
|
||||
Core
|
||||
* [extractor/common] Ensure response handle is not prematurely closed before
|
||||
it can be read if it matches expected_status (#17195, #17846, #17447)
|
||||
|
||||
Extractors
|
||||
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
|
||||
+ [ehftv] Add support for ehftv.com (#15408)
|
||||
* [azmedien] Adopt to major site redesign (#17745, #17746)
|
||||
+ [twitcasting] Add support for twitcasting.tv (#17981)
|
||||
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
|
||||
+ [openload] Add support for oload.fun (#18045)
|
||||
* [njpwworld] Fix authentication (#17427)
|
||||
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
|
||||
* [theplatform] Improve error detection (#13222)
|
||||
* [cnbc] Simplify extraction (#14280, #17110)
|
||||
+ [cbnc] Add support for new URL schema (#14193)
|
||||
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
|
||||
* [aparat] Fix extraction
|
||||
|
||||
|
||||
version 2018.10.29
|
||||
|
||||
Core
|
||||
|
23
README.md
23
README.md
@ -1168,7 +1168,28 @@ title = self._search_regex(
|
||||
|
||||
### Use safe conversion functions
|
||||
|
||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||
|
||||
Use `url_or_none` for safe URL processing.
|
||||
|
||||
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||
|
||||
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||
|
||||
#### More examples
|
||||
|
||||
##### Safely extract optional description from parsed JSON
|
||||
```python
|
||||
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||
```
|
||||
|
||||
##### Safely extract more optional metadata
|
||||
```python
|
||||
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||
description = video.get('summary')
|
||||
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||
view_count = int_or_none(video.get('views'))
|
||||
```
|
||||
|
||||
# EMBEDDING YOUTUBE-DL
|
||||
|
||||
|
@ -84,8 +84,6 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@ -178,6 +176,7 @@
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNBCVideo**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@ -251,6 +250,7 @@
|
||||
- **EchoMsk**
|
||||
- **egghead:course**: egghead.io course
|
||||
- **egghead:lesson**: egghead.io lesson
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**
|
||||
- **Einthusan**
|
||||
@ -445,6 +445,8 @@
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@ -930,6 +932,7 @@
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
@ -1077,6 +1080,7 @@
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
@ -1136,3 +1140,4 @@
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
@ -7,6 +7,7 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
|
||||
real_warning(w)
|
||||
|
||||
ydl.report_warning = _report_warning
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
@ -9,11 +9,30 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.compat import compat_etree_fromstring
|
||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
import threading
|
||||
|
||||
|
||||
TEAPOT_RESPONSE_STATUS = 418
|
||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||
|
||||
|
||||
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/teapot':
|
||||
self.send_response(TEAPOT_RESPONSE_STATUS)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
def test_response_with_expected_status_returns_content(self):
|
||||
# Checks for mitigations against the effects of
|
||||
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
|
||||
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
|
||||
# or the underlying `_download_webpage_handle` returning no content
|
||||
# when a response matches `expected_status`.
|
||||
|
||||
httpd = compat_http_server.HTTPServer(
|
||||
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||
port = http_server_port(httpd)
|
||||
server_thread = threading.Thread(target=httpd.serve_forever)
|
||||
server_thread.daemon = True
|
||||
server_thread.start()
|
||||
|
||||
(content, urlh) = self.ie._download_webpage_handle(
|
||||
'http://127.0.0.1:%d/teapot' % port, None,
|
||||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -9,26 +9,16 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import http_server_port, try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import http_server_port
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
@ -16,15 +17,6 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
@ -1,213 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
|
||||
entries = []
|
||||
if not entry_id:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
api_path = self._search_regex(
|
||||
r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
|
||||
webpage, 'api path')
|
||||
api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
||||
|
||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien show playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?:
|
||||
all-episodes|
|
||||
alle-episoden
|
||||
)/
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||
'info_dict': {
|
||||
'id': 'astrotalk',
|
||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||
entries = [self.url_result(
|
||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
@ -1,19 +1,10 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
|
||||
|
||||
_translation_table = {
|
||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
||||
'y': 'l', 'z': 'i',
|
||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
||||
}
|
||||
|
||||
|
||||
def _decode(s):
|
||||
return ''.join(_translation_table.get(c, c) for c in s)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CliphunterIE(InfoExtractor):
|
||||
@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
|
||||
|
||||
formats = []
|
||||
for format_id, f in gexo_files.items():
|
||||
video_url = f.get('url')
|
||||
video_url = url_or_none(f.get('url'))
|
||||
if not video_url:
|
||||
continue
|
||||
fmt = f.get('fmt')
|
||||
height = f.get('h')
|
||||
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
||||
formats.append({
|
||||
'url': _decode(video_url),
|
||||
'url': video_url,
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(f.get('w')),
|
||||
'height': int_or_none(height),
|
||||
|
@ -606,6 +606,11 @@ class InfoExtractor(object):
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
|
@ -88,11 +88,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
@ -543,6 +539,7 @@ from .la7 import LA7IE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
@ -1196,6 +1193,7 @@ from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@ -1388,6 +1386,7 @@ from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@ -1480,3 +1479,4 @@ from .zattoo import (
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
|
@ -114,6 +114,7 @@ from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -2070,6 +2071,20 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
@ -3129,6 +3144,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
zype_urls = ZypeIE._extract_urls(webpage)
|
||||
if zype_urls:
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
|
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
|
||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||
return self._download_json(
|
||||
stream_access_url, video_id, headers={
|
||||
self._proto_relative_url(stream_access_url, 'https:'), video_id,
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||
|
||||
@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvEmbedIE):
|
||||
class Laola1TvBaseIE(Laola1TvEmbedIE):
|
||||
def _extract_video(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'laola1tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||
'info_dict': {
|
||||
@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._extract_video(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
class EHFTVIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'ehftv'
|
||||
_VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id, js_to_json)
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
|
||||
'info_dict': {
|
||||
'id': '1166761',
|
||||
'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
|
||||
'ext': 'mp4',
|
||||
'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
|
||||
'is_live': False,
|
||||
'categories': ['Handball'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
return self._extract_video(url)
|
||||
|
||||
|
||||
class ITTFIE(InfoExtractor):
|
||||
|
@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
|
@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
openload\.(?:co|io|link)|
|
||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
|
||||
)
|
||||
)/
|
||||
(?:f|embed)/
|
||||
(?P<id>[a-zA-Z0-9-_]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@ -319,7 +330,10 @@ class OpenloadIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.icu/f/-_i4y_F_Hs8',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.fun/f/gb6G1H4sHXY',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@ -331,8 +345,11 @@ class OpenloadIE(InfoExtractor):
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
@ -365,7 +382,7 @@ class OpenloadIE(InfoExtractor):
|
||||
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
||||
'stream URL'))
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
@ -376,7 +393,7 @@ class OpenloadIE(InfoExtractor):
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
info_dict = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||
@ -385,4 +402,3 @@ class OpenloadIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
}
|
||||
return info_dict
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor):
|
||||
webpage, 'playlist', group='json'),
|
||||
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
|
||||
|
||||
def quality_to_int(s):
|
||||
m = re.search('([0-9]+)', s)
|
||||
if m is None:
|
||||
return -1
|
||||
return int(m.group(1))
|
||||
|
||||
entries = []
|
||||
for sd in data_jsb:
|
||||
video_id, title = sd.get('id'), sd.get('title')
|
||||
if not video_id or not title:
|
||||
continue
|
||||
video_id = compat_str(video_id)
|
||||
formats = [{
|
||||
'preference': -10 if fd['delivery'] == 'hls' else None,
|
||||
'format_id': '%s-%s-%s' % (
|
||||
fd['delivery'], fd['quality'], fd['quality_string']),
|
||||
'url': fd['src'],
|
||||
'protocol': fd['protocol'],
|
||||
'quality': quality_to_int(fd['quality']),
|
||||
} for fd in sd['sources']]
|
||||
formats = []
|
||||
for fd in sd['sources']:
|
||||
src = url_or_none(fd.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
format_id_list = []
|
||||
for key in ('delivery', 'quality', 'quality_string'):
|
||||
value = fd.get(key)
|
||||
if value:
|
||||
format_id_list.append(value)
|
||||
format_id = '-'.join(format_id_list)
|
||||
if determine_ext(fd['src']) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fd['src'], video_id, 'mp4', m3u8_id=format_id))
|
||||
elif determine_ext(fd['src']) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
fd['src'], video_id, f4m_id=format_id))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': src,
|
||||
'protocol': fd.get('protocol'),
|
||||
})
|
||||
|
||||
# Check for geoblocking.
|
||||
# There is a property is_geoprotection, but that's always false
|
||||
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -15,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
stream_page = self._download_webpage(url, channel_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
|
||||
if '>This channel does not exist' in stream_page:
|
||||
raise ExtractorError(
|
||||
'Channel %s does not exist' % channel_id, expected=True)
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
|
||||
'player settings'),
|
||||
channel_id, transform_source=js_to_json)
|
||||
|
||||
if player.get('online') is False:
|
||||
if metadata.get('online') is False:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
|
||||
cdn_data = self._download_json(
|
||||
@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
|
||||
def get_event(key):
|
||||
return try_get(player, lambda x: x['event'][key], compat_str) or ''
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'token': player.get('token') or '',
|
||||
'ticket': get_event('ticket'),
|
||||
'con': int(time.time() * 1000),
|
||||
'type': get_event('ticket'),
|
||||
'scope': get_event('scope'),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
default_tech = player.get('defaultTech')
|
||||
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
if tech_type == default_tech:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = player.get('mature')
|
||||
mature = metadata.get('adult')
|
||||
if mature is None:
|
||||
age_limit = None
|
||||
else:
|
||||
@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(channel_id),
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'is_live': True,
|
||||
'thumbnail': player.get('vodThumb'),
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
json_string = self._download_json(
|
||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
||||
item_id)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||
if error_info:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
# NB the string values in the JSON are stored using XML escaping(!)
|
||||
show = json_string['shows'][0]
|
||||
title = unescapeHTML(show['title'])
|
||||
description = unescapeHTML(show.get('description'))
|
||||
thumbnail = show.get('thumbnail')
|
||||
duration = float_or_none(show.get('duration'), 1000)
|
||||
timestamp = parse_iso8601(show.get('published'))
|
||||
|
||||
mg = show['media:group'][0]
|
||||
|
||||
info_dict = {}
|
||||
formats = []
|
||||
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
ENDPOINTS = (
|
||||
'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
|
||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
|
||||
)
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
for num, ep_url in enumerate(ENDPOINTS, start=1):
|
||||
try:
|
||||
data = self._download_json(ep_url + item_id, item_id)
|
||||
except ExtractorError as ee:
|
||||
if num < len(ENDPOINTS) or formats:
|
||||
continue
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||
if error_info:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
if mg.get('hds_server') and mg.get('hds_url'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
mg['hds_server'] + mg['hds_url'], item_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
# NB the string values in the JSON are stored using XML escaping(!)
|
||||
show = try_get(data, lambda x: x['shows'][0], dict)
|
||||
if not show:
|
||||
continue
|
||||
|
||||
if not info_dict:
|
||||
title = unescapeHTML(show['title'])
|
||||
description = unescapeHTML(show.get('description'))
|
||||
thumbnail = show.get('thumbnail')
|
||||
duration = float_or_none(show.get('duration'), 1000)
|
||||
timestamp = parse_iso8601(show.get('published'))
|
||||
info_dict = {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
mg = try_get(show, lambda x: x['media:group'][0], dict)
|
||||
if not mg:
|
||||
continue
|
||||
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
if mg.get('hds_server') and mg.get('hds_url'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
mg['hds_server'] + mg['hds_url'], item_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
|
||||
mg_rte_server = str_or_none(mg.get('rte:server'))
|
||||
mg_url = str_or_none(mg.get('url'))
|
||||
if mg_rte_server and mg_url:
|
||||
hds_url = url_or_none(mg_rte_server + mg_url)
|
||||
if hds_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
hds_url, item_id, f4m_id='hds', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
|
||||
class RteIE(RteBaseIE):
|
||||
|
@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
||||
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||
query={'id': video_id})
|
||||
|
||||
formats = []
|
||||
processed_urls = []
|
||||
|
@ -5,6 +5,7 @@ from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
|
||||
}
|
||||
|
||||
def _extract_video_url(self, webpage, video_id, *args):
|
||||
def decode_url(encoded_url):
|
||||
return compat_b64decode(encoded_url).decode('utf-8')
|
||||
|
||||
stream_url = url_or_none(decode_url(self._search_regex(
|
||||
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'stream url', default=None, group='url')))
|
||||
if stream_url:
|
||||
return stream_url
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream', group='url'),
|
||||
video_id,
|
||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
||||
video_id, transform_source=decode_url)[0]
|
||||
|
@ -18,8 +18,9 @@ from ..utils import (
|
||||
class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
# May be overridden in descendants if necessary
|
||||
_CONFIG_REGEX = [
|
||||
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
|
||||
r'<input[^>]+name="config\d?" value="([^"]+)"',
|
||||
r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
|
||||
r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
|
||||
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
|
||||
]
|
||||
_HOST = 'tna'
|
||||
_VKEY_SUFFIX = ''
|
||||
@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
|
||||
group='url'), 'http:')
|
||||
|
||||
if not cfg_url:
|
||||
inputs = self._hidden_inputs(webpage)
|
||||
|
60
youtube_dl/extractor/twitcasting.py
Normal file
60
youtube_dl/extractor/twitcasting.py
Normal file
@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
|
||||
'md5': '745243cad58c4681dc752490f7540d7f',
|
||||
'info_dict': {
|
||||
'id': '2357609',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recorded Live #2357609',
|
||||
'uploader_id': 'ivetesangalo',
|
||||
'description': "Moi! I'm live on TwitCasting from my iPhone.",
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
uploader_id = mobj.group('uploader_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, fatal=True)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
|
||||
webpage, 'm3u8 url', group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
}
|
@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
|
||||
# This video is no longer available, because its author has been blocked.
|
||||
'url': 'https://vk.com/video-10639516_456240611',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
# The video is not available in your region.
|
||||
'url': 'https://vk.com/video-51812607_171445436',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>This video is no longer available, because it has been deleted.':
|
||||
'Video %s is no longer available, because it has been deleted.',
|
||||
|
||||
r'<!>The video .+? is not available in your region.':
|
||||
'Video %s is not available in your region.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
|
140
youtube_dl/extractor/wwe.py
Normal file
140
youtube_dl/extractor/wwe.py
Normal file
@ -0,0 +1,140 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class WWEBaseIE(InfoExtractor):
|
||||
_SUBTITLE_LANGS = {
|
||||
'English': 'en',
|
||||
'Deutsch': 'de',
|
||||
}
|
||||
|
||||
def _extract_entry(self, data, url, video_id=None):
|
||||
video_id = compat_str(video_id or data['nid'])
|
||||
title = data['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
description = data.get('description')
|
||||
thumbnail = urljoin(url, data.get('image'))
|
||||
series = data.get('show_name')
|
||||
episode = data.get('episode_name')
|
||||
|
||||
subtitles = {}
|
||||
tracks = data.get('tracks')
|
||||
if isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_file = url_or_none(track.get('file'))
|
||||
if not track_file:
|
||||
continue
|
||||
label = track.get('label')
|
||||
lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': track_file,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class WWEIE(WWEBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
|
||||
'md5': '92811c6a14bfc206f7a6a9c5d9140184',
|
||||
'info_dict': {
|
||||
'id': '40048199',
|
||||
'ext': 'mp4',
|
||||
'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
|
||||
'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
landing = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'drupal settings'),
|
||||
display_id)['WWEVideoLanding']
|
||||
|
||||
data = landing['initialVideo']['playlist'][0]
|
||||
video_id = landing.get('initialVideoId')
|
||||
|
||||
info = self._extract_entry(data, url, video_id)
|
||||
info['display_id'] = display_id
|
||||
return info
|
||||
|
||||
|
||||
class WWEPlaylistIE(WWEBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wwe.com/shows/raw/2018-11-12',
|
||||
'info_dict': {
|
||||
'id': '2018-11-12',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
|
||||
video = self._parse_json(
|
||||
mobj.group('data'), display_id, transform_source=unescapeHTML,
|
||||
fatal=False)
|
||||
if not video:
|
||||
continue
|
||||
data = try_get(video, lambda x: x['playlist'][0], dict)
|
||||
if not data:
|
||||
continue
|
||||
try:
|
||||
entry = self._extract_entry(data, url)
|
||||
except Exception:
|
||||
continue
|
||||
entry['extractor_key'] = WWEIE.ie_key()
|
||||
entries.append(entry)
|
||||
|
||||
return self.playlist_result(entries, display_id)
|
@ -41,6 +41,7 @@ from ..utils import (
|
||||
remove_quotes,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
'duration': 10,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
'start_time': 1,
|
||||
@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'categories': ['Science & Technology'],
|
||||
'tags': ['youtube-dl'],
|
||||
'duration': 10,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
@ -1189,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||
jscode, 'Initial JS player signature function name', group='sig')
|
||||
|
||||
jsi = JSInterpreter(jscode)
|
||||
@ -1538,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
def extract_view_count(v_info):
|
||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||
|
||||
player_response = {}
|
||||
|
||||
# Get video info
|
||||
embed_webpage = None
|
||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||
@ -1580,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||
is_live = True
|
||||
sts = ytplayer_config.get('sts')
|
||||
if not player_response:
|
||||
pl_response = str_or_none(args.get('player_response'))
|
||||
if pl_response:
|
||||
pl_response = self._parse_json(pl_response, video_id, fatal=False)
|
||||
if isinstance(pl_response, dict):
|
||||
player_response = pl_response
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
# We also try looking in get_video_info since it may contain different dashmpd
|
||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||
@ -1608,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
if not video_info_webpage:
|
||||
continue
|
||||
get_video_info = compat_parse_qs(video_info_webpage)
|
||||
if not player_response:
|
||||
pl_response = get_video_info.get('player_response', [None])[0]
|
||||
if isinstance(pl_response, dict):
|
||||
player_response = pl_response
|
||||
add_dash_mpd(get_video_info)
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(get_video_info)
|
||||
@ -1653,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'"token" parameter not in video info for unknown reason',
|
||||
video_id=video_id)
|
||||
|
||||
video_details = try_get(
|
||||
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||
|
||||
# title
|
||||
if 'title' in video_info:
|
||||
video_title = video_info['title'][0]
|
||||
elif 'title' in player_response:
|
||||
video_title = video_details['title']
|
||||
else:
|
||||
self._downloader.report_warning('Unable to extract video title')
|
||||
video_title = '_'
|
||||
@ -1718,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
if view_count is None:
|
||||
view_count = extract_view_count(video_info)
|
||||
if view_count is None and video_details:
|
||||
view_count = int_or_none(video_details.get('viewCount'))
|
||||
|
||||
# Check for "rental" videos
|
||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||
@ -1898,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||
|
||||
# uploader
|
||||
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
|
||||
video_uploader = try_get(
|
||||
video_info, lambda x: x['author'][0],
|
||||
compat_str) or str_or_none(video_details.get('author'))
|
||||
if video_uploader:
|
||||
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
|
||||
else:
|
||||
@ -2011,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
like_count = _extract_count('like')
|
||||
dislike_count = _extract_count('dislike')
|
||||
|
||||
if view_count is None:
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
|
||||
'view count', default=None))
|
||||
|
||||
# subtitles
|
||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||
|
||||
video_duration = try_get(
|
||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||
if not video_duration:
|
||||
video_duration = int_or_none(video_details.get('lengthSeconds'))
|
||||
if not video_duration:
|
||||
video_duration = parse_duration(self._html_search_meta(
|
||||
'duration', video_webpage, 'video duration'))
|
||||
@ -2131,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
(?:https?://)?
|
||||
(?:\w+\.)?
|
||||
(?:
|
||||
youtube\.com/
|
||||
(?:
|
||||
youtube\.com|
|
||||
invidio\.us
|
||||
)
|
||||
/
|
||||
(?:
|
||||
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
|
||||
\? (?:.*?[&;])*? (?:p|a|list)=
|
||||
@ -2244,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
|
||||
'categories': ['People & Blogs'],
|
||||
'tags': list,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'dislike_count': int,
|
||||
},
|
||||
@ -2282,6 +2319,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
||||
# music album playlist
|
||||
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
|
@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
|
||||
_power_guide_hash = None
|
||||
|
||||
def _host_url(self):
|
||||
return 'https://%s' % self._HOST
|
||||
return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
|
||||
|
||||
def _login(self):
|
||||
username, password = self._get_login_info()
|
||||
@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE):
|
||||
class NetPlusIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'netplus'
|
||||
_HOST = 'netplus.tv'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE):
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc',
|
||||
'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE):
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.player.waly.tv/watch/abc/123-abc',
|
||||
'url': 'https://player.waly.tv/watch/abc/123-abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE):
|
||||
class BBVTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'bbvtv'
|
||||
_HOST = 'bbv-tv.net'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE):
|
||||
class VTXTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'vtxtv'
|
||||
_HOST = 'vtxtv.ch'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE):
|
||||
class MyVisionTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'myvisiontv'
|
||||
_HOST = 'myvisiontv.ch'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE):
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc',
|
||||
'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE):
|
||||
class SAKTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'saktv'
|
||||
_HOST = 'saktv.ch'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -377,7 +382,7 @@ class EWETVIE(ZattooIE):
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc',
|
||||
'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -385,6 +390,7 @@ class EWETVIE(ZattooIE):
|
||||
class QuantumTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'quantumtv'
|
||||
_HOST = 'quantum-tv.com'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
@ -395,11 +401,11 @@ class QuantumTVIE(ZattooIE):
|
||||
|
||||
class OsnatelTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = 'osnateltv'
|
||||
_HOST = 'onlinetv.osnatel.de'
|
||||
_HOST = 'tvonline.osnatel.de'
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc',
|
||||
'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE):
|
||||
class EinsUndEinsTVIE(ZattooIE):
|
||||
_NETRC_MACHINE = '1und1tv'
|
||||
_HOST = '1und1.tv'
|
||||
_API_HOST = 'www.%s' % _HOST
|
||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||
|
||||
_TESTS = [{
|
||||
|
57
youtube_dl/extractor/zype.py
Normal file
57
youtube_dl/extractor/zype.py
Normal file
@ -0,0 +1,57 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ZypeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
|
||||
_TEST = {
|
||||
'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
|
||||
'md5': 'eaee31d474c76a955bdaba02a505c595',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
},
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
|
||||
webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._search_regex(
|
||||
r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'title', group='value')
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
|
||||
'm3u8 url', group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
|
||||
default=False, group='url')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
}
|
@ -1,3 +1,3 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2018.10.29'
|
||||
__version__ = '2018.11.18'
|
||||
|
Loading…
x
Reference in New Issue
Block a user