mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
0b4a957f28
6
.github/ISSUE_TEMPLATE.md
vendored
6
.github/ISSUE_TEMPLATE.md
vendored
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.10.05*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.12.03*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
|
||||||
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.10.05**
|
- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.12.03**
|
||||||
|
|
||||||
### Before submitting an *issue* make sure you have:
|
### Before submitting an *issue* make sure you have:
|
||||||
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections
|
||||||
@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
|
|||||||
[debug] User config: []
|
[debug] User config: []
|
||||||
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
|
||||||
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
|
||||||
[debug] youtube-dl version 2018.10.05
|
[debug] youtube-dl version 2018.12.03
|
||||||
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
|
||||||
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
|
||||||
[debug] Proxy map: {}
|
[debug] Proxy map: {}
|
||||||
|
12
.travis.yml
12
.travis.yml
@ -15,6 +15,18 @@ env:
|
|||||||
- YTDL_TEST_SET=download
|
- YTDL_TEST_SET=download
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
|
- python: 3.7
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.7
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
|
- python: 3.8-dev
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=core
|
||||||
|
- python: 3.8-dev
|
||||||
|
dist: xenial
|
||||||
|
env: YTDL_TEST_SET=download
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=core
|
- env: JYTHON=true; YTDL_TEST_SET=core
|
||||||
- env: JYTHON=true; YTDL_TEST_SET=download
|
- env: JYTHON=true; YTDL_TEST_SET=download
|
||||||
fast_finish: true
|
fast_finish: true
|
||||||
|
@ -296,5 +296,26 @@ title = self._search_regex(
|
|||||||
|
|
||||||
### Use safe conversion functions
|
### Use safe conversion functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
|
||||||
|
Use `url_or_none` for safe URL processing.
|
||||||
|
|
||||||
|
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
|
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||||
|
|
||||||
|
#### More examples
|
||||||
|
|
||||||
|
##### Safely extract optional description from parsed JSON
|
||||||
|
```python
|
||||||
|
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Safely extract more optional metadata
|
||||||
|
```python
|
||||||
|
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||||
|
description = video.get('summary')
|
||||||
|
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||||
|
view_count = int_or_none(video.get('views'))
|
||||||
|
```
|
||||||
|
|
||||||
|
112
ChangeLog
112
ChangeLog
@ -1,3 +1,115 @@
|
|||||||
|
version 2018.12.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [utils] Fix random_birthday to generate existing dates only (#18284)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [tiktok] Add support for tiktok.com (#18108, #18135)
|
||||||
|
* [pornhub] Use actual URL host for requests (#18359)
|
||||||
|
* [lynda] Fix authentication (#18158, #18217)
|
||||||
|
* [gfycat] Update API endpoint (#18333, #18343)
|
||||||
|
+ [hotstar] Add support for alternative app state layout (#18320)
|
||||||
|
* [azmedien] Fix extraction (#18334, #18336)
|
||||||
|
+ [vimeo] Add support for VHX (Vimeo OTT) (#14835)
|
||||||
|
* [joj] Fix extraction (#18280, #18281)
|
||||||
|
+ [wistia] Add support for fast.wistia.com (#18287)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.23
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [setup.py] Add more relevant classifiers
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [mixcloud] Fallback to hardcoded decryption key (#18016)
|
||||||
|
* [nbc:news] Fix article extraction (#16194)
|
||||||
|
* [foxsports] Fix extraction (#17543)
|
||||||
|
* [loc] Relax regular expression and improve formats extraction
|
||||||
|
+ [ciscolive] Add support for ciscolive.cisco.com (#17984)
|
||||||
|
* [nzz] Relax kaltura regex (#18228)
|
||||||
|
* [sixplay] Fix formats extraction
|
||||||
|
* [bitchute] Improve title extraction
|
||||||
|
* [kaltura] Limit requested MediaEntry fields
|
||||||
|
+ [americastestkitchen] Add support for zype embeds (#18225)
|
||||||
|
+ [pornhub] Add pornhub.net alias
|
||||||
|
* [nova:embed] Fix extraction (#18222)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.18
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [wwe] Extract subtitles
|
||||||
|
+ [wwe] Add support for playlists (#14781)
|
||||||
|
+ [wwe] Add support for wwe.com (#14781, #17450)
|
||||||
|
* [vk] Detect geo restriction (#17767)
|
||||||
|
* [openload] Use original host during extraction (#18211)
|
||||||
|
* [atvat] Fix extraction (#18041)
|
||||||
|
+ [rte] Add support for new API endpoint (#18206)
|
||||||
|
* [tnaflixnetwork:embed] Fix extraction (#18205)
|
||||||
|
* [picarto] Use API and add token support (#16518)
|
||||||
|
+ [zype] Add support for player.zype.com (#18143)
|
||||||
|
* [vivo] Fix extraction (#18139)
|
||||||
|
* [ruutu] Update API endpoint (#18138)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.07
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [youtube] Add another JS signature function name regex (#18091, #18093,
|
||||||
|
#18094)
|
||||||
|
* [facebook] Fix tahoe request (#17171)
|
||||||
|
* [cliphunter] Fix extraction (#18083)
|
||||||
|
+ [youtube:playlist] Add support for invidio.us (#18077)
|
||||||
|
* [zattoo] Arrange API hosts for derived extractors (#18035)
|
||||||
|
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.11.03
|
||||||
|
|
||||||
|
Core
|
||||||
|
* [extractor/common] Ensure response handle is not prematurely closed before
|
||||||
|
it can be read if it matches expected_status (#17195, #17846, #17447)
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
|
||||||
|
+ [ehftv] Add support for ehftv.com (#15408)
|
||||||
|
* [azmedien] Adapt to major site redesign (#17745, #17746)
|
||||||
|
+ [twitcasting] Add support for twitcasting.tv (#17981)
|
||||||
|
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
|
||||||
|
+ [openload] Add support for oload.fun (#18045)
|
||||||
|
* [njpwworld] Fix authentication (#17427)
|
||||||
|
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
|
||||||
|
* [theplatform] Improve error detection (#13222)
|
||||||
|
* [cnbc] Simplify extraction (#14280, #17110)
|
||||||
|
+ [cnbc] Add support for new URL schema (#14193)
|
||||||
|
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
|
||||||
|
* [aparat] Fix extraction
|
||||||
|
|
||||||
|
|
||||||
|
version 2018.10.29
|
||||||
|
|
||||||
|
Core
|
||||||
|
+ [extractor/common] Add validation for JSON-LD URLs
|
||||||
|
|
||||||
|
Extractors
|
||||||
|
+ [sportbox] Add support for matchtv.ru
|
||||||
|
* [sportbox] Fix extraction (#17978)
|
||||||
|
* [screencast] Fix extraction (#14590, #14617, #17990)
|
||||||
|
+ [openload] Add support for oload.icu
|
||||||
|
+ [ivi] Add support for ivi.tv
|
||||||
|
* [crunchyroll] Improve extraction failsafeness (#17991)
|
||||||
|
* [dailymail] Fix formats extraction (#17976)
|
||||||
|
* [viewster] Reduce format requests
|
||||||
|
* [cwtv] Handle API errors (#17905)
|
||||||
|
+ [rutube] Use geo verification headers (#17897)
|
||||||
|
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
|
||||||
|
- [tv3] Remove extractor (#10461, #15339)
|
||||||
|
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
|
||||||
|
+ [openload] Add support for oload.cc (#17823)
|
||||||
|
+ [patreon] Extract post_file URL (#17792)
|
||||||
|
* [patreon] Fix extraction (#14502, #10471)
|
||||||
|
|
||||||
|
|
||||||
version 2018.10.05
|
version 2018.10.05
|
||||||
|
|
||||||
Extractors
|
Extractors
|
||||||
|
23
README.md
23
README.md
@ -1168,7 +1168,28 @@ title = self._search_regex(
|
|||||||
|
|
||||||
### Use safe conversion functions
|
### Use safe conversion functions
|
||||||
|
|
||||||
Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
|
||||||
|
|
||||||
|
Use `url_or_none` for safe URL processing.
|
||||||
|
|
||||||
|
Use `try_get` for safe metadata extraction from parsed JSON.
|
||||||
|
|
||||||
|
Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
|
||||||
|
|
||||||
|
#### More examples
|
||||||
|
|
||||||
|
##### Safely extract optional description from parsed JSON
|
||||||
|
```python
|
||||||
|
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
|
||||||
|
```
|
||||||
|
|
||||||
|
##### Safely extract more optional metadata
|
||||||
|
```python
|
||||||
|
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
|
||||||
|
description = video.get('summary')
|
||||||
|
duration = float_or_none(video.get('durationMs'), scale=1000)
|
||||||
|
view_count = int_or_none(video.get('views'))
|
||||||
|
```
|
||||||
|
|
||||||
# EMBEDDING YOUTUBE-DL
|
# EMBEDDING YOUTUBE-DL
|
||||||
|
|
||||||
|
@ -84,8 +84,6 @@
|
|||||||
- **awaan:season**
|
- **awaan:season**
|
||||||
- **awaan:video**
|
- **awaan:video**
|
||||||
- **AZMedien**: AZ Medien videos
|
- **AZMedien**: AZ Medien videos
|
||||||
- **AZMedienPlaylist**: AZ Medien playlists
|
|
||||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
|
||||||
- **BaiduVideo**: 百度视频
|
- **BaiduVideo**: 百度视频
|
||||||
- **bambuser**
|
- **bambuser**
|
||||||
- **bambuser:channel**
|
- **bambuser:channel**
|
||||||
@ -165,6 +163,8 @@
|
|||||||
- **chirbit**
|
- **chirbit**
|
||||||
- **chirbit:profile**
|
- **chirbit:profile**
|
||||||
- **Cinchcast**
|
- **Cinchcast**
|
||||||
|
- **CiscoLiveSearch**
|
||||||
|
- **CiscoLiveSession**
|
||||||
- **CJSW**
|
- **CJSW**
|
||||||
- **cliphunter**
|
- **cliphunter**
|
||||||
- **Clippit**
|
- **Clippit**
|
||||||
@ -178,6 +178,7 @@
|
|||||||
- **Clyp**
|
- **Clyp**
|
||||||
- **cmt.com**
|
- **cmt.com**
|
||||||
- **CNBC**
|
- **CNBC**
|
||||||
|
- **CNBCVideo**
|
||||||
- **CNN**
|
- **CNN**
|
||||||
- **CNNArticle**
|
- **CNNArticle**
|
||||||
- **CNNBlogs**
|
- **CNNBlogs**
|
||||||
@ -251,6 +252,7 @@
|
|||||||
- **EchoMsk**
|
- **EchoMsk**
|
||||||
- **egghead:course**: egghead.io course
|
- **egghead:course**: egghead.io course
|
||||||
- **egghead:lesson**: egghead.io lesson
|
- **egghead:lesson**: egghead.io lesson
|
||||||
|
- **ehftv**
|
||||||
- **eHow**
|
- **eHow**
|
||||||
- **EinsUndEinsTV**
|
- **EinsUndEinsTV**
|
||||||
- **Einthusan**
|
- **Einthusan**
|
||||||
@ -445,6 +447,8 @@
|
|||||||
- **limelight:channel**
|
- **limelight:channel**
|
||||||
- **limelight:channel_list**
|
- **limelight:channel_list**
|
||||||
- **LineTV**
|
- **LineTV**
|
||||||
|
- **linkedin:learning**
|
||||||
|
- **linkedin:learning:course**
|
||||||
- **LiTV**
|
- **LiTV**
|
||||||
- **LiveLeak**
|
- **LiveLeak**
|
||||||
- **LiveLeakEmbed**
|
- **LiveLeakEmbed**
|
||||||
@ -818,7 +822,7 @@
|
|||||||
- **Spiegeltv**
|
- **Spiegeltv**
|
||||||
- **sport.francetvinfo.fr**
|
- **sport.francetvinfo.fr**
|
||||||
- **Sport5**
|
- **Sport5**
|
||||||
- **SportBoxEmbed**
|
- **SportBox**
|
||||||
- **SportDeutschland**
|
- **SportDeutschland**
|
||||||
- **SpringboardPlatform**
|
- **SpringboardPlatform**
|
||||||
- **Sprout**
|
- **Sprout**
|
||||||
@ -881,6 +885,8 @@
|
|||||||
- **ThisAmericanLife**
|
- **ThisAmericanLife**
|
||||||
- **ThisAV**
|
- **ThisAV**
|
||||||
- **ThisOldHouse**
|
- **ThisOldHouse**
|
||||||
|
- **TikTok**
|
||||||
|
- **TikTokUser**
|
||||||
- **tinypic**: tinypic.com videos
|
- **tinypic**: tinypic.com videos
|
||||||
- **TMZ**
|
- **TMZ**
|
||||||
- **TMZArticle**
|
- **TMZArticle**
|
||||||
@ -909,7 +915,6 @@
|
|||||||
- **TV2**
|
- **TV2**
|
||||||
- **tv2.hu**
|
- **tv2.hu**
|
||||||
- **TV2Article**
|
- **TV2Article**
|
||||||
- **TV3**
|
|
||||||
- **TV4**: tv4.se and tv4play.se
|
- **TV4**: tv4.se and tv4play.se
|
||||||
- **TV5MondePlus**: TV5MONDE+
|
- **TV5MondePlus**: TV5MONDE+
|
||||||
- **TVA**
|
- **TVA**
|
||||||
@ -931,6 +936,7 @@
|
|||||||
- **TVPlayer**
|
- **TVPlayer**
|
||||||
- **TVPlayHome**
|
- **TVPlayHome**
|
||||||
- **Tweakers**
|
- **Tweakers**
|
||||||
|
- **TwitCasting**
|
||||||
- **twitch:chapter**
|
- **twitch:chapter**
|
||||||
- **twitch:clips**
|
- **twitch:clips**
|
||||||
- **twitch:profile**
|
- **twitch:profile**
|
||||||
@ -975,6 +981,7 @@
|
|||||||
- **VevoPlaylist**
|
- **VevoPlaylist**
|
||||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||||
- **vh1.com**
|
- **vh1.com**
|
||||||
|
- **vhx:embed**
|
||||||
- **Viafree**
|
- **Viafree**
|
||||||
- **vice**
|
- **vice**
|
||||||
- **vice:article**
|
- **vice:article**
|
||||||
@ -1078,6 +1085,7 @@
|
|||||||
- **wrzuta.pl:playlist**
|
- **wrzuta.pl:playlist**
|
||||||
- **WSJ**: Wall Street Journal
|
- **WSJ**: Wall Street Journal
|
||||||
- **WSJArticle**
|
- **WSJArticle**
|
||||||
|
- **WWE**
|
||||||
- **XBef**
|
- **XBef**
|
||||||
- **XboxClips**
|
- **XboxClips**
|
||||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||||
@ -1137,3 +1145,4 @@
|
|||||||
- **ZDF**
|
- **ZDF**
|
||||||
- **ZDFChannel**
|
- **ZDFChannel**
|
||||||
- **zingmp3**: mp3.zing.vn
|
- **zingmp3**: mp3.zing.vn
|
||||||
|
- **Zype**
|
||||||
|
9
setup.py
9
setup.py
@ -124,6 +124,8 @@ setup(
|
|||||||
'Development Status :: 5 - Production/Stable',
|
'Development Status :: 5 - Production/Stable',
|
||||||
'Environment :: Console',
|
'Environment :: Console',
|
||||||
'License :: Public Domain',
|
'License :: Public Domain',
|
||||||
|
'Programming Language :: Python',
|
||||||
|
'Programming Language :: Python :: 2',
|
||||||
'Programming Language :: Python :: 2.6',
|
'Programming Language :: Python :: 2.6',
|
||||||
'Programming Language :: Python :: 2.7',
|
'Programming Language :: Python :: 2.7',
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
@ -132,6 +134,13 @@ setup(
|
|||||||
'Programming Language :: Python :: 3.4',
|
'Programming Language :: Python :: 3.4',
|
||||||
'Programming Language :: Python :: 3.5',
|
'Programming Language :: Python :: 3.5',
|
||||||
'Programming Language :: Python :: 3.6',
|
'Programming Language :: Python :: 3.6',
|
||||||
|
'Programming Language :: Python :: 3.7',
|
||||||
|
'Programming Language :: Python :: 3.8',
|
||||||
|
'Programming Language :: Python :: Implementation',
|
||||||
|
'Programming Language :: Python :: Implementation :: CPython',
|
||||||
|
'Programming Language :: Python :: Implementation :: IronPython',
|
||||||
|
'Programming Language :: Python :: Implementation :: Jython',
|
||||||
|
'Programming Language :: Python :: Implementation :: PyPy',
|
||||||
],
|
],
|
||||||
|
|
||||||
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
cmdclass={'build_lazy_extractors': build_lazy_extractors},
|
||||||
|
@ -7,6 +7,7 @@ import json
|
|||||||
import os.path
|
import os.path
|
||||||
import re
|
import re
|
||||||
import types
|
import types
|
||||||
|
import ssl
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import youtube_dl.extractor
|
import youtube_dl.extractor
|
||||||
@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
|
|||||||
real_warning(w)
|
real_warning(w)
|
||||||
|
|
||||||
ydl.report_warning = _report_warning
|
ydl.report_warning = _report_warning
|
||||||
|
|
||||||
|
|
||||||
|
def http_server_port(httpd):
|
||||||
|
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||||
|
# In Jython SSLSocket is not a subclass of socket.socket
|
||||||
|
sock = httpd.socket.sock
|
||||||
|
else:
|
||||||
|
sock = httpd.socket
|
||||||
|
return sock.getsockname()[1]
|
||||||
|
@ -9,11 +9,30 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import FakeYDL, expect_dict, expect_value
|
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||||
from youtube_dl.compat import compat_etree_fromstring
|
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||||
from youtube_dl.extractor.common import InfoExtractor
|
from youtube_dl.extractor.common import InfoExtractor
|
||||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||||
|
import threading
|
||||||
|
|
||||||
|
|
||||||
|
TEAPOT_RESPONSE_STATUS = 418
|
||||||
|
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||||
|
|
||||||
|
|
||||||
|
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
|
def log_message(self, format, *args):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def do_GET(self):
|
||||||
|
if self.path == '/teapot':
|
||||||
|
self.send_response(TEAPOT_RESPONSE_STATUS)
|
||||||
|
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
|
||||||
class TestIE(InfoExtractor):
|
class TestIE(InfoExtractor):
|
||||||
@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
|||||||
for i in range(len(entries)):
|
for i in range(len(entries)):
|
||||||
expect_dict(self, entries[i], expected_entries[i])
|
expect_dict(self, entries[i], expected_entries[i])
|
||||||
|
|
||||||
|
def test_response_with_expected_status_returns_content(self):
|
||||||
|
# Checks for mitigations against the effects of
|
||||||
|
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
|
||||||
|
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
|
||||||
|
# or the underlying `_download_webpage_handle` returning no content
|
||||||
|
# when a response matches `expected_status`.
|
||||||
|
|
||||||
|
httpd = compat_http_server.HTTPServer(
|
||||||
|
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||||
|
port = http_server_port(httpd)
|
||||||
|
server_thread = threading.Thread(target=httpd.serve_forever)
|
||||||
|
server_thread.daemon = True
|
||||||
|
server_thread.start()
|
||||||
|
|
||||||
|
(content, urlh) = self.ie._download_webpage_handle(
|
||||||
|
'http://127.0.0.1:%d/teapot' % port, None,
|
||||||
|
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||||
|
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
@ -39,7 +39,7 @@ class TestCompat(unittest.TestCase):
|
|||||||
|
|
||||||
def test_compat_expanduser(self):
|
def test_compat_expanduser(self):
|
||||||
old_home = os.environ.get('HOME')
|
old_home = os.environ.get('HOME')
|
||||||
test_str = 'C:\Documents and Settings\тест\Application Data'
|
test_str = r'C:\Documents and Settings\тест\Application Data'
|
||||||
compat_setenv('HOME', test_str)
|
compat_setenv('HOME', test_str)
|
||||||
self.assertEqual(compat_expanduser('~'), test_str)
|
self.assertEqual(compat_expanduser('~'), test_str)
|
||||||
compat_setenv('HOME', old_home or '')
|
compat_setenv('HOME', old_home or '')
|
||||||
|
@ -9,26 +9,16 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
from test.helper import try_rm
|
from test.helper import http_server_port, try_rm
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server
|
from youtube_dl.compat import compat_http_server
|
||||||
from youtube_dl.downloader.http import HttpFD
|
from youtube_dl.downloader.http import HttpFD
|
||||||
from youtube_dl.utils import encodeFilename
|
from youtube_dl.utils import encodeFilename
|
||||||
import ssl
|
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def http_server_port(httpd):
|
|
||||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
|
||||||
# In Jython SSLSocket is not a subclass of socket.socket
|
|
||||||
sock = httpd.socket.sock
|
|
||||||
else:
|
|
||||||
sock = httpd.socket
|
|
||||||
return sock.getsockname()[1]
|
|
||||||
|
|
||||||
|
|
||||||
TEST_SIZE = 10 * 1024
|
TEST_SIZE = 10 * 1024
|
||||||
|
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import sys
|
|||||||
import unittest
|
import unittest
|
||||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
from test.helper import http_server_port
|
||||||
from youtube_dl import YoutubeDL
|
from youtube_dl import YoutubeDL
|
||||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||||
import ssl
|
import ssl
|
||||||
@ -16,15 +17,6 @@ import threading
|
|||||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def http_server_port(httpd):
|
|
||||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
|
||||||
# In Jython SSLSocket is not a subclass of socket.socket
|
|
||||||
sock = httpd.socket.sock
|
|
||||||
else:
|
|
||||||
sock = httpd.socket
|
|
||||||
return sock.getsockname()[1]
|
|
||||||
|
|
||||||
|
|
||||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
pass
|
pass
|
||||||
|
@ -14,4 +14,4 @@ from youtube_dl.postprocessor import MetadataFromTitlePP
|
|||||||
class TestMetadataFromTitle(unittest.TestCase):
|
class TestMetadataFromTitle(unittest.TestCase):
|
||||||
def test_format_to_regex(self):
|
def test_format_to_regex(self):
|
||||||
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
|
||||||
self.assertEqual(pp._titleregex, '(?P<title>.+)\ \-\ (?P<artist>.+)')
|
self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
|
||||||
|
@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
|
||||||
webpage, 'kaltura partner id')
|
|
||||||
|
|
||||||
video_data = self._parse_json(
|
video_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||||
@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
(lambda x: x['episodeDetail']['content']['data'],
|
(lambda x: x['episodeDetail']['content']['data'],
|
||||||
lambda x: x['videoDetail']['content']['data']), dict)
|
lambda x: x['videoDetail']['content']['data']), dict)
|
||||||
ep_meta = ep_data.get('full_video', {})
|
ep_meta = ep_data.get('full_video', {})
|
||||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
|
||||||
|
zype_id = ep_meta.get('zype_id')
|
||||||
|
if zype_id:
|
||||||
|
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||||
|
ie_key = 'Zype'
|
||||||
|
else:
|
||||||
|
partner_id = self._search_regex(
|
||||||
|
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||||
|
webpage, 'kaltura partner id')
|
||||||
|
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||||
|
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||||
|
ie_key = 'Kaltura'
|
||||||
|
|
||||||
title = ep_data.get('title') or ep_meta.get('title')
|
title = ep_data.get('title') or ep_meta.get('title')
|
||||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||||
@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
'url': embed_url,
|
||||||
'ie_key': 'Kaltura',
|
'ie_key': ie_key,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
@ -12,59 +13,83 @@ from ..utils import (
|
|||||||
class AparatIE(InfoExtractor):
|
class AparatIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.aparat.com/v/wP8On',
|
'url': 'http://www.aparat.com/v/wP8On',
|
||||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'wP8On',
|
'id': 'wP8On',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'تیم گلکسی 11 - زومیت',
|
'title': 'تیم گلکسی 11 - زومیت',
|
||||||
'age_limit': 0,
|
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||||
|
'duration': 231,
|
||||||
|
'timestamp': 1387394859,
|
||||||
|
'upload_date': '20131218',
|
||||||
|
'view_count': int,
|
||||||
},
|
},
|
||||||
# 'skip': 'Extremely unreliable',
|
}, {
|
||||||
}
|
# multiple formats
|
||||||
|
'url': 'https://www.aparat.com/v/8dflw/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# Note: There is an easier-to-parse configuration at
|
# Provides more metadata
|
||||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||||
# but the URL in there does not work
|
|
||||||
webpage = self._download_webpage(
|
|
||||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
if not webpage:
|
||||||
|
# Note: There is an easier-to-parse configuration at
|
||||||
|
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||||
|
# but the URL in there does not work
|
||||||
|
webpage = self._download_webpage(
|
||||||
|
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||||
|
video_id)
|
||||||
|
|
||||||
file_list = self._parse_json(
|
options = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
|
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||||
'file list'),
|
webpage, 'options', group='value'),
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
|
player = options['plugins']['sabaPlayerPlugin']
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for item in file_list[0]:
|
for sources in player['multiSRC']:
|
||||||
file_url = url_or_none(item.get('file'))
|
for item in sources:
|
||||||
if not file_url:
|
if not isinstance(item, dict):
|
||||||
continue
|
continue
|
||||||
ext = mimetype2ext(item.get('type'))
|
file_url = url_or_none(item.get('src'))
|
||||||
label = item.get('label')
|
if not file_url:
|
||||||
formats.append({
|
continue
|
||||||
'url': file_url,
|
item_type = item.get('type')
|
||||||
'ext': ext,
|
if item_type == 'application/vnd.apple.mpegurl':
|
||||||
'format_id': label or ext,
|
formats.extend(self._extract_m3u8_formats(
|
||||||
'height': int_or_none(self._search_regex(
|
file_url, video_id, 'mp4',
|
||||||
r'(\d+)[pP]', label or '', 'height', default=None)),
|
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||||
})
|
fatal=False))
|
||||||
self._sort_formats(formats)
|
else:
|
||||||
|
ext = mimetype2ext(item.get('type'))
|
||||||
|
label = item.get('label')
|
||||||
|
formats.append({
|
||||||
|
'url': file_url,
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': 'http-%s' % (label or ext),
|
||||||
|
'height': int_or_none(self._search_regex(
|
||||||
|
r'(\d+)[pP]', label or '', 'height',
|
||||||
|
default=None)),
|
||||||
|
})
|
||||||
|
self._sort_formats(
|
||||||
|
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
return {
|
if not info.get('title'):
|
||||||
|
info['title'] = player['title']
|
||||||
|
|
||||||
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'thumbnail': url_or_none(options.get('poster')),
|
||||||
'thumbnail': thumbnail,
|
'duration': int_or_none(player.get('duration')),
|
||||||
'age_limit': self._family_friendly_search(webpage),
|
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
})
|
||||||
|
@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
|||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||||
webpage, 'player data')), display_id)['config']['initial_video']
|
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||||
|
webpage, 'player data', group='json')),
|
||||||
|
display_id)['config']['initial_video']
|
||||||
|
|
||||||
video_id = video_data['id']
|
video_id = video_data['id']
|
||||||
video_title = video_data['title']
|
video_title = video_data['title']
|
||||||
|
@ -1,213 +1,86 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
|
||||||
get_element_by_class,
|
|
||||||
get_element_by_id,
|
|
||||||
strip_or_none,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class AZMedienBaseIE(InfoExtractor):
|
class AZMedienIE(InfoExtractor):
|
||||||
def _kaltura_video(self, partner_id, entry_id):
|
|
||||||
return self.url_result(
|
|
||||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
|
||||||
video_id=entry_id)
|
|
||||||
|
|
||||||
|
|
||||||
class AZMedienIE(AZMedienBaseIE):
|
|
||||||
IE_DESC = 'AZ Medien videos'
|
IE_DESC = 'AZ Medien videos'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
(?:
|
(?P<host>
|
||||||
telezueri\.ch|
|
telezueri\.ch|
|
||||||
telebaern\.tv|
|
telebaern\.tv|
|
||||||
telem1\.ch
|
telem1\.ch
|
||||||
)/
|
)/
|
||||||
[0-9]+-show-[^/\#]+
|
[^/]+/
|
||||||
(?:
|
(?P<id>
|
||||||
/[0-9]+-episode-[^/\#]+
|
[^/]+-(?P<article_id>\d+)
|
||||||
(?:
|
|
||||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
|
||||||
\#
|
|
||||||
)|
|
|
||||||
\#
|
|
||||||
)
|
)
|
||||||
(?P<id>[^\#]+)
|
(?:
|
||||||
|
\#video=
|
||||||
|
(?P<kaltura_id>
|
||||||
|
[_0-9a-z]+
|
||||||
|
)
|
||||||
|
)?
|
||||||
'''
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# URL with 'segment'
|
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_2444peh4',
|
'id': '1_anruz3wy',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
'uploader_id': 'TVOnline',
|
||||||
'uploader_id': 'TeleZ?ri',
|
'upload_date': '20180930',
|
||||||
'upload_date': '20161218',
|
'timestamp': 1538328802,
|
||||||
'timestamp': 1482084490,
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# URL with 'segment' and fragment:
|
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
|
||||||
'only_matching': True
|
|
||||||
}, {
|
|
||||||
# URL with 'episode' and fragment:
|
|
||||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
|
||||||
'only_matching': True
|
|
||||||
}, {
|
|
||||||
# URL with 'show' and fragment:
|
|
||||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
_PARTNER_ID = '1719221'
|
||||||
video_id = self._match_id(url)
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
|
||||||
webpage, 'kaltura partner id')
|
|
||||||
entry_id = self._html_search_regex(
|
|
||||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
|
||||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
|
||||||
|
|
||||||
return self._kaltura_video(partner_id, entry_id)
|
|
||||||
|
|
||||||
|
|
||||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
|
||||||
IE_DESC = 'AZ Medien playlists'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https?://
|
|
||||||
(?:www\.)?
|
|
||||||
(?:
|
|
||||||
telezueri\.ch|
|
|
||||||
telebaern\.tv|
|
|
||||||
telem1\.ch
|
|
||||||
)/
|
|
||||||
(?P<id>[0-9]+-
|
|
||||||
(?:
|
|
||||||
show|
|
|
||||||
topic|
|
|
||||||
themen
|
|
||||||
)-[^/\#]+
|
|
||||||
(?:
|
|
||||||
/[0-9]+-episode-[^/\#]+
|
|
||||||
)?
|
|
||||||
)$
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
# URL with 'episode'
|
|
||||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
|
||||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
|
||||||
},
|
|
||||||
'playlist_count': 9,
|
|
||||||
}, {
|
|
||||||
# URL with 'themen'
|
|
||||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '258-themen-tele-m1-classics',
|
|
||||||
'title': 'Tele M1 Classics',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 15,
|
|
||||||
}, {
|
|
||||||
# URL with 'topic', contains nested playlists
|
|
||||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# URL with 'show' only
|
|
||||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
|
||||||
'only_matching': True
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
show_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
webpage = self._download_webpage(url, show_id)
|
host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
entry_id = mobj.group('kaltura_id')
|
||||||
|
|
||||||
entries = []
|
if not entry_id:
|
||||||
|
api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
|
||||||
|
payload = {
|
||||||
|
'query': '''query VideoContext($articleId: ID!) {
|
||||||
|
article: node(id: $articleId) {
|
||||||
|
... on Article {
|
||||||
|
mainAssetRelation {
|
||||||
|
asset {
|
||||||
|
... on VideoAsset {
|
||||||
|
kalturaId
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''',
|
||||||
|
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||||
|
}
|
||||||
|
json_data = self._download_json(
|
||||||
|
api_url, video_id, headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
data=json.dumps(payload).encode())
|
||||||
|
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||||
|
|
||||||
partner_id = self._search_regex(
|
return self.url_result(
|
||||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||||
webpage, 'kaltura partner id', default=None)
|
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||||
|
|
||||||
if partner_id:
|
|
||||||
entries = [
|
|
||||||
self._kaltura_video(partner_id, m.group('id'))
|
|
||||||
for m in re.finditer(
|
|
||||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
|
||||||
|
|
||||||
if not entries:
|
|
||||||
entries = [
|
|
||||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
|
||||||
for m in re.finditer(
|
|
||||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
|
||||||
|
|
||||||
if not entries:
|
|
||||||
entries = [
|
|
||||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
|
||||||
# ie_key
|
|
||||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
|
||||||
self.url_result(urljoin(url, m.group('url')))
|
|
||||||
for m in re.finditer(
|
|
||||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
|
||||||
|
|
||||||
title = self._search_regex(
|
|
||||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'title',
|
|
||||||
default=strip_or_none(get_element_by_id(
|
|
||||||
'video-title', webpage)), group='title')
|
|
||||||
|
|
||||||
return self.playlist_result(entries, show_id, title)
|
|
||||||
|
|
||||||
|
|
||||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
|
||||||
IE_DESC = 'AZ Medien show playlists'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https?://
|
|
||||||
(?:www\.)?
|
|
||||||
(?:
|
|
||||||
telezueri\.ch|
|
|
||||||
telebaern\.tv|
|
|
||||||
telem1\.ch
|
|
||||||
)/
|
|
||||||
(?:
|
|
||||||
all-episodes|
|
|
||||||
alle-episoden
|
|
||||||
)/
|
|
||||||
(?P<id>[^/?#&]+)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'astrotalk',
|
|
||||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
|
||||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 13,
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
|
||||||
entries = [self.url_result(
|
|
||||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
|
||||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
|
||||||
title = self._og_search_title(webpage, fatal=False)
|
|
||||||
description = self._og_search_description(webpage)
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
|
||||||
|
@ -37,7 +37,7 @@ class BitChuteIE(InfoExtractor):
|
|||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
|
||||||
})
|
})
|
||||||
|
|
||||||
title = self._search_regex(
|
title = self._html_search_regex(
|
||||||
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
|
||||||
webpage, 'title', default=None) or self._html_search_meta(
|
webpage, 'title', default=None) or self._html_search_meta(
|
||||||
'description', webpage, 'title',
|
'description', webpage, 'title',
|
||||||
|
@ -333,7 +333,6 @@ class BrightcoveLegacyIE(InfoExtractor):
|
|||||||
if publisher_id and publisher_id[0].isdigit():
|
if publisher_id and publisher_id[0].isdigit():
|
||||||
publisher_id = publisher_id[0]
|
publisher_id = publisher_id[0]
|
||||||
if not publisher_id:
|
if not publisher_id:
|
||||||
valid_key = lambda key: key and ',' in key
|
|
||||||
player_key = query.get('playerKey')
|
player_key = query.get('playerKey')
|
||||||
if player_key and ',' in player_key[0]:
|
if player_key and ',' in player_key[0]:
|
||||||
player_key = player_key[0]
|
player_key = player_key[0]
|
||||||
|
142
youtube_dl/extractor/ciscolive.py
Normal file
142
youtube_dl/extractor/ciscolive.py
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveBaseIE(InfoExtractor):
|
||||||
|
# These appear to be constant across all Cisco Live presentations
|
||||||
|
# and are not tied to any user session or event
|
||||||
|
RAINFOCUS_API_URL = 'https://events.rainfocus.com/api/%s'
|
||||||
|
RAINFOCUS_API_PROFILE_ID = 'Na3vqYdAlJFSxhYTYQGuMbpafMqftalz'
|
||||||
|
RAINFOCUS_WIDGET_ID = 'n6l4Lo05R8fiy3RpUBm447dZN8uNWoye'
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5647924234001/SyK2FdqjM_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
HEADERS = {
|
||||||
|
'Origin': 'https://ciscolive.cisco.com',
|
||||||
|
'rfApiProfileId': RAINFOCUS_API_PROFILE_ID,
|
||||||
|
'rfWidgetId': RAINFOCUS_WIDGET_ID,
|
||||||
|
}
|
||||||
|
|
||||||
|
def _call_api(self, ep, rf_id, query, referrer, note=None):
|
||||||
|
headers = self.HEADERS.copy()
|
||||||
|
headers['Referer'] = referrer
|
||||||
|
return self._download_json(
|
||||||
|
self.RAINFOCUS_API_URL % ep, rf_id, note=note,
|
||||||
|
data=urlencode_postdata(query), headers=headers)
|
||||||
|
|
||||||
|
def _parse_rf_item(self, rf_item):
|
||||||
|
event_name = rf_item.get('eventName')
|
||||||
|
title = rf_item['title']
|
||||||
|
description = clean_html(rf_item.get('abstract'))
|
||||||
|
presenter_name = try_get(rf_item, lambda x: x['participants'][0]['fullName'])
|
||||||
|
bc_id = rf_item['videos'][0]['url']
|
||||||
|
bc_url = self.BRIGHTCOVE_URL_TEMPLATE % bc_id
|
||||||
|
duration = float_or_none(try_get(rf_item, lambda x: x['times'][0]['length']))
|
||||||
|
location = try_get(rf_item, lambda x: x['times'][0]['room'])
|
||||||
|
|
||||||
|
if duration:
|
||||||
|
duration = duration * 60
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': bc_url,
|
||||||
|
'ie_key': 'BrightcoveNew',
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
|
'creator': presenter_name,
|
||||||
|
'location': location,
|
||||||
|
'series': event_name,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveSessionIE(CiscoLiveBaseIE):
|
||||||
|
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/\??[^#]*#/session/(?P<id>[^/?&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://ciscolive.cisco.com/on-demand-library/?#/session/1423353499155001FoSs',
|
||||||
|
'md5': 'c98acf395ed9c9f766941c70f5352e22',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5803694304001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': '13 Smart Automations to Monitor Your Cisco IOS Network',
|
||||||
|
'description': 'md5:ec4a436019e09a918dec17714803f7cc',
|
||||||
|
'timestamp': 1530305395,
|
||||||
|
'upload_date': '20180629',
|
||||||
|
'uploader_id': '5647924234001',
|
||||||
|
'location': '16B Mezz.',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
rf_id = self._match_id(url)
|
||||||
|
rf_result = self._call_api('session', rf_id, {'id': rf_id}, url)
|
||||||
|
return self._parse_rf_item(rf_result['items'][0])
|
||||||
|
|
||||||
|
|
||||||
|
class CiscoLiveSearchIE(CiscoLiveBaseIE):
|
||||||
|
_VALID_URL = r'https?://ciscolive\.cisco\.com/on-demand-library/'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.event=ciscoliveus2018&search.technicallevel=scpsSkillLevel_aintroductory&search.focus=scpsSessionFocus_designAndDeployment#/',
|
||||||
|
'info_dict': {
|
||||||
|
'title': 'Search query',
|
||||||
|
},
|
||||||
|
'playlist_count': 5,
|
||||||
|
}, {
|
||||||
|
'url': 'https://ciscolive.cisco.com/on-demand-library/?search.technology=scpsTechnology_applicationDevelopment&search.technology=scpsTechnology_ipv6&search.focus=scpsSessionFocus_troubleshootingTroubleshooting#/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if CiscoLiveSessionIE.suitable(url) else super(CiscoLiveSearchIE, cls).suitable(url)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _check_bc_id_exists(rf_item):
|
||||||
|
return int_or_none(try_get(rf_item, lambda x: x['videos'][0]['url'])) is not None
|
||||||
|
|
||||||
|
def _entries(self, query, url):
|
||||||
|
query['size'] = 50
|
||||||
|
query['from'] = 0
|
||||||
|
for page_num in itertools.count(1):
|
||||||
|
results = self._call_api(
|
||||||
|
'search', None, query, url,
|
||||||
|
'Downloading search JSON page %d' % page_num)
|
||||||
|
sl = try_get(results, lambda x: x['sectionList'][0], dict)
|
||||||
|
if sl:
|
||||||
|
results = sl
|
||||||
|
items = results.get('items')
|
||||||
|
if not items or not isinstance(items, list):
|
||||||
|
break
|
||||||
|
for item in items:
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
continue
|
||||||
|
if not self._check_bc_id_exists(item):
|
||||||
|
continue
|
||||||
|
yield self._parse_rf_item(item)
|
||||||
|
size = int_or_none(results.get('size'))
|
||||||
|
if size is not None:
|
||||||
|
query['size'] = size
|
||||||
|
total = int_or_none(results.get('total'))
|
||||||
|
if total is not None and query['from'] + query['size'] > total:
|
||||||
|
break
|
||||||
|
query['from'] += query['size']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
query = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
query['type'] = 'session'
|
||||||
|
return self.playlist_result(
|
||||||
|
self._entries(query, url), playlist_title='Search query')
|
@ -1,19 +1,10 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import int_or_none
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
_translation_table = {
|
)
|
||||||
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
|
|
||||||
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
|
|
||||||
'y': 'l', 'z': 'i',
|
|
||||||
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _decode(s):
|
|
||||||
return ''.join(_translation_table.get(c, c) for c in s)
|
|
||||||
|
|
||||||
|
|
||||||
class CliphunterIE(InfoExtractor):
|
class CliphunterIE(InfoExtractor):
|
||||||
@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):
|
|||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, f in gexo_files.items():
|
for format_id, f in gexo_files.items():
|
||||||
video_url = f.get('url')
|
video_url = url_or_none(f.get('url'))
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
fmt = f.get('fmt')
|
fmt = f.get('fmt')
|
||||||
height = f.get('h')
|
height = f.get('h')
|
||||||
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': _decode(video_url),
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'width': int_or_none(f.get('w')),
|
'width': int_or_none(f.get('w')),
|
||||||
'height': int_or_none(height),
|
'height': int_or_none(height),
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
|
||||||
@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
|
|||||||
{'force_smil_url': True}),
|
{'force_smil_url': True}),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CNBCVideoIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '7000031301',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Trump: I don't necessarily agree with raising rates",
|
||||||
|
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
|
||||||
|
'timestamp': 1531958400,
|
||||||
|
'upload_date': '20180719',
|
||||||
|
'uploader': 'NBCU-CNBC',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
video_id = self._search_regex(
|
||||||
|
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
||||||
|
'video id')
|
||||||
|
return self.url_result(
|
||||||
|
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
||||||
|
CNBCIE.ie_key())
|
||||||
|
@ -69,6 +69,7 @@ from ..utils import (
|
|||||||
update_url_query,
|
update_url_query,
|
||||||
urljoin,
|
urljoin,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
url_or_none,
|
||||||
xpath_element,
|
xpath_element,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
xpath_with_ns,
|
xpath_with_ns,
|
||||||
@ -605,6 +606,11 @@ class InfoExtractor(object):
|
|||||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||||
if isinstance(err, compat_urllib_error.HTTPError):
|
if isinstance(err, compat_urllib_error.HTTPError):
|
||||||
if self.__can_accept_status_code(err, expected_status):
|
if self.__can_accept_status_code(err, expected_status):
|
||||||
|
# Retain reference to error to prevent file object from
|
||||||
|
# being closed before it can be read. Works around the
|
||||||
|
# effects of <https://bugs.python.org/issue15002>
|
||||||
|
# introduced in Python 3.4.1.
|
||||||
|
err.fp._error = err
|
||||||
return err.fp
|
return err.fp
|
||||||
|
|
||||||
if errnote is False:
|
if errnote is False:
|
||||||
@ -1213,10 +1219,10 @@ class InfoExtractor(object):
|
|||||||
def extract_video_object(e):
|
def extract_video_object(e):
|
||||||
assert e['@type'] == 'VideoObject'
|
assert e['@type'] == 'VideoObject'
|
||||||
info.update({
|
info.update({
|
||||||
'url': e.get('contentUrl'),
|
'url': url_or_none(e.get('contentUrl')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import xml.etree.ElementTree as etree
|
||||||
import zlib
|
import zlib
|
||||||
|
|
||||||
from hashlib import sha1
|
from hashlib import sha1
|
||||||
@ -398,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'Downloading subtitles for ' + sub_name, data={
|
'Downloading subtitles for ' + sub_name, data={
|
||||||
'subtitle_script_id': sub_id,
|
'subtitle_script_id': sub_id,
|
||||||
})
|
})
|
||||||
if sub_doc is None:
|
if not isinstance(sub_doc, etree.Element):
|
||||||
continue
|
continue
|
||||||
sid = sub_doc.get('id')
|
sid = sub_doc.get('id')
|
||||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||||
@ -515,7 +516,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'video_quality': stream_quality,
|
'video_quality': stream_quality,
|
||||||
'current_page': url,
|
'current_page': url,
|
||||||
})
|
})
|
||||||
if streamdata is not None:
|
if isinstance(streamdata, etree.Element):
|
||||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||||
if stream_info is not None:
|
if stream_info is not None:
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
@ -526,7 +527,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'video_format': stream_format,
|
'video_format': stream_format,
|
||||||
'video_encode_quality': stream_quality,
|
'video_encode_quality': stream_quality,
|
||||||
})
|
})
|
||||||
if stream_info is not None:
|
if isinstance(stream_info, etree.Element):
|
||||||
stream_infos.append(stream_info)
|
stream_infos.append(stream_info)
|
||||||
for stream_info in stream_infos:
|
for stream_info in stream_infos:
|
||||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||||
@ -598,10 +599,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
series = self._html_search_regex(
|
series = self._html_search_regex(
|
||||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||||
webpage, 'series', fatal=False)
|
webpage, 'series', fatal=False)
|
||||||
season = xpath_text(metadata, 'series_title')
|
|
||||||
|
|
||||||
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
|
season = episode = episode_number = duration = thumbnail = None
|
||||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
|
|
||||||
|
if isinstance(metadata, etree.Element):
|
||||||
|
season = xpath_text(metadata, 'series_title')
|
||||||
|
episode = xpath_text(metadata, 'episode_title')
|
||||||
|
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||||
|
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||||
|
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||||
|
|
||||||
|
if not episode:
|
||||||
|
episode = media_metadata.get('title')
|
||||||
|
if not episode_number:
|
||||||
|
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||||
|
if not thumbnail:
|
||||||
|
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||||
|
|
||||||
season_number = int_or_none(self._search_regex(
|
season_number = int_or_none(self._search_regex(
|
||||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||||
@ -611,8 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
|||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_title,
|
'title': video_title,
|
||||||
'description': video_description,
|
'description': video_description,
|
||||||
'duration': float_or_none(media_metadata.get('duration'), 1000),
|
'duration': duration,
|
||||||
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
|
'thumbnail': thumbnail,
|
||||||
'uploader': video_uploader,
|
'uploader': video_uploader,
|
||||||
'upload_date': video_upload_date,
|
'upload_date': video_upload_date,
|
||||||
'series': series,
|
'series': series,
|
||||||
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
|||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_age_limit,
|
parse_age_limit,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_data = self._download_json(
|
data = self._download_json(
|
||||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||||
video_id)['video']
|
video_id)
|
||||||
|
if data.get('result') != 'ok':
|
||||||
|
raise ExtractorError(data['msg'], expected=True)
|
||||||
|
video_data = data['video']
|
||||||
title = video_data['title']
|
title = video_data['title']
|
||||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||||
|
|
||||||
|
@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
|
|||||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||||
|
|
||||||
video_sources = self._download_json(sources_url, video_id)
|
video_sources = self._download_json(sources_url, video_id)
|
||||||
|
body = video_sources.get('body')
|
||||||
|
if body:
|
||||||
|
video_sources = body
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for rendition in video_sources['renditions']:
|
for rendition in video_sources['renditions']:
|
||||||
|
@ -88,11 +88,7 @@ from .awaan import (
|
|||||||
AWAANLiveIE,
|
AWAANLiveIE,
|
||||||
AWAANSeasonIE,
|
AWAANSeasonIE,
|
||||||
)
|
)
|
||||||
from .azmedien import (
|
from .azmedien import AZMedienIE
|
||||||
AZMedienIE,
|
|
||||||
AZMedienPlaylistIE,
|
|
||||||
AZMedienShowPlaylistIE,
|
|
||||||
)
|
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
from .bambuser import BambuserIE, BambuserChannelIE
|
from .bambuser import BambuserIE, BambuserChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
@ -198,6 +194,10 @@ from .chirbit import (
|
|||||||
ChirbitProfileIE,
|
ChirbitProfileIE,
|
||||||
)
|
)
|
||||||
from .cinchcast import CinchcastIE
|
from .cinchcast import CinchcastIE
|
||||||
|
from .ciscolive import (
|
||||||
|
CiscoLiveSessionIE,
|
||||||
|
CiscoLiveSearchIE,
|
||||||
|
)
|
||||||
from .cjsw import CJSWIE
|
from .cjsw import CJSWIE
|
||||||
from .cliphunter import CliphunterIE
|
from .cliphunter import CliphunterIE
|
||||||
from .clippit import ClippitIE
|
from .clippit import ClippitIE
|
||||||
@ -209,7 +209,10 @@ from .cloudy import CloudyIE
|
|||||||
from .clubic import ClubicIE
|
from .clubic import ClubicIE
|
||||||
from .clyp import ClypIE
|
from .clyp import ClypIE
|
||||||
from .cmt import CMTIE
|
from .cmt import CMTIE
|
||||||
from .cnbc import CNBCIE
|
from .cnbc import (
|
||||||
|
CNBCIE,
|
||||||
|
CNBCVideoIE,
|
||||||
|
)
|
||||||
from .cnn import (
|
from .cnn import (
|
||||||
CNNIE,
|
CNNIE,
|
||||||
CNNBlogsIE,
|
CNNBlogsIE,
|
||||||
@ -540,6 +543,7 @@ from .la7 import LA7IE
|
|||||||
from .laola1tv import (
|
from .laola1tv import (
|
||||||
Laola1TvEmbedIE,
|
Laola1TvEmbedIE,
|
||||||
Laola1TvIE,
|
Laola1TvIE,
|
||||||
|
EHFTVIE,
|
||||||
ITTFIE,
|
ITTFIE,
|
||||||
)
|
)
|
||||||
from .lci import LCIIE
|
from .lci import LCIIE
|
||||||
@ -569,6 +573,10 @@ from .limelight import (
|
|||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .line import LineTVIE
|
from .line import LineTVIE
|
||||||
|
from .linkedin import (
|
||||||
|
LinkedInLearningIE,
|
||||||
|
LinkedInLearningCourseIE,
|
||||||
|
)
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .liveleak import (
|
from .liveleak import (
|
||||||
LiveLeakIE,
|
LiveLeakIE,
|
||||||
@ -1043,7 +1051,7 @@ from .spike import (
|
|||||||
)
|
)
|
||||||
from .stitcher import StitcherIE
|
from .stitcher import StitcherIE
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxEmbedIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
@ -1116,6 +1124,10 @@ from .thisamericanlife import ThisAmericanLifeIE
|
|||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .thisoldhouse import ThisOldHouseIE
|
from .thisoldhouse import ThisOldHouseIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
|
from .tiktok import (
|
||||||
|
TikTokIE,
|
||||||
|
TikTokUserIE,
|
||||||
|
)
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tmz import (
|
from .tmz import (
|
||||||
TMZIE,
|
TMZIE,
|
||||||
@ -1189,6 +1201,7 @@ from .tweakers import TweakersIE
|
|||||||
from .twentyfourvideo import TwentyFourVideoIE
|
from .twentyfourvideo import TwentyFourVideoIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .twentythreevideo import TwentyThreeVideoIE
|
from .twentythreevideo import TwentyThreeVideoIE
|
||||||
|
from .twitcasting import TwitCastingIE
|
||||||
from .twitch import (
|
from .twitch import (
|
||||||
TwitchVideoIE,
|
TwitchVideoIE,
|
||||||
TwitchChapterIE,
|
TwitchChapterIE,
|
||||||
@ -1294,6 +1307,7 @@ from .vimeo import (
|
|||||||
VimeoReviewIE,
|
VimeoReviewIE,
|
||||||
VimeoUserIE,
|
VimeoUserIE,
|
||||||
VimeoWatchLaterIE,
|
VimeoWatchLaterIE,
|
||||||
|
VHXEmbedIE,
|
||||||
)
|
)
|
||||||
from .vimple import VimpleIE
|
from .vimple import VimpleIE
|
||||||
from .vine import (
|
from .vine import (
|
||||||
@ -1381,6 +1395,7 @@ from .wsj import (
|
|||||||
WSJIE,
|
WSJIE,
|
||||||
WSJArticleIE,
|
WSJArticleIE,
|
||||||
)
|
)
|
||||||
|
from .wwe import WWEIE
|
||||||
from .xbef import XBefIE
|
from .xbef import XBefIE
|
||||||
from .xboxclips import XboxClipsIE
|
from .xboxclips import XboxClipsIE
|
||||||
from .xfileshare import XFileShareIE
|
from .xfileshare import XFileShareIE
|
||||||
@ -1473,3 +1488,4 @@ from .zattoo import (
|
|||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import ZingMp3IE
|
||||||
|
from .zype import ZypeIE
|
||||||
|
@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
|||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||||
|
@ -1,43 +1,33 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FoxSportsIE(InfoExtractor):
|
class FoxSportsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*(?P<id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?foxsports\.com/(?:[^/]+/)*video/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
'url': 'http://www.foxsports.com/tennessee/video/432609859715',
|
||||||
'md5': 'b49050e955bebe32c301972e4012ac17',
|
'md5': 'b49050e955bebe32c301972e4012ac17',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'bwduI3X_TgUB',
|
'id': '432609859715',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
'title': 'Courtney Lee on going up 2-0 in series vs. Blazers',
|
||||||
'description': 'Courtney Lee talks about Memphis being focused.',
|
'description': 'Courtney Lee talks about Memphis being focused.',
|
||||||
'upload_date': '20150423',
|
# TODO: fix timestamp
|
||||||
'timestamp': 1429761109,
|
'upload_date': '19700101', # '20150423',
|
||||||
|
# 'timestamp': 1429761109,
|
||||||
'uploader': 'NEWA-FNG-FOXSPORTS',
|
'uploader': 'NEWA-FNG-FOXSPORTS',
|
||||||
},
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
return self.url_result(
|
||||||
|
'https://feed.theplatform.com/f/BKQ29B/foxsports-all?byId=' + video_id, 'ThePlatformFeed')
|
||||||
config = self._parse_json(
|
|
||||||
self._html_search_regex(
|
|
||||||
r"""class="[^"]*(?:fs-player|platformPlayer-wrapper)[^"]*".+?data-player-config='([^']+)'""",
|
|
||||||
webpage, 'data player config'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
return self.url_result(smuggle_url(update_url_query(
|
|
||||||
config['releaseURL'], {
|
|
||||||
'mbr': 'true',
|
|
||||||
'switch': 'http',
|
|
||||||
}), {'force_smil_url': True}))
|
|
||||||
|
@ -14,7 +14,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class GameSpotIE(OnceIE):
|
class GameSpotIE(OnceIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?gamespot\.com/(?:video|article|review)s/(?:[^/]+/\d+-|embed/)(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/',
|
||||||
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
'md5': 'b2a30deaa8654fcccd43713a6b6a4825',
|
||||||
@ -41,6 +41,9 @@ class GameSpotIE(OnceIE):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
'url': 'https://www.gamespot.com/articles/the-last-of-us-2-receives-new-ps4-trailer/1100-6454469/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.gamespot.com/reviews/gears-of-war-review/1900-6161188/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -47,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
|
|||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .tvc import TVCIE
|
from .tvc import TVCIE
|
||||||
from .sportbox import SportBoxEmbedIE
|
from .sportbox import SportBoxIE
|
||||||
from .smotri import SmotriIE
|
from .smotri import SmotriIE
|
||||||
from .myvi import MyviIE
|
from .myvi import MyviIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
@ -114,6 +114,7 @@ from .apa import APAIE
|
|||||||
from .foxnews import FoxNewsIE
|
from .foxnews import FoxNewsIE
|
||||||
from .viqeo import ViqeoIE
|
from .viqeo import ViqeoIE
|
||||||
from .expressen import ExpressenIE
|
from .expressen import ExpressenIE
|
||||||
|
from .zype import ZypeIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
@ -2070,6 +2071,20 @@ class GenericIE(InfoExtractor):
|
|||||||
},
|
},
|
||||||
'playlist_count': 6,
|
'playlist_count': 6,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Zype embed
|
||||||
|
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5b400b834b32992a310622b9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Smoky Barbecue Favorites',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
|
},
|
||||||
|
'add_ie': [ZypeIE.ie_key()],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
# videojs embed
|
# videojs embed
|
||||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||||
@ -2636,9 +2651,9 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.url_result(tvc_url, 'TVC')
|
return self.url_result(tvc_url, 'TVC')
|
||||||
|
|
||||||
# Look for embedded SportBox player
|
# Look for embedded SportBox player
|
||||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
sportbox_urls = SportBoxIE._extract_urls(webpage)
|
||||||
if sportbox_urls:
|
if sportbox_urls:
|
||||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded XHamster player
|
# Look for embedded XHamster player
|
||||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||||
@ -3129,6 +3144,11 @@ class GenericIE(InfoExtractor):
|
|||||||
return self.playlist_from_matches(
|
return self.playlist_from_matches(
|
||||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||||
|
|
||||||
|
zype_urls = ZypeIE._extract_urls(webpage)
|
||||||
|
if zype_urls:
|
||||||
|
return self.playlist_from_matches(
|
||||||
|
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
|
@ -53,7 +53,7 @@ class GfycatIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
gfy = self._download_json(
|
gfy = self._download_json(
|
||||||
'http://gfycat.com/cajax/get/%s' % video_id,
|
'https://api.gfycat.com/v1/gfycats/%s' % video_id,
|
||||||
video_id, 'Downloading video info')
|
video_id, 'Downloading video info')
|
||||||
if 'error' in gfy:
|
if 'error' in gfy:
|
||||||
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
raise ExtractorError('Gfycat said: ' + gfy['error'], expected=True)
|
||||||
|
@ -43,6 +43,7 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
IE_NAME = 'hotstar'
|
IE_NAME = 'hotstar'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
# contentData
|
||||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1000076273',
|
'id': '1000076273',
|
||||||
@ -57,6 +58,10 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# contentDetail
|
||||||
|
'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
@ -74,8 +79,12 @@ class HotStarIE(HotStarBaseIE):
|
|||||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||||
webpage, 'app state'), video_id)
|
webpage, 'app state'), video_id)
|
||||||
video_data = {}
|
video_data = {}
|
||||||
|
getters = (
|
||||||
|
lambda x, k=k: x['initialState']['content%s' % k]['content']
|
||||||
|
for k in ('Data', 'Detail')
|
||||||
|
)
|
||||||
for v in app_state.values():
|
for v in app_state.values():
|
||||||
content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
|
content = try_get(v, getters, dict)
|
||||||
if content and content.get('contentId') == video_id:
|
if content and content.get('contentId') == video_id:
|
||||||
video_data = content
|
video_data = content
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
class IviIE(InfoExtractor):
|
class IviIE(InfoExtractor):
|
||||||
IE_DESC = 'ivi.ru'
|
IE_DESC = 'ivi.ru'
|
||||||
IE_NAME = 'ivi'
|
IE_NAME = 'ivi'
|
||||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
_GEO_COUNTRIES = ['RU']
|
_GEO_COUNTRIES = ['RU']
|
||||||
|
|
||||||
@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
|
|||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
'skip': 'Only works from Russia',
|
'skip': 'Only works from Russia',
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
'url': 'https://www.ivi.tv/watch/33560/',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Sorted by quality
|
# Sorted by quality
|
||||||
|
@ -61,7 +61,7 @@ class JojIE(InfoExtractor):
|
|||||||
|
|
||||||
bitrates = self._parse_json(
|
bitrates = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bitrates\s*=\s*({.+?});', webpage, 'bitrates',
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
|
||||||
default='{}'),
|
default='{}'),
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
|
||||||
|
@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
|
|||||||
'entryId': video_id,
|
'entryId': video_id,
|
||||||
'service': 'baseentry',
|
'service': 'baseentry',
|
||||||
'ks': '{1:result:ks}',
|
'ks': '{1:result:ks}',
|
||||||
|
'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
|
||||||
|
'responseProfile:type': 1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'action': 'getbyentryid',
|
'action': 'getbyentryid',
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
stream_access_url, video_id, headers={
|
self._proto_relative_url(stream_access_url, 'https:'), video_id,
|
||||||
|
headers={
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||||
|
|
||||||
@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class Laola1TvIE(Laola1TvEmbedIE):
|
class Laola1TvBaseIE(Laola1TvEmbedIE):
    """Common page-scraping logic shared by the laola1.tv style extractors."""

    def _extract_video(self, url):
        """Extract a single video (or live stream) from a portal page.

        Downloads the page at ``url``, reads the inline ``conf`` JavaScript
        object, queries the configuration endpoint named by
        ``conf['configUrl']`` and resolves the stream formats through
        ``_extract_token_url`` and ``_extract_formats``.

        Returns an info dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``categories``, ``formats`` and
        ``is_live``.

        Raises ExtractorError (expected) when the page reports a finished
        live stream or the configuration endpoint returns an ``error``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        if 'Dieser Livestream ist bereits beendet.' in webpage:
            raise ExtractorError('This live stream has already finished.', expected=True)

        # The ``shareurl: ...,`` entry is cut out of the object literal before
        # it is handed to js_to_json (presumably because its value does not
        # convert to valid JSON -- confirm against a live page).
        conf = self._parse_json(self._search_regex(
            r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
            display_id,
            transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
        video_id = conf['videoid']

        config = self._download_json(conf['configUrl'], video_id, query={
            'videoid': video_id,
            'partnerid': conf['partnerid'],
            'language': conf.get('language', ''),
            'portal': conf.get('portalid', ''),
        })
        error = config.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        video_data = config['video']
        title = video_data['title']
        is_live = video_data.get('isLivestream') and video_data.get('isLive')
        # 'metaInformation' is optional metadata: fall back to an empty dict
        # so a missing or null value does not abort the whole extraction.
        meta = video_data.get('metaInformation') or {}
        sports = meta.get('sports')
        categories = sports.split(',') if sports else []

        token_url = self._extract_token_url(
            video_data['streamAccess'], video_id,
            video_data['abo']['required'])

        formats = self._extract_formats(token_url, video_id)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': self._live_title(title) if is_live else title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('image'),
            'categories': categories,
            'formats': formats,
            'is_live': is_live,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class Laola1TvIE(Laola1TvBaseIE):
|
||||||
IE_NAME = 'laola1tv'
|
IE_NAME = 'laola1tv'
|
||||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
return self._extract_video(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
|
|
||||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
class EHFTVIE(Laola1TvBaseIE):
|
||||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
IE_NAME = 'ehftv'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
conf = self._parse_json(self._search_regex(
|
_TESTS = [{
|
||||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
|
||||||
display_id, js_to_json)
|
'info_dict': {
|
||||||
|
'id': '1166761',
|
||||||
|
'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
|
||||||
|
'is_live': False,
|
||||||
|
'categories': ['Handball'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
video_id = conf['videoid']
|
def _real_extract(self, url):
|
||||||
|
return self._extract_video(url)
|
||||||
config = self._download_json(conf['configUrl'], video_id, query={
|
|
||||||
'videoid': video_id,
|
|
||||||
'partnerid': conf['partnerid'],
|
|
||||||
'language': conf.get('language', ''),
|
|
||||||
'portal': conf.get('portalid', ''),
|
|
||||||
})
|
|
||||||
error = config.get('error')
|
|
||||||
if error:
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
|
||||||
|
|
||||||
video_data = config['video']
|
|
||||||
title = video_data['title']
|
|
||||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
|
||||||
meta = video_data.get('metaInformation')
|
|
||||||
sports = meta.get('sports')
|
|
||||||
categories = sports.split(',') if sports else []
|
|
||||||
|
|
||||||
token_url = self._extract_token_url(
|
|
||||||
video_data['streamAccess'], video_id,
|
|
||||||
video_data['abo']['required'])
|
|
||||||
|
|
||||||
formats = self._extract_formats(token_url, video_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': self._live_title(title) if is_live else title,
|
|
||||||
'description': video_data.get('description'),
|
|
||||||
'thumbnail': video_data.get('image'),
|
|
||||||
'categories': categories,
|
|
||||||
'formats': formats,
|
|
||||||
'is_live': is_live,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ITTFIE(InfoExtractor):
|
class ITTFIE(InfoExtractor):
|
||||||
|
@ -16,16 +16,15 @@ from ..utils import (
|
|||||||
class LibraryOfCongressIE(InfoExtractor):
|
class LibraryOfCongressIE(InfoExtractor):
|
||||||
IE_NAME = 'loc'
|
IE_NAME = 'loc'
|
||||||
IE_DESC = 'Library of Congress'
|
IE_DESC = 'Library of Congress'
|
||||||
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?loc\.gov/(?:item/|today/cyberlc/feature_wdesc\.php\?.*\brec=)(?P<id>[0-9a-z_.]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# embedded via <div class="media-player"
|
# embedded via <div class="media-player"
|
||||||
'url': 'http://loc.gov/item/90716351/',
|
'url': 'http://loc.gov/item/90716351/',
|
||||||
'md5': '353917ff7f0255aa6d4b80a034833de8',
|
'md5': '6ec0ae8f07f86731b1b2ff70f046210a',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '90716351',
|
'id': '90716351',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Pa's trip to Mars",
|
'title': "Pa's trip to Mars",
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 0,
|
'duration': 0,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
},
|
},
|
||||||
@ -57,6 +56,12 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.loc.gov/item/ihas.200197114/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.loc.gov/item/afc1981005_afs20503/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -67,12 +72,13 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
(r'id=(["\'])media-player-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
r'<video[^>]+id=(["\'])uuid-(?P<id>.+?)\1',
|
||||||
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
r'<video[^>]+data-uuid=(["\'])(?P<id>.+?)\1',
|
||||||
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1'),
|
r'mediaObjectId\s*:\s*(["\'])(?P<id>.+?)\1',
|
||||||
|
r'data-tab="share-media-(?P<id>[0-9A-F]{32})"'),
|
||||||
webpage, 'media id', group='id')
|
webpage, 'media id', group='id')
|
||||||
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
'https://media.loc.gov/services/v1/media?id=%s&context=json' % media_id,
|
||||||
video_id)['mediaObject']
|
media_id)['mediaObject']
|
||||||
|
|
||||||
derivative = data['derivatives'][0]
|
derivative = data['derivatives'][0]
|
||||||
media_url = derivative['derivativeUrl']
|
media_url = derivative['derivativeUrl']
|
||||||
@ -89,25 +95,29 @@ class LibraryOfCongressIE(InfoExtractor):
|
|||||||
if ext not in ('mp4', 'mp3'):
|
if ext not in ('mp4', 'mp3'):
|
||||||
media_url += '.mp4' if is_video else '.mp3'
|
media_url += '.mp4' if is_video else '.mp3'
|
||||||
|
|
||||||
if 'vod/mp4:' in media_url:
|
formats = []
|
||||||
formats = [{
|
if '/vod/mp4:' in media_url:
|
||||||
'url': media_url.replace('vod/mp4:', 'hls-vod/media/') + '.m3u8',
|
formats.append({
|
||||||
|
'url': media_url.replace('/vod/mp4:', '/hls-vod/media/') + '.m3u8',
|
||||||
'format_id': 'hls',
|
'format_id': 'hls',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'protocol': 'm3u8_native',
|
'protocol': 'm3u8_native',
|
||||||
'quality': 1,
|
'quality': 1,
|
||||||
}]
|
})
|
||||||
elif 'vod/mp3:' in media_url:
|
http_format = {
|
||||||
formats = [{
|
'url': re.sub(r'(://[^/]+/)(?:[^/]+/)*(?:mp4|mp3):', r'\1', media_url),
|
||||||
'url': media_url.replace('vod/mp3:', ''),
|
'format_id': 'http',
|
||||||
'vcodec': 'none',
|
'quality': 1,
|
||||||
}]
|
}
|
||||||
|
if not is_video:
|
||||||
|
http_format['vcodec'] = 'none'
|
||||||
|
formats.append(http_format)
|
||||||
|
|
||||||
download_urls = set()
|
download_urls = set()
|
||||||
for m in re.finditer(
|
for m in re.finditer(
|
||||||
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
r'<option[^>]+value=(["\'])(?P<url>.+?)\1[^>]+data-file-download=[^>]+>\s*(?P<id>.+?)(?:(?: |\s+)\((?P<size>.+?)\))?\s*<', webpage):
|
||||||
format_id = m.group('id').lower()
|
format_id = m.group('id').lower()
|
||||||
if format_id == 'gif':
|
if format_id in ('gif', 'jpeg'):
|
||||||
continue
|
continue
|
||||||
download_url = m.group('url')
|
download_url = m.group('url')
|
||||||
if download_url in download_urls:
|
if download_url in download_urls:
|
||||||
|
175
youtube_dl/extractor/linkedin.py
Normal file
175
youtube_dl/extractor/linkedin.py
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LinkedInLearningBaseIE(InfoExtractor):
    """Login handling and API access shared by the LinkedIn Learning extractors."""

    _NETRC_MACHINE = 'linkedin'

    def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
        """Query the ``detailedCourses`` endpoint and return its first element.

        ``fields`` is passed through as the ``fields`` query parameter.  When
        ``video_slug`` is given, ``videoSlug`` and ``resolution`` are added to
        the query; ``resolution`` must then be an integer, since it is also
        formatted with ``%d`` for the progress message.

        Raises ExtractorError (expected) when no ``JSESSIONID`` cookie is
        available for the API URL, because the request needs its value as
        the ``Csrf-Token`` header.
        """
        query = {
            'courseSlug': course_slug,
            'fields': fields,
            'q': 'slugs',
        }
        sub = ''
        if video_slug:
            query.update({
                'videoSlug': video_slug,
                'resolution': '_%s' % resolution,
            })
            sub = ' %dp' % resolution
        api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
        try:
            csrf_token = self._get_cookies(api_url)['JSESSIONID'].value
        except KeyError:
            # Without a session cookie the lookup used to die with a bare
            # KeyError; report an actionable, expected error instead.
            raise ExtractorError(
                'No LinkedIn session cookie (JSESSIONID) found; '
                'provide account credentials or cookies',
                expected=True)
        return self._download_json(
            api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
                'Csrf-Token': csrf_token,
            }, query=query)['elements'][0]

    def _get_video_id(self, urn, course_slug, video_slug):
        """Return the numeric id embedded in ``urn``, else ``course/video`` slugs."""
        if urn:
            mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
            if mobj:
                return mobj.group(1)
        return '%s/%s' % (course_slug, video_slug)

    def _real_initialize(self):
        """Log in with the configured credentials; do nothing if there are none.

        Raises ExtractorError (expected) with the message scraped from the
        login response when that page contains an error span.
        """
        email, password = self._get_login_info()
        if email is None:
            return

        login_page = self._download_webpage(
            'https://www.linkedin.com/uas/login?trk=learning',
            None, 'Downloading login page')
        # Prefer the form's own action URL; fall back to the known endpoint.
        action_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
            default='https://www.linkedin.com/uas/login-submit', group='url')
        data = self._hidden_inputs(login_page)
        data.update({
            'session_key': email,
            'session_password': password,
        })
        login_submit_page = self._download_webpage(
            action_url, None, 'Logging in',
            data=urlencode_postdata(data))
        error = self._search_regex(
            r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
            login_submit_page, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)
|
||||||
|
|
||||||
|
|
||||||
|
class LinkedInLearningIE(LinkedInLearningBaseIE):
    """Extractor for a single LinkedIn Learning video page."""

    IE_NAME = 'linkedin:learning'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
        'md5': 'a1d74422ff0d5e66a792deb996693167',
        'info_dict': {
            'id': '90426',
            'ext': 'mp4',
            'title': 'Welcome',
            'timestamp': 1430396150.82,
            'upload_date': '20150430',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        The API is queried once per known resolution to collect the
        progressive downloads; the metadata, audio-only and HLS formats are
        taken from the last response.
        """
        course_slug, video_slug = re.match(self._VALID_URL, url).groups()

        video_data = None
        formats = []
        for width, height in ((640, 360), (960, 540), (1280, 720)):
            video_data = self._call_api(
                course_slug, 'selectedVideo', video_slug, height)['selectedVideo']

            video_url_data = video_data.get('url') or {}
            progressive_url = video_url_data.get('progressiveUrl')
            if progressive_url:
                formats.append({
                    'format_id': 'progressive-%dp' % height,
                    'url': progressive_url,
                    'height': height,
                    'width': width,
                    'source_preference': 1,
                })

        title = video_data['title']

        # 'audio' may be present but null; mirror the `or {}` guard used for
        # 'url' above instead of relying on the dict.get default.
        audio_url = (video_data.get('audio') or {}).get('progressiveUrl')
        if audio_url:
            formats.append({
                'abr': 64,
                'ext': 'm4a',
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        streaming_url = video_url_data.get('streamingUrl')
        if streaming_url:
            formats.extend(self._extract_m3u8_formats(
                streaming_url, video_slug, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))

        self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))

        return {
            'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('defaultThumbnail'),
            # 'publishedOn' is divided by 1000 (presumably milliseconds).
            'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
            'duration': int_or_none(video_data.get('durationInSeconds')),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
    """Playlist extractor for a whole LinkedIn Learning course page."""

    IE_NAME = 'linkedin:learning:course'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
        'info_dict': {
            'id': 'programming-foundations-fundamentals',
            'title': 'Programming Foundations: Fundamentals',
            'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
        },
        'playlist_mincount': 61,
    }

    @classmethod
    def suitable(cls, url):
        """Claim the URL only when the single-video extractor does not."""
        # The course pattern is a prefix of the video pattern, so video
        # URLs have to be rejected here explicitly.
        if LinkedInLearningIE.suitable(url):
            return False
        return super(LinkedInLearningCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist with one url_transparent entry per course video."""
        course_slug = self._match_id(url)
        course_data = self._call_api(course_slug, 'chapters,description,title')

        def generate_entries():
            # Walk chapters in order and emit every video that has a slug;
            # videos without one cannot be turned into a URL and are skipped.
            for chapter in course_data.get('chapters', []):
                chapter_title = chapter.get('title')
                for video in chapter.get('videos', []):
                    video_slug = video.get('slug')
                    if video_slug:
                        video_id = self._get_video_id(
                            video.get('urn'), course_slug, video_slug)
                        video_url = 'https://www.linkedin.com/learning/%s/%s' % (
                            course_slug, video_slug)
                        yield {
                            '_type': 'url_transparent',
                            'id': video_id,
                            'title': video.get('title'),
                            'url': video_url,
                            'chapter': chapter_title,
                            'ie_key': LinkedInLearningIE.ie_key(),
                        }

        entries = list(generate_entries())
        playlist_title = course_data.get('title')
        playlist_description = course_data.get('description')

        return self.playlist_result(
            entries, course_slug, playlist_title, playlist_description)
|
@ -15,7 +15,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class LyndaBaseIE(InfoExtractor):
|
class LyndaBaseIE(InfoExtractor):
|
||||||
_SIGNIN_URL = 'https://www.lynda.com/signin'
|
_SIGNIN_URL = 'https://www.lynda.com/signin/lynda'
|
||||||
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
_PASSWORD_URL = 'https://www.lynda.com/signin/password'
|
||||||
_USER_URL = 'https://www.lynda.com/signin/user'
|
_USER_URL = 'https://www.lynda.com/signin/user'
|
||||||
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
_ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.'
|
||||||
|
@ -161,11 +161,17 @@ class MixcloudIE(InfoExtractor):
|
|||||||
stream_info = info_json['streamInfo']
|
stream_info = info_json['streamInfo']
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
def decrypt_url(f_url):
|
||||||
|
for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
|
||||||
|
decrypted_url = self._decrypt_xor_cipher(k, f_url)
|
||||||
|
if re.search(r'^https?://[0-9a-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
|
||||||
|
return decrypted_url
|
||||||
|
|
||||||
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
for url_key in ('url', 'hlsUrl', 'dashUrl'):
|
||||||
format_url = stream_info.get(url_key)
|
format_url = stream_info.get(url_key)
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
decrypted = self._decrypt_xor_cipher(key, compat_b64decode(format_url))
|
decrypted = decrypt_url(compat_b64decode(format_url))
|
||||||
if not decrypted:
|
if not decrypted:
|
||||||
continue
|
continue
|
||||||
if url_key == 'hlsUrl':
|
if url_key == 'hlsUrl':
|
||||||
|
@ -9,10 +9,8 @@ from .theplatform import ThePlatformIE
|
|||||||
from .adobepass import AdobePassIE
|
from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
find_xpath_attr,
|
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
@ -269,27 +267,14 @@ class CSNNEIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(ThePlatformIE):
|
class NBCNewsIE(ThePlatformIE):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/
|
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
|
||||||
([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+))
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
|
||||||
'url': 'http://www.nbcnews.com/video/nbc-news/52753292',
|
|
||||||
'md5': '47abaac93c6eaf9ad37ee6c4463a5179',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '52753292',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Crew emerges after four-month Mars food study',
|
|
||||||
'description': 'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
|
||||||
'md5': 'af1adfa51312291a017720403826bb64',
|
'md5': 'af1adfa51312291a017720403826bb64',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p_tweet_snow_140529',
|
'id': '269389891880',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Twitter Reacted To The Snowden Interview',
|
'title': 'How Twitter Reacted To The Snowden Interview',
|
||||||
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
|
||||||
@ -313,7 +298,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
|
||||||
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
'md5': '73135a2e0ef819107bbb55a5a9b2a802',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'nn_netcast_150204',
|
'id': '394064451844',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
|
||||||
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
|
||||||
@ -326,7 +311,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
|
||||||
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
'md5': 'a49e173825e5fcd15c13fc297fced39d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'x_lon_vwhorn_150922',
|
'id': '529953347624',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
|
||||||
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
'description': 'md5:c8be487b2d80ff0594c005add88d8351',
|
||||||
@ -339,7 +324,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
|
||||||
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
'md5': '118d7ca3f0bea6534f119c68ef539f71',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'tdy_al_space_160420',
|
'id': '669831235788',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'See the aurora borealis from space in stunning new NASA video',
|
'title': 'See the aurora borealis from space in stunning new NASA video',
|
||||||
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
|
||||||
@ -352,7 +337,7 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
|
||||||
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'n_hayes_Aimm_140801_272214',
|
'id': '314487875924',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The chaotic GOP immigration vote',
|
'title': 'The chaotic GOP immigration vote',
|
||||||
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
|
||||||
@ -374,60 +359,22 @@ class NBCNewsIE(ThePlatformIE):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
if not video_id.isdigit():
|
||||||
if video_id is not None:
|
|
||||||
all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
|
|
||||||
info = all_info.find('video')
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': info.find('headline').text,
|
|
||||||
'ext': 'flv',
|
|
||||||
'url': find_xpath_attr(info, 'media', 'type', 'flashVideo').text,
|
|
||||||
'description': info.find('caption').text,
|
|
||||||
'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text,
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
# "feature" and "nightly-news" pages use theplatform.com
|
|
||||||
video_id = mobj.group('mpx_id')
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
filter_param = 'byId'
|
data = self._parse_json(self._search_regex(
|
||||||
bootstrap_json = self._search_regex(
|
r'window\.__data\s*=\s*({.+});', webpage,
|
||||||
[r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$',
|
'bootstrap json'), video_id)
|
||||||
r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"',
|
video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id']
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);'],
|
|
||||||
webpage, 'bootstrap json', default=None)
|
|
||||||
if bootstrap_json:
|
|
||||||
bootstrap = self._parse_json(
|
|
||||||
bootstrap_json, video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = None
|
return {
|
||||||
if 'results' in bootstrap:
|
'_type': 'url_transparent',
|
||||||
info = bootstrap['results'][0]['video']
|
'id': video_id,
|
||||||
elif 'video' in bootstrap:
|
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
||||||
info = bootstrap['video']
|
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}),
|
||||||
elif 'msnbcVideoInfo' in bootstrap:
|
'ie_key': 'ThePlatformFeed',
|
||||||
info = bootstrap['msnbcVideoInfo']['meta']
|
}
|
||||||
elif 'msnbcThePlatform' in bootstrap:
|
|
||||||
info = bootstrap['msnbcThePlatform']['videoPlayer']['video']
|
|
||||||
else:
|
|
||||||
info = bootstrap
|
|
||||||
|
|
||||||
if 'guid' in info:
|
|
||||||
video_id = info['guid']
|
|
||||||
filter_param = 'byGuid'
|
|
||||||
elif 'mpxId' in info:
|
|
||||||
video_id = info['mpxId']
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'id': video_id,
|
|
||||||
# http://feed.theplatform.com/f/2E2eJC/nbcnews also works
|
|
||||||
'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {filter_param: video_id}),
|
|
||||||
'ie_key': 'ThePlatformFeed',
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NBCOlympicsIE(InfoExtractor):
|
class NBCOlympicsIE(InfoExtractor):
|
||||||
|
@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
|
|||||||
'skip': 'Requires login',
|
'skip': 'Requires login',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
|
|||||||
if not username:
|
if not username:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# Setup session (will set necessary cookies)
|
||||||
|
self._request_webpage(
|
||||||
|
'https://njpwworld.com/', None, note='Setting up session')
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(
|
webpage, urlh = self._download_webpage_handle(
|
||||||
'https://njpwworld.com/auth/login', None,
|
self._LOGIN_URL, None,
|
||||||
note='Logging in', errnote='Unable to login',
|
note='Logging in', errnote='Unable to login',
|
||||||
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
||||||
headers={'Referer': 'https://njpwworld.com/auth'})
|
headers={'Referer': 'https://front.njpwworld.com/auth'})
|
||||||
# /auth/login will return 302 for successful logins
|
# /auth/login will return 302 for successful logins
|
||||||
if urlh.geturl() == 'https://njpwworld.com/auth/login':
|
if urlh.geturl() == self._LOGIN_URL:
|
||||||
self.report_warning('unable to login')
|
self.report_warning('unable to login')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ class NovaEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
bitrates = self._parse_json(
|
bitrates = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||||
video_id, transform_source=js_to_json)
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||||
|
@ -11,20 +11,27 @@ from ..utils import (
|
|||||||
|
|
||||||
class NZZIE(InfoExtractor):
|
class NZZIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9153',
|
'id': '9153',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1368112',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
|
for player_element in re.findall(
|
||||||
|
r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
|
||||||
player_params = extract_attributes(player_element)
|
player_params = extract_attributes(player_element)
|
||||||
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
if player_params.get('data-type') not in ('kaltura_singleArticle',):
|
||||||
self.report_warning('Unsupported player type')
|
self.report_warning('Unsupported player type')
|
||||||
|
@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
|
|||||||
|
|
||||||
|
|
||||||
class OpenloadIE(InfoExtractor):
|
class OpenloadIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download|cloud|cc))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?P<host>
|
||||||
|
(?:www\.)?
|
||||||
|
(?:
|
||||||
|
openload\.(?:co|io|link)|
|
||||||
|
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
|
||||||
|
)
|
||||||
|
)/
|
||||||
|
(?:f|embed)/
|
||||||
|
(?P<id>[a-zA-Z0-9-_]+)
|
||||||
|
'''
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||||
@ -317,6 +328,12 @@ class OpenloadIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://oload.cc/embed/5NEAbI2BDSk',
|
'url': 'https://oload.cc/embed/5NEAbI2BDSk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.icu/f/-_i4y_F_Hs8',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://oload.fun/f/gb6G1H4sHXY',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||||
@ -328,8 +345,11 @@ class OpenloadIE(InfoExtractor):
|
|||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
host = mobj.group('host')
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
|
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': self._USER_AGENT,
|
'User-Agent': self._USER_AGENT,
|
||||||
}
|
}
|
||||||
@ -362,7 +382,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
||||||
'stream URL'))
|
'stream URL'))
|
||||||
|
|
||||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||||
@ -373,7 +393,7 @@ class OpenloadIE(InfoExtractor):
|
|||||||
entry = entries[0] if entries else {}
|
entry = entries[0] if entries else {}
|
||||||
subtitles = entry.get('subtitles')
|
subtitles = entry.get('subtitles')
|
||||||
|
|
||||||
info_dict = {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||||
@ -382,4 +402,3 @@ class OpenloadIE(InfoExtractor):
|
|||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
return info_dict
|
|
||||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
|||||||
strip_jsonp,
|
strip_jsonp,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor):
|
|||||||
webpage, 'playlist', group='json'),
|
webpage, 'playlist', group='json'),
|
||||||
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
|
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
|
||||||
|
|
||||||
def quality_to_int(s):
|
|
||||||
m = re.search('([0-9]+)', s)
|
|
||||||
if m is None:
|
|
||||||
return -1
|
|
||||||
return int(m.group(1))
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for sd in data_jsb:
|
for sd in data_jsb:
|
||||||
video_id, title = sd.get('id'), sd.get('title')
|
video_id, title = sd.get('id'), sd.get('title')
|
||||||
if not video_id or not title:
|
if not video_id or not title:
|
||||||
continue
|
continue
|
||||||
video_id = compat_str(video_id)
|
video_id = compat_str(video_id)
|
||||||
formats = [{
|
formats = []
|
||||||
'preference': -10 if fd['delivery'] == 'hls' else None,
|
for fd in sd['sources']:
|
||||||
'format_id': '%s-%s-%s' % (
|
src = url_or_none(fd.get('src'))
|
||||||
fd['delivery'], fd['quality'], fd['quality_string']),
|
if not src:
|
||||||
'url': fd['src'],
|
continue
|
||||||
'protocol': fd['protocol'],
|
format_id_list = []
|
||||||
'quality': quality_to_int(fd['quality']),
|
for key in ('delivery', 'quality', 'quality_string'):
|
||||||
} for fd in sd['sources']]
|
value = fd.get(key)
|
||||||
|
if value:
|
||||||
|
format_id_list.append(value)
|
||||||
|
format_id = '-'.join(format_id_list)
|
||||||
|
if determine_ext(fd['src']) == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
fd['src'], video_id, 'mp4', m3u8_id=format_id))
|
||||||
|
elif determine_ext(fd['src']) == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
fd['src'], video_id, f4m_id=format_id))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'url': src,
|
||||||
|
'protocol': fd.get('protocol'),
|
||||||
|
})
|
||||||
|
|
||||||
# Check for geoblocking.
|
# Check for geoblocking.
|
||||||
# There is a property is_geoprotection, but that's always false
|
# There is a property is_geoprotection, but that's always false
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
@ -15,7 +16,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class PicartoIE(InfoExtractor):
|
class PicartoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://picarto.tv/Setz',
|
'url': 'https://picarto.tv/Setz',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
|
|||||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
stream_page = self._download_webpage(url, channel_id)
|
channel_id = mobj.group('id')
|
||||||
|
|
||||||
if '>This channel does not exist' in stream_page:
|
metadata = self._download_json(
|
||||||
raise ExtractorError(
|
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||||
'Channel %s does not exist' % channel_id, expected=True)
|
channel_id)
|
||||||
|
|
||||||
player = self._parse_json(
|
if metadata.get('online') is False:
|
||||||
self._search_regex(
|
|
||||||
r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
|
|
||||||
'player settings'),
|
|
||||||
channel_id, transform_source=js_to_json)
|
|
||||||
|
|
||||||
if player.get('online') is False:
|
|
||||||
raise ExtractorError('Stream is offline', expected=True)
|
raise ExtractorError('Stream is offline', expected=True)
|
||||||
|
|
||||||
cdn_data = self._download_json(
|
cdn_data = self._download_json(
|
||||||
@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
|
|||||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||||
note='Downloading load balancing info')
|
note='Downloading load balancing info')
|
||||||
|
|
||||||
def get_event(key):
|
token = mobj.group('token') or 'public'
|
||||||
return try_get(player, lambda x: x['event'][key], compat_str) or ''
|
|
||||||
|
|
||||||
params = {
|
params = {
|
||||||
'token': player.get('token') or '',
|
|
||||||
'ticket': get_event('ticket'),
|
|
||||||
'con': int(time.time() * 1000),
|
'con': int(time.time() * 1000),
|
||||||
'type': get_event('ticket'),
|
'token': token,
|
||||||
'scope': get_event('scope'),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prefered_edge = cdn_data.get('preferedEdge')
|
prefered_edge = cdn_data.get('preferedEdge')
|
||||||
default_tech = player.get('defaultTech')
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
for edge in cdn_data['edges']:
|
for edge in cdn_data['edges']:
|
||||||
@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
|
|||||||
preference = 0
|
preference = 0
|
||||||
if edge_id == prefered_edge:
|
if edge_id == prefered_edge:
|
||||||
preference += 1
|
preference += 1
|
||||||
if tech_type == default_tech:
|
|
||||||
preference += 1
|
|
||||||
format_id = []
|
format_id = []
|
||||||
if edge_id:
|
if edge_id:
|
||||||
format_id.append(edge_id)
|
format_id.append(edge_id)
|
||||||
@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
|
|||||||
continue
|
continue
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
mature = player.get('mature')
|
mature = metadata.get('adult')
|
||||||
if mature is None:
|
if mature is None:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
else:
|
else:
|
||||||
@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
'id': channel_id,
|
'id': channel_id,
|
||||||
'title': self._live_title(channel_id),
|
'title': self._live_title(metadata.get('title') or channel_id),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'thumbnail': player.get('vodThumb'),
|
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||||
|
'channel': channel_id,
|
||||||
|
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
@ -121,12 +121,15 @@ class PornHubIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
|
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _extract_count(self, pattern, webpage, name):
|
def _extract_count(self, pattern, webpage, name):
|
||||||
@ -134,14 +137,16 @@ class PornHubIE(InfoExtractor):
|
|||||||
pattern, webpage, '%s count' % name, fatal=False))
|
pattern, webpage, '%s count' % name, fatal=False))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host') or 'pornhub.com'
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
self._set_cookie('pornhub.com', 'age_verified', '1')
|
self._set_cookie(host, 'age_verified', '1')
|
||||||
|
|
||||||
def dl_webpage(platform):
|
def dl_webpage(platform):
|
||||||
self._set_cookie('pornhub.com', 'platform', platform)
|
self._set_cookie(host, 'platform', platform)
|
||||||
return self._download_webpage(
|
return self._download_webpage(
|
||||||
'http://www.pornhub.com/view_video.php?viewkey=%s' % video_id,
|
'http://www.%s/view_video.php?viewkey=%s' % (host, video_id),
|
||||||
video_id, 'Downloading %s webpage' % platform)
|
video_id, 'Downloading %s webpage' % platform)
|
||||||
|
|
||||||
webpage = dl_webpage('pc')
|
webpage = dl_webpage('pc')
|
||||||
@ -303,7 +308,7 @@ class PornHubIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistBaseIE(InfoExtractor):
|
class PornHubPlaylistBaseIE(InfoExtractor):
|
||||||
def _extract_entries(self, webpage):
|
def _extract_entries(self, webpage, host):
|
||||||
# Only process container div with main playlist content skipping
|
# Only process container div with main playlist content skipping
|
||||||
# drop-down menu that uses similar pattern for videos (see
|
# drop-down menu that uses similar pattern for videos (see
|
||||||
# https://github.com/rg3/youtube-dl/issues/11594).
|
# https://github.com/rg3/youtube-dl/issues/11594).
|
||||||
@ -313,7 +318,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
'http://www.pornhub.com/%s' % video_url,
|
'http://www.%s/%s' % (host, video_url),
|
||||||
PornHubIE.ie_key(), video_title=title)
|
PornHubIE.ie_key(), video_title=title)
|
||||||
for video_url, title in orderedSet(re.findall(
|
for video_url, title in orderedSet(re.findall(
|
||||||
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
|
||||||
@ -321,11 +326,13 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
playlist_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
entries = self._extract_entries(webpage)
|
entries = self._extract_entries(webpage, host)
|
||||||
|
|
||||||
playlist = self._parse_json(
|
playlist = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
@ -340,7 +347,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -355,7 +362,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
|||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
@ -396,7 +403,9 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
user_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
user_id = mobj.group('id')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
@ -408,7 +417,7 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
|||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
|
||||||
break
|
break
|
||||||
raise
|
raise
|
||||||
page_entries = self._extract_entries(webpage)
|
page_entries = self._extract_entries(webpage, host)
|
||||||
if not page_entries:
|
if not page_entries:
|
||||||
break
|
break
|
||||||
entries.extend(page_entries)
|
entries.extend(page_entries)
|
||||||
|
@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
|
|||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item_id = self._match_id(url)
|
item_id = self._match_id(url)
|
||||||
|
|
||||||
try:
|
info_dict = {}
|
||||||
json_string = self._download_json(
|
|
||||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
|
||||||
item_id)
|
|
||||||
except ExtractorError as ee:
|
|
||||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
|
||||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
|
||||||
if error_info:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
|
||||||
expected=True)
|
|
||||||
raise
|
|
||||||
|
|
||||||
# NB the string values in the JSON are stored using XML escaping(!)
|
|
||||||
show = json_string['shows'][0]
|
|
||||||
title = unescapeHTML(show['title'])
|
|
||||||
description = unescapeHTML(show.get('description'))
|
|
||||||
thumbnail = show.get('thumbnail')
|
|
||||||
duration = float_or_none(show.get('duration'), 1000)
|
|
||||||
timestamp = parse_iso8601(show.get('published'))
|
|
||||||
|
|
||||||
mg = show['media:group'][0]
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
if mg.get('url'):
|
ENDPOINTS = (
|
||||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
|
||||||
if m:
|
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
|
||||||
m = m.groupdict()
|
)
|
||||||
formats.append({
|
|
||||||
'url': m['url'] + '/' + m['app'],
|
|
||||||
'app': m['app'],
|
|
||||||
'play_path': m['playpath'],
|
|
||||||
'player_url': url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': 'rtmp',
|
|
||||||
})
|
|
||||||
|
|
||||||
if mg.get('hls_server') and mg.get('hls_url'):
|
for num, ep_url in enumerate(ENDPOINTS, start=1):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
try:
|
||||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
data = self._download_json(ep_url + item_id, item_id)
|
||||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
except ExtractorError as ee:
|
||||||
|
if num < len(ENDPOINTS) or formats:
|
||||||
|
continue
|
||||||
|
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||||
|
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||||
|
if error_info:
|
||||||
|
raise ExtractorError(
|
||||||
|
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||||
|
expected=True)
|
||||||
|
raise
|
||||||
|
|
||||||
if mg.get('hds_server') and mg.get('hds_url'):
|
# NB the string values in the JSON are stored using XML escaping(!)
|
||||||
formats.extend(self._extract_f4m_formats(
|
show = try_get(data, lambda x: x['shows'][0], dict)
|
||||||
mg['hds_server'] + mg['hds_url'], item_id,
|
if not show:
|
||||||
f4m_id='hds', fatal=False))
|
continue
|
||||||
|
|
||||||
|
if not info_dict:
|
||||||
|
title = unescapeHTML(show['title'])
|
||||||
|
description = unescapeHTML(show.get('description'))
|
||||||
|
thumbnail = show.get('thumbnail')
|
||||||
|
duration = float_or_none(show.get('duration'), 1000)
|
||||||
|
timestamp = parse_iso8601(show.get('published'))
|
||||||
|
info_dict = {
|
||||||
|
'id': item_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'duration': duration,
|
||||||
|
}
|
||||||
|
|
||||||
|
mg = try_get(show, lambda x: x['media:group'][0], dict)
|
||||||
|
if not mg:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if mg.get('url'):
|
||||||
|
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||||
|
if m:
|
||||||
|
m = m.groupdict()
|
||||||
|
formats.append({
|
||||||
|
'url': m['url'] + '/' + m['app'],
|
||||||
|
'app': m['app'],
|
||||||
|
'play_path': m['playpath'],
|
||||||
|
'player_url': url,
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
})
|
||||||
|
|
||||||
|
if mg.get('hls_server') and mg.get('hls_url'):
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||||
|
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
if mg.get('hds_server') and mg.get('hds_url'):
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
mg['hds_server'] + mg['hds_url'], item_id,
|
||||||
|
f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
|
mg_rte_server = str_or_none(mg.get('rte:server'))
|
||||||
|
mg_url = str_or_none(mg.get('url'))
|
||||||
|
if mg_rte_server and mg_url:
|
||||||
|
hds_url = url_or_none(mg_rte_server + mg_url)
|
||||||
|
if hds_url:
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
hds_url, item_id, f4m_id='hds', fatal=False))
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
info_dict['formats'] = formats
|
||||||
'id': item_id,
|
return info_dict
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RteIE(RteBaseIE):
|
class RteIE(RteBaseIE):
|
||||||
|
@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE):
|
|||||||
|
|
||||||
options = self._download_json(
|
options = self._download_json(
|
||||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||||
video_id, 'Downloading options JSON')
|
video_id, 'Downloading options JSON',
|
||||||
|
headers=self.geo_verification_headers())
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in options['video_balancer'].items():
|
for format_id, format_url in options['video_balancer'].items():
|
||||||
|
@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
|
|||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
video_xml = self._download_xml(
|
video_xml = self._download_xml(
|
||||||
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||||
|
query={'id': video_id})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
processed_urls = []
|
processed_urls = []
|
||||||
|
@ -90,6 +90,15 @@ class ScreencastIE(InfoExtractor):
|
|||||||
r'src=(.*?)(?:$|&)', video_meta,
|
r'src=(.*?)(?:$|&)', video_meta,
|
||||||
'meta tag video URL', default=None)
|
'meta tag video URL', default=None)
|
||||||
|
|
||||||
|
if video_url is None:
|
||||||
|
video_url = self._html_search_regex(
|
||||||
|
r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
|
webpage, 'video url', default=None, group='url')
|
||||||
|
|
||||||
|
if video_url is None:
|
||||||
|
video_url = self._html_search_meta(
|
||||||
|
'og:video', webpage, default=None)
|
||||||
|
|
||||||
if video_url is None:
|
if video_url is None:
|
||||||
raise ExtractorError('Cannot find video')
|
raise ExtractorError('Cannot find video')
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ from ..compat import compat_b64decode
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def _extract_video_url(self, webpage, video_id, *args):
|
def _extract_video_url(self, webpage, video_id, *args):
|
||||||
|
def decode_url(encoded_url):
|
||||||
|
return compat_b64decode(encoded_url).decode('utf-8')
|
||||||
|
|
||||||
|
stream_url = url_or_none(decode_url(self._search_regex(
|
||||||
|
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||||
|
'stream url', default=None, group='url')))
|
||||||
|
if stream_url:
|
||||||
|
return stream_url
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||||
webpage, 'stream', group='url'),
|
webpage, 'stream', group='url'),
|
||||||
video_id,
|
video_id, transform_source=decode_url)[0]
|
||||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
|
||||||
|
@ -64,7 +64,7 @@ class SixPlayIE(InfoExtractor):
|
|||||||
for asset in clip_data['assets']:
|
for asset in clip_data['assets']:
|
||||||
asset_url = asset.get('full_physical_path')
|
asset_url = asset.get('full_physical_path')
|
||||||
protocol = asset.get('protocol')
|
protocol = asset.get('protocol')
|
||||||
if not asset_url or protocol == 'primetime' or asset_url in urls:
|
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||||
continue
|
continue
|
||||||
urls.append(asset_url)
|
urls.append(asset_url)
|
||||||
container = asset.get('video_container')
|
container = asset.get('video_container')
|
||||||
@ -81,19 +81,17 @@ class SixPlayIE(InfoExtractor):
|
|||||||
if not urlh:
|
if not urlh:
|
||||||
continue
|
continue
|
||||||
asset_url = urlh.geturl()
|
asset_url = urlh.geturl()
|
||||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
for i in range(3, 0, -1):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
asset_url = asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
|
||||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_id='hls', fatal=False))
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
formats.extend(self._extract_f4m_formats(
|
m3u8_id='hls', fatal=False)
|
||||||
asset_url.replace('.m3u8', '.f4m'),
|
formats.extend(m3u8_formats)
|
||||||
video_id, f4m_id='hds', fatal=False))
|
formats.extend(self._extract_mpd_formats(
|
||||||
formats.extend(self._extract_mpd_formats(
|
asset_url.replace('.m3u8', '.mpd'),
|
||||||
asset_url.replace('.m3u8', '.mpd'),
|
video_id, mpd_id='dash', fatal=False))
|
||||||
video_id, mpd_id='dash', fatal=False))
|
if m3u8_formats:
|
||||||
formats.extend(self._extract_ism_formats(
|
break
|
||||||
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
|
||||||
video_id, ism_id='mss', fatal=False))
|
|
||||||
else:
|
else:
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
@ -8,20 +8,24 @@ from ..utils import (
|
|||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class SportBoxEmbedIE(InfoExtractor):
|
class SportBoxIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
|
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '211355',
|
'id': '109158',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '211355',
|
'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
|
||||||
|
'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 292,
|
'duration': 292,
|
||||||
'view_count': int,
|
'view_count': int,
|
||||||
|
'timestamp': 1426237001,
|
||||||
|
'upload_date': '20150313',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
|
|||||||
}, {
|
}, {
|
||||||
'url': 'https://news.sportbox.ru/vdl/player/media/193095',
|
'url': 'https://news.sportbox.ru/vdl/player/media/193095',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://news.sportbox.ru/vdl/player/media/109158',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://matchtv.ru/vdl/player/media/109158',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage):
|
||||||
return re.findall(
|
return re.findall(
|
||||||
r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
|
r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
|
||||||
webpage)
|
webpage)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
|
|||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
wjplayer_data = self._parse_json(
|
sources = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
|
r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
|
||||||
|
webpage, 'sources'),
|
||||||
video_id, transform_source=js_to_json)
|
video_id, transform_source=js_to_json)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for source in wjplayer_data['sources']:
|
for source in sources:
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
if not src:
|
if not src:
|
||||||
continue
|
continue
|
||||||
@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
|
|||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
player = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
|
||||||
|
'player options', default='{}'),
|
||||||
|
video_id, transform_source=js_to_json)
|
||||||
|
media_id = player['mediaId']
|
||||||
|
|
||||||
|
info = self._search_json_ld(webpage, media_id, default={})
|
||||||
|
|
||||||
view_count = int_or_none(self._search_regex(
|
view_count = int_or_none(self._search_regex(
|
||||||
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
|
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
|
||||||
|
|
||||||
return {
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': media_id,
|
||||||
'title': video_id,
|
'title': self._og_search_title(webpage, default=None) or media_id,
|
||||||
'thumbnail': wjplayer_data.get('poster'),
|
'thumbnail': player.get('poster'),
|
||||||
'duration': int_or_none(wjplayer_data.get('duration')),
|
'duration': int_or_none(player.get('duration')),
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
})
|
||||||
|
@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE):
|
|||||||
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||||
headers=self.geo_verification_headers())
|
headers=self.geo_verification_headers())
|
||||||
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||||
if error_element is not None and error_element.attrib['src'].startswith(
|
if error_element is not None:
|
||||||
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
|
exception = find_xpath_attr(
|
||||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
error_element, _x('.//smil:param'), 'name', 'exception')
|
||||||
|
if exception is not None:
|
||||||
|
if exception.get('value') == 'GeoLocationBlocked':
|
||||||
|
self.raise_geo_restricted(error_element.attrib['abstract'])
|
||||||
|
elif error_element.attrib['src'].startswith(
|
||||||
|
'http://link.theplatform.%s/s/errorFiles/Unavailable.'
|
||||||
|
% self._TP_TLD):
|
||||||
|
raise ExtractorError(
|
||||||
|
error_element.attrib['abstract'], expected=True)
|
||||||
|
|
||||||
smil_formats = self._parse_smil_formats(
|
smil_formats = self._parse_smil_formats(
|
||||||
meta, smil_url, video_id, namespace=default_ns,
|
meta, smil_url, video_id, namespace=default_ns,
|
||||||
@ -335,7 +343,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
|
||||||
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
|
||||||
entry = self._download_json(real_url, video_id)['entries'][0]
|
entry = self._download_json(real_url, video_id)['entries'][0]
|
||||||
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
|
main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
@ -348,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
|
|||||||
if first_video_id is None:
|
if first_video_id is None:
|
||||||
first_video_id = cur_video_id
|
first_video_id = cur_video_id
|
||||||
duration = float_or_none(item.get('plfile$duration'))
|
duration = float_or_none(item.get('plfile$duration'))
|
||||||
for asset_type in item['plfile$assetTypes']:
|
file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
|
||||||
|
for asset_type in file_asset_types:
|
||||||
if asset_type in asset_types:
|
if asset_type in asset_types:
|
||||||
continue
|
continue
|
||||||
asset_types.append(asset_type)
|
asset_types.append(asset_type)
|
||||||
|
117
youtube_dl/extractor/tiktok.py
Normal file
117
youtube_dl/extractor/tiktok.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_str,
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokBaseIE(InfoExtractor):
    """Common base for the TikTok extractors; holds the shared aweme parser."""

    def _extract_aweme(self, data):
        """Build an info dict from one decoded TikTok "aweme" JSON object.

        ``data['video']`` and ``data['aweme_id']`` are accessed directly, so a
        KeyError is raised when either is missing. Every other field is read
        defensively and ends up as None when absent or malformed.

        Returns a dict with id, title, description, thumbnail, uploader,
        timestamp, comment_count, repost_count and formats.
        """
        video = data['video']

        description = str_or_none(try_get(data, lambda x: x['desc']))
        # Read the dimensions from the object actually handed to try_get
        # instead of closing over an outer variable.
        width = int_or_none(try_get(video, lambda x: x['width']))
        height = int_or_none(try_get(video, lambda x: x['height']))

        # The same URL may be listed under several address keys; keep each
        # distinct URL only once, in first-seen order.
        seen_urls = set()
        formats = []
        for format_id in (
                'play_addr_lowbr', 'play_addr', 'play_addr_h264',
                'download_addr'):
            candidates = try_get(
                video, lambda x: x[format_id]['url_list'], list) or []
            for candidate in candidates:
                format_url = url_or_none(candidate)
                if not format_url or format_url in seen_urls:
                    continue
                seen_urls.add(format_url)
                formats.append({
                    'url': format_url,
                    'ext': 'mp4',
                    'height': height,
                    'width': width,
                })
        # NOTE(review): presumably raises ExtractorError when no format was
        # collected (TikTokUserIE catches that to skip entries) - confirm.
        self._sort_formats(formats)

        thumbnail = url_or_none(try_get(
            video, lambda x: x['cover']['url_list'][0], compat_str))
        uploader = try_get(data, lambda x: x['author']['nickname'], compat_str)
        timestamp = int_or_none(data.get('create_time'))
        # Prefer the top-level counter, fall back to the nested statistics.
        comment_count = int_or_none(data.get('comment_count')) or int_or_none(
            try_get(data, lambda x: x['statistics']['comment_count']))
        repost_count = int_or_none(try_get(
            data, lambda x: x['statistics']['share_count']))

        aweme_id = data['aweme_id']

        return {
            'id': aweme_id,
            # No dedicated title is read; use the uploader nickname, then the id.
            'title': uploader or aweme_id,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'timestamp': timestamp,
            'comment_count': comment_count,
            'repost_count': repost_count,
            'formats': formats,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokIE(TikTokBaseIE):
    """Extractor for a single TikTok video page (``tiktok.com/v/<id>``)."""

    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/v/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/v/6606727368545406213.html',
        'md5': 'd584b572e92fcd48888051f238022420',
        'info_dict': {
            'id': '6606727368545406213',
            'ext': 'mp4',
            'title': 'Zureeal',
            'description': '#bowsette#mario#cosplay#uk#lgbt#gaming#asian#bowsettecosplay',
            'thumbnail': r're:^https?://.*~noop.image',
            'uploader': 'Zureeal',
            'timestamp': 1538248586,
            'upload_date': '20180929',
            'comment_count': int,
            'repost_count': int,
        }
    }

    def _real_extract(self, url):
        """Download the page, pull out its inline ``data = {...};`` JSON
        object and hand it to the shared aweme parser."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        raw_json = self._search_regex(
            r'\bdata\s*=\s*({.+?})\s*;', webpage, 'data')
        aweme = self._parse_json(raw_json, video_id)

        return self._extract_aweme(aweme)
|
||||||
|
|
||||||
|
|
||||||
|
class TikTokUserIE(TikTokBaseIE):
    """Extractor for a TikTok user share page; yields a playlist of videos."""

    _VALID_URL = r'https?://(?:m\.)?tiktok\.com/h5/share/usr/(?P<id>\d+)'
    _TEST = {
        'url': 'https://m.tiktok.com/h5/share/usr/188294915489964032.html',
        'info_dict': {
            'id': '188294915489964032',
        },
        'playlist_mincount': 24,
    }

    def _real_extract(self, url):
        """Fetch the user's video list JSON and convert every aweme in it
        into a playlist entry, skipping those that fail with ExtractorError."""
        user_id = self._match_id(url)
        listing = self._download_json(
            'https://m.tiktok.com/h5/share/usr/list/%s/' % user_id, user_id,
            query={'_signature': '_'})

        def iter_entries():
            for aweme in listing['aweme_list']:
                try:
                    info = self._extract_aweme(aweme)
                except ExtractorError:
                    # This aweme could not be extracted; skip it.
                    continue
                # Attribute each entry to the single-video extractor.
                info['extractor_key'] = TikTokIE.ie_key()
                yield info

        return self.playlist_result(list(iter_entries()), user_id)
|
@ -18,8 +18,9 @@ from ..utils import (
|
|||||||
class TNAFlixNetworkBaseIE(InfoExtractor):
|
class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||||
# May be overridden in descendants if necessary
|
# May be overridden in descendants if necessary
|
||||||
_CONFIG_REGEX = [
|
_CONFIG_REGEX = [
|
||||||
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
|
r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
|
||||||
r'<input[^>]+name="config\d?" value="([^"]+)"',
|
r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
|
||||||
|
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
|
||||||
]
|
]
|
||||||
_HOST = 'tna'
|
_HOST = 'tna'
|
||||||
_VKEY_SUFFIX = ''
|
_VKEY_SUFFIX = ''
|
||||||
@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
|||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
|
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
|
||||||
|
group='url'), 'http:')
|
||||||
|
|
||||||
if not cfg_url:
|
if not cfg_url:
|
||||||
inputs = self._hidden_inputs(webpage)
|
inputs = self._hidden_inputs(webpage)
|
||||||
|
60
youtube_dl/extractor/twitcasting.py
Normal file
60
youtube_dl/extractor/twitcasting.py
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class TwitCastingIE(InfoExtractor):
    """Extractor for recorded TwitCasting movies
    (``twitcasting.tv/<uploader>/movie/<id>``)."""

    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
    _TEST = {
        'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
        'md5': '745243cad58c4681dc752490f7540d7f',
        'info_dict': {
            'id': '2357609',
            'ext': 'mp4',
            'title': 'Recorded Live #2357609',
            'uploader_id': 'ivetesangalo',
            'description': "Moi! I'm live on TwitCasting from my iPhone.",
            'thumbnail': r're:^https?://.*\.jpg$',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Download the movie page and return its info dict.

        The uploader id and video id both come from the URL. The title is
        taken from the ``movietitle`` element, falling back to the
        ``twitter:title`` meta tag. The HLS manifest URL is read from a
        ``data-movie-url`` attribute, falling back to any quoted ``.m3u8``
        URL in the page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        uploader_id = mobj.group('uploader_id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
            webpage, 'title', default=None) or self._html_search_meta(
            'twitter:title', webpage, fatal=True)

        m3u8_url = self._search_regex(
            (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
             r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
            webpage, 'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        # Sort the variants before returning them, as the other extractors
        # do, rather than leaving them in manifest order.
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(
            webpage, default=None) or self._html_search_meta(
            'twitter:description', webpage)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader_id': uploader_id,
            'formats': formats,
        }
|
@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor):
|
|||||||
def concat(suffix, sep='-'):
|
def concat(suffix, sep='-'):
|
||||||
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
|
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
|
||||||
|
|
||||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
medias = self._download_json(
|
||||||
media = self._download_json(
|
'https://public-api.viewster.com/movies/%s/videos' % entry_id,
|
||||||
'https://public-api.viewster.com/movies/%s/video' % entry_id,
|
video_id, fatal=False, query={
|
||||||
video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
|
'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
|
||||||
'mediaType': media_type,
|
'language': audio,
|
||||||
'language': audio,
|
'subtitle': subtitle,
|
||||||
'subtitle': subtitle,
|
})
|
||||||
})
|
if not medias:
|
||||||
if not media:
|
continue
|
||||||
continue
|
for media in medias:
|
||||||
video_url = media.get('Uri')
|
video_url = media.get('Uri')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
|
@ -14,10 +14,13 @@ from ..compat import (
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
js_to_json,
|
||||||
InAdvancePagedList,
|
InAdvancePagedList,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
NO_DEFAULT,
|
NO_DEFAULT,
|
||||||
|
parse_filesize,
|
||||||
|
qualities,
|
||||||
RegexNotFoundError,
|
RegexNotFoundError,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
@ -27,7 +30,6 @@ from ..utils import (
|
|||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
parse_filesize,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -1063,3 +1065,96 @@ class VimeoLikesIE(InfoExtractor):
|
|||||||
'description': description,
|
'description': description,
|
||||||
'entries': pl,
|
'entries': pl,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class VHXEmbedIE(InfoExtractor):
    """Extractor for videos served through the embed.vhx.tv player."""

    IE_NAME = 'vhx:embed'
    _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'

    def _call_api(self, video_id, access_token, path='', query=None):
        """Download JSON from ``https://api.vhx.tv/videos/<video_id><path>``.

        ``access_token`` is sent as a Bearer ``Authorization`` header and
        ``query`` is forwarded unchanged as the request query parameters.
        """
        return self._download_json(
            'https://api.vhx.tv/videos/' + video_id + path, video_id, headers={
                'Authorization': 'Bearer ' + access_token,
            }, query=query)

    def _real_extract(self, url):
        """Return the info dict (formats, subtitles, thumbnails, metadata)
        for the embed page at *url*.

        Raises whatever the download/parse helpers raise when the page, the
        ``credentials`` object or its ``access_token`` key cannot be obtained.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The page embeds a JavaScript object literal ``credentials: {...},``
        # which is converted to JSON with js_to_json before parsing.
        credentials = self._parse_json(self._search_regex(
            r'(?s)credentials\s*:\s*({.+?}),', webpage,
            'config'), video_id, js_to_json)
        access_token = credentials['access_token']

        # Forward the optional auth values to the files endpoint.  Empty
        # values and the literal string 'undefined' are treated as unset;
        # ``authUserToken`` is sent under the name ``auth_user_token``.
        query = {}
        for k, v in credentials.items():
            if k in ('authorization', 'authUserToken', 'ticket') and v and v != 'undefined':
                if k == 'authUserToken':
                    query['auth_user_token'] = v
                else:
                    query[k] = v
        files = self._call_api(video_id, access_token, '/files', query)

        formats = []
        for f in files:
            href = try_get(f, lambda x: x['_links']['source']['href'])
            if not href:
                continue
            method = f.get('method')
            if method == 'hls':
                formats.extend(self._extract_m3u8_formats(
                    href, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False))
            elif method == 'dash':
                formats.extend(self._extract_mpd_formats(
                    href, video_id, mpd_id='dash', fatal=False))
            else:
                # Anything that is neither HLS nor DASH is handled as a
                # direct HTTP download.
                fmt = {
                    'filesize': int_or_none(try_get(f, lambda x: x['size']['bytes'])),
                    'format_id': 'http',
                    'preference': 1,
                    'url': href,
                    'vcodec': f.get('codec'),
                }
                quality = f.get('quality')
                if quality:
                    fmt.update({
                        'format_id': 'http-' + quality,
                        # Height is taken from a "<digits>p" token in the
                        # quality label when one is present.
                        'height': int_or_none(self._search_regex(r'(\d+)p', quality, 'height', default=None)),
                    })
                formats.append(fmt)
        self._sort_formats(formats)

        video_data = self._call_api(video_id, access_token)
        title = video_data.get('title') or video_data['name']

        subtitles = {}
        for subtitle in try_get(video_data, lambda x: x['tracks']['subtitles'], list) or []:
            lang = subtitle.get('srclang') or subtitle.get('label')
            # ``or {}`` (rather than a .get default) also covers an explicit
            # JSON null stored under '_links'.
            for _link in (subtitle.get('_links') or {}).values():
                href = _link.get('href')
                if not href:
                    continue
                subtitles.setdefault(lang, []).append({
                    'url': href,
                })

        q = qualities(['small', 'medium', 'large', 'source'])
        thumbnails = []
        # Same null-safety as above: 'thumbnail' may be present but null.
        for thumbnail_id, thumbnail_url in (video_data.get('thumbnail') or {}).items():
            thumbnails.append({
                'id': thumbnail_id,
                'url': thumbnail_url,
                'preference': q(thumbnail_id),
            })

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'duration': int_or_none(try_get(video_data, lambda x: x['duration']['seconds'])),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'timestamp': unified_timestamp(video_data.get('created_at')),
            'view_count': int_or_none(video_data.get('plays_count')),
        }
|
||||||
|
@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
|
|||||||
# This video is no longer available, because its author has been blocked.
|
# This video is no longer available, because its author has been blocked.
|
||||||
'url': 'https://vk.com/video-10639516_456240611',
|
'url': 'https://vk.com/video-10639516_456240611',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}
|
},
|
||||||
]
|
{
|
||||||
|
# The video is not available in your region.
|
||||||
|
'url': 'https://vk.com/video-51812607_171445436',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
|
|||||||
|
|
||||||
r'<!>This video is no longer available, because it has been deleted.':
|
r'<!>This video is no longer available, because it has been deleted.':
|
||||||
'Video %s is no longer available, because it has been deleted.',
|
'Video %s is no longer available, because it has been deleted.',
|
||||||
|
|
||||||
|
r'<!>The video .+? is not available in your region.':
|
||||||
|
'Video %s is not available in your region.',
|
||||||
}
|
}
|
||||||
|
|
||||||
for error_re, error_msg in ERRORS.items():
|
for error_re, error_msg in ERRORS.items():
|
||||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
|||||||
|
|
||||||
|
|
||||||
class WistiaIE(InfoExtractor):
|
class WistiaIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
|
_VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/iframe/)(?P<id>[a-z0-9]+)'
|
||||||
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
_API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
|
||||||
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
_IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
|
||||||
|
|
||||||
@ -35,12 +35,15 @@ class WistiaIE(InfoExtractor):
|
|||||||
# with hls video
|
# with hls video
|
||||||
'url': 'wistia:807fafadvk',
|
'url': 'wistia:807fafadvk',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
match = re.search(
|
match = re.search(
|
||||||
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)
|
r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
|
||||||
if match:
|
if match:
|
||||||
return unescapeHTML(match.group('url'))
|
return unescapeHTML(match.group('url'))
|
||||||
|
|
||||||
|
140
youtube_dl/extractor/wwe.py
Normal file
140
youtube_dl/extractor/wwe.py
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class WWEBaseIE(InfoExtractor):
    """Shared helper for the WWE extractors: turns one playlist item from the
    site's JSON into an info dict."""

    # Caption track label -> language code.
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
    }

    def _extract_entry(self, data, url, video_id=None):
        """Build an info dict from the playlist item *data*.

        *url* is the page URL used to resolve a relative thumbnail path.
        *video_id* overrides the item's own ``nid`` when given.  ``nid`` (if
        no id was passed), ``title`` and ``file`` are required keys.
        """
        entry_id = compat_str(video_id or data['nid'])
        entry_title = data['title']

        hls_formats = self._extract_m3u8_formats(
            data['file'], entry_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        captions = {}
        track_list = data.get('tracks')
        for item in (track_list if isinstance(track_list, list) else []):
            # Only well-formed caption tracks are of interest.
            if not isinstance(item, dict) or item.get('kind') != 'captions':
                continue
            caption_url = url_or_none(item.get('file'))
            if caption_url:
                name = item.get('label')
                # Unknown labels are used verbatim; a missing label means 'en'.
                code = self._SUBTITLE_LANGS.get(name, name) or 'en'
                captions.setdefault(code, []).append({
                    'url': caption_url,
                })

        return {
            'id': entry_id,
            'title': entry_title,
            'description': data.get('description'),
            'thumbnail': urljoin(url, data.get('image')),
            'series': data.get('show_name'),
            'episode': data.get('episode_name'),
            'formats': hls_formats,
            'subtitles': captions,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class WWEIE(WWEBaseIE):
    """Extractor for single video pages (``.../videos/<slug>``) on wwe.com."""

    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
        'md5': '92811c6a14bfc206f7a6a9c5d9140184',
        'info_dict': {
            'id': '40048199',
            'ext': 'mp4',
            'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
            'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Read the Drupal settings blob from the page and extract the first
        playlist item of its ``WWEVideoLanding.initialVideo`` entry."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)

        settings_json = self._html_search_regex(
            r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
            page, 'drupal settings')
        settings = self._parse_json(settings_json, slug)
        landing = settings['WWEVideoLanding']

        first_item = landing['initialVideo']['playlist'][0]
        result = self._extract_entry(
            first_item, url, landing.get('initialVideoId'))
        # The URL slug is kept as the human-readable identifier.
        result['display_id'] = slug
        return result
|
||||||
|
|
||||||
|
|
||||||
|
class WWEPlaylistIE(WWEBaseIE):
    """Extractor for wwe.com pages that embed several videos through
    ``data-video`` attributes."""

    _VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.wwe.com/shows/raw/2018-11-12',
        'info_dict': {
            'id': '2018-11-12',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
        'only_matching': True,
    }, {
        'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline every URL that the single-video extractor accepts."""
        if WWEIE.suitable(url):
            return False
        return super(WWEPlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect one entry per parseable ``data-video`` attribute on the page."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)

        collected = []
        for match in re.finditer(
                r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', page):
            # The attribute value is HTML-escaped JSON; a bad blob is skipped.
            payload = self._parse_json(
                match.group('data'), page_id, transform_source=unescapeHTML,
                fatal=False)
            first_item = None
            if payload:
                first_item = try_get(payload, lambda x: x['playlist'][0], dict)
            if not first_item:
                continue
            # Best effort: an item that fails to extract is dropped rather
            # than failing the whole playlist.
            try:
                item = self._extract_entry(first_item, url)
            except Exception:
                continue
            item['extractor_key'] = WWEIE.ie_key()
            collected.append(item)

        return self.playlist_result(collected, page_id)
|
@ -41,6 +41,7 @@ from ..utils import (
|
|||||||
remove_quotes,
|
remove_quotes,
|
||||||
remove_start,
|
remove_start,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
@ -501,6 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'categories': ['Science & Technology'],
|
'categories': ['Science & Technology'],
|
||||||
'tags': ['youtube-dl'],
|
'tags': ['youtube-dl'],
|
||||||
'duration': 10,
|
'duration': 10,
|
||||||
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
'start_time': 1,
|
'start_time': 1,
|
||||||
@ -583,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'categories': ['Science & Technology'],
|
'categories': ['Science & Technology'],
|
||||||
'tags': ['youtube-dl'],
|
'tags': ['youtube-dl'],
|
||||||
'duration': 10,
|
'duration': 10,
|
||||||
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
},
|
},
|
||||||
@ -1189,7 +1192,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
||||||
|
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
|
||||||
jscode, 'Initial JS player signature function name', group='sig')
|
jscode, 'Initial JS player signature function name', group='sig')
|
||||||
|
|
||||||
jsi = JSInterpreter(jscode)
|
jsi = JSInterpreter(jscode)
|
||||||
@ -1538,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
def extract_view_count(v_info):
|
def extract_view_count(v_info):
|
||||||
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
|
||||||
|
|
||||||
|
player_response = {}
|
||||||
|
|
||||||
# Get video info
|
# Get video info
|
||||||
embed_webpage = None
|
embed_webpage = None
|
||||||
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
if re.search(r'player-age-gate-content">', video_webpage) is not None:
|
||||||
@ -1580,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
if args.get('livestream') == '1' or args.get('live_playback') == 1:
|
||||||
is_live = True
|
is_live = True
|
||||||
sts = ytplayer_config.get('sts')
|
sts = ytplayer_config.get('sts')
|
||||||
|
if not player_response:
|
||||||
|
pl_response = str_or_none(args.get('player_response'))
|
||||||
|
if pl_response:
|
||||||
|
pl_response = self._parse_json(pl_response, video_id, fatal=False)
|
||||||
|
if isinstance(pl_response, dict):
|
||||||
|
player_response = pl_response
|
||||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
# We also try looking in get_video_info since it may contain different dashmpd
|
# We also try looking in get_video_info since it may contain different dashmpd
|
||||||
# URL that points to a DASH manifest with possibly different itag set (some itags
|
# URL that points to a DASH manifest with possibly different itag set (some itags
|
||||||
@ -1608,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
if not video_info_webpage:
|
if not video_info_webpage:
|
||||||
continue
|
continue
|
||||||
get_video_info = compat_parse_qs(video_info_webpage)
|
get_video_info = compat_parse_qs(video_info_webpage)
|
||||||
|
if not player_response:
|
||||||
|
pl_response = get_video_info.get('player_response', [None])[0]
|
||||||
|
if isinstance(pl_response, dict):
|
||||||
|
player_response = pl_response
|
||||||
add_dash_mpd(get_video_info)
|
add_dash_mpd(get_video_info)
|
||||||
if view_count is None:
|
if view_count is None:
|
||||||
view_count = extract_view_count(get_video_info)
|
view_count = extract_view_count(get_video_info)
|
||||||
@ -1653,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'"token" parameter not in video info for unknown reason',
|
'"token" parameter not in video info for unknown reason',
|
||||||
video_id=video_id)
|
video_id=video_id)
|
||||||
|
|
||||||
|
video_details = try_get(
|
||||||
|
player_response, lambda x: x['videoDetails'], dict) or {}
|
||||||
|
|
||||||
# title
|
# title
|
||||||
if 'title' in video_info:
|
if 'title' in video_info:
|
||||||
video_title = video_info['title'][0]
|
video_title = video_info['title'][0]
|
||||||
|
elif 'title' in player_response:
|
||||||
|
video_title = video_details['title']
|
||||||
else:
|
else:
|
||||||
self._downloader.report_warning('Unable to extract video title')
|
self._downloader.report_warning('Unable to extract video title')
|
||||||
video_title = '_'
|
video_title = '_'
|
||||||
@ -1718,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
|
|
||||||
if view_count is None:
|
if view_count is None:
|
||||||
view_count = extract_view_count(video_info)
|
view_count = extract_view_count(video_info)
|
||||||
|
if view_count is None and video_details:
|
||||||
|
view_count = int_or_none(video_details.get('viewCount'))
|
||||||
|
|
||||||
# Check for "rental" videos
|
# Check for "rental" videos
|
||||||
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
|
||||||
@ -1898,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
|
||||||
|
|
||||||
# uploader
|
# uploader
|
||||||
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
|
video_uploader = try_get(
|
||||||
|
video_info, lambda x: x['author'][0],
|
||||||
|
compat_str) or str_or_none(video_details.get('author'))
|
||||||
if video_uploader:
|
if video_uploader:
|
||||||
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
|
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
|
||||||
else:
|
else:
|
||||||
@ -2011,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
like_count = _extract_count('like')
|
like_count = _extract_count('like')
|
||||||
dislike_count = _extract_count('dislike')
|
dislike_count = _extract_count('dislike')
|
||||||
|
|
||||||
|
if view_count is None:
|
||||||
|
view_count = str_to_int(self._search_regex(
|
||||||
|
r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
|
||||||
|
'view count', default=None))
|
||||||
|
|
||||||
# subtitles
|
# subtitles
|
||||||
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
video_subtitles = self.extract_subtitles(video_id, video_webpage)
|
||||||
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
|
||||||
|
|
||||||
video_duration = try_get(
|
video_duration = try_get(
|
||||||
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
video_info, lambda x: int_or_none(x['length_seconds'][0]))
|
||||||
|
if not video_duration:
|
||||||
|
video_duration = int_or_none(video_details.get('lengthSeconds'))
|
||||||
if not video_duration:
|
if not video_duration:
|
||||||
video_duration = parse_duration(self._html_search_meta(
|
video_duration = parse_duration(self._html_search_meta(
|
||||||
'duration', video_webpage, 'video duration'))
|
'duration', video_webpage, 'video duration'))
|
||||||
@ -2131,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
(?:https?://)?
|
(?:https?://)?
|
||||||
(?:\w+\.)?
|
(?:\w+\.)?
|
||||||
(?:
|
(?:
|
||||||
youtube\.com/
|
(?:
|
||||||
|
youtube\.com|
|
||||||
|
invidio\.us
|
||||||
|
)
|
||||||
|
/
|
||||||
(?:
|
(?:
|
||||||
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
|
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
|
||||||
\? (?:.*?[&;])*? (?:p|a|list)=
|
\? (?:.*?[&;])*? (?:p|a|list)=
|
||||||
@ -2244,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
|
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
|
||||||
'categories': ['People & Blogs'],
|
'categories': ['People & Blogs'],
|
||||||
'tags': list,
|
'tags': list,
|
||||||
|
'view_count': int,
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'dislike_count': int,
|
'dislike_count': int,
|
||||||
},
|
},
|
||||||
@ -2282,6 +2319,9 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
|
|||||||
# music album playlist
|
# music album playlist
|
||||||
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
|
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
|
@ -22,7 +22,7 @@ class ZattooPlatformBaseIE(InfoExtractor):
|
|||||||
_power_guide_hash = None
|
_power_guide_hash = None
|
||||||
|
|
||||||
def _host_url(self):
|
def _host_url(self):
|
||||||
return 'https://%s' % self._HOST
|
return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
@ -286,6 +286,7 @@ class ZattooLiveIE(ZattooBaseIE):
|
|||||||
class NetPlusIE(ZattooIE):
|
class NetPlusIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'netplus'
|
_NETRC_MACHINE = 'netplus'
|
||||||
_HOST = 'netplus.tv'
|
_HOST = 'netplus.tv'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -300,7 +301,7 @@ class MNetTVIE(ZattooIE):
|
|||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tvplus.m-net.de/watch/abc/123-abc',
|
'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -311,7 +312,7 @@ class WalyTVIE(ZattooIE):
|
|||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.player.waly.tv/watch/abc/123-abc',
|
'url': 'https://player.waly.tv/watch/abc/123-abc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -319,6 +320,7 @@ class WalyTVIE(ZattooIE):
|
|||||||
class BBVTVIE(ZattooIE):
|
class BBVTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'bbvtv'
|
_NETRC_MACHINE = 'bbvtv'
|
||||||
_HOST = 'bbv-tv.net'
|
_HOST = 'bbv-tv.net'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -330,6 +332,7 @@ class BBVTVIE(ZattooIE):
|
|||||||
class VTXTVIE(ZattooIE):
|
class VTXTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'vtxtv'
|
_NETRC_MACHINE = 'vtxtv'
|
||||||
_HOST = 'vtxtv.ch'
|
_HOST = 'vtxtv.ch'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -341,6 +344,7 @@ class VTXTVIE(ZattooIE):
|
|||||||
class MyVisionTVIE(ZattooIE):
|
class MyVisionTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'myvisiontv'
|
_NETRC_MACHINE = 'myvisiontv'
|
||||||
_HOST = 'myvisiontv.ch'
|
_HOST = 'myvisiontv.ch'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -355,7 +359,7 @@ class GlattvisionTVIE(ZattooIE):
|
|||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.iptv.glattvision.ch/watch/abc/123-abc',
|
'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -363,6 +367,7 @@ class GlattvisionTVIE(ZattooIE):
|
|||||||
class SAKTVIE(ZattooIE):
|
class SAKTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'saktv'
|
_NETRC_MACHINE = 'saktv'
|
||||||
_HOST = 'saktv.ch'
|
_HOST = 'saktv.ch'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -377,7 +382,7 @@ class EWETVIE(ZattooIE):
|
|||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tvonline.ewe.de/watch/abc/123-abc',
|
'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -385,6 +390,7 @@ class EWETVIE(ZattooIE):
|
|||||||
class QuantumTVIE(ZattooIE):
|
class QuantumTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'quantumtv'
|
_NETRC_MACHINE = 'quantumtv'
|
||||||
_HOST = 'quantum-tv.com'
|
_HOST = 'quantum-tv.com'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -395,11 +401,11 @@ class QuantumTVIE(ZattooIE):
|
|||||||
|
|
||||||
class OsnatelTVIE(ZattooIE):
|
class OsnatelTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = 'osnateltv'
|
_NETRC_MACHINE = 'osnateltv'
|
||||||
_HOST = 'onlinetv.osnatel.de'
|
_HOST = 'tvonline.osnatel.de'
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.onlinetv.osnatel.de/watch/abc/123-abc',
|
'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@ -407,6 +413,7 @@ class OsnatelTVIE(ZattooIE):
|
|||||||
class EinsUndEinsTVIE(ZattooIE):
|
class EinsUndEinsTVIE(ZattooIE):
|
||||||
_NETRC_MACHINE = '1und1tv'
|
_NETRC_MACHINE = '1und1tv'
|
||||||
_HOST = '1und1.tv'
|
_HOST = '1und1.tv'
|
||||||
|
_API_HOST = 'www.%s' % _HOST
|
||||||
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
57
youtube_dl/extractor/zype.py
Normal file
57
youtube_dl/extractor/zype.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ZypeIE(InfoExtractor):
    """Extractor for player.zype.com JavaScript embeds."""

    _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
    _TEST = {
        'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
        'md5': 'eaee31d474c76a955bdaba02a505c595',
        'info_dict': {
            'id': '5b400b834b32992a310622b9',
            'ext': 'mp4',
            'title': 'Smoky Barbecue Favorites',
            'thumbnail': r're:^https?://.*\.jpe?g',
        },
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` of every Zype embed ``<script>`` tag in *webpage*.

        The URLs are returned exactly as written in the page, so they may be
        protocol-relative (``//player.zype.com/...``).
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
                webpage)]

    def _real_extract(self, url):
        """Download the embed script and pull title, HLS manifest and poster
        out of it.  Title and manifest URL are mandatory; the poster is not.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(
            r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
            'title', group='value')

        # First quoted string in the script that contains ".m3u8".
        m3u8_url = self._search_regex(
            r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
            'm3u8 url', group='url')

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)

        # default=None (not False) so a missing poster yields an absent value
        # instead of a boolean in the 'thumbnail' field.
        thumbnail = self._search_regex(
            r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
            default=None, group='url')

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }
|
@ -3948,8 +3948,12 @@ def write_xattr(path, key, value):
|
|||||||
|
|
||||||
|
|
||||||
def random_birthday(year_field, month_field, day_field):
    """Return a random, valid birthday as a dict of strings.

    The date is drawn uniformly from 1950-01-01 to 1995-12-31 inclusive.
    The result maps *year_field*, *month_field* and *day_field* (the key
    names chosen by the caller) to the year, month and day as decimal
    strings without zero padding.
    """
    start_date = datetime.date(1950, 1, 1)
    end_date = datetime.date(1995, 12, 31)
    # Pick a day offset inside the range rather than drawing year, month and
    # day independently, which could produce impossible dates such as
    # 31 February.
    offset = random.randint(0, (end_date - start_date).days)
    random_date = start_date + datetime.timedelta(offset)
    return {
        year_field: str(random_date.year),
        month_field: str(random_date.month),
        day_field: str(random_date.day),
    }
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2018.10.05'
|
__version__ = '2018.12.03'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user