Mirror of https://github.com/l1ving/youtube-dl (synced 2020-11-18 19:53:54 -08:00)

Commit 517907ec3d: Merge remote-tracking branch 'upstream/master'

.github/ISSUE_TEMPLATE.md (vendored, 6 lines changed)
@@ -6,8 +6,8 @@
---

-### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.08.04*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
-- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.08.04**
+### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2018.11.18*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected.
+- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2018.11.18**

### Before submitting an *issue* make sure you have:
- [ ] At least skimmed through the [README](https://github.com/rg3/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections

@@ -36,7 +36,7 @@ Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2018.08.04
+[debug] youtube-dl version 2018.11.18
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
@@ -296,5 +296,26 @@ title = self._search_regex(

### Use safe conversion functions

-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

Use `url_or_none` for safe URL processing.

Use `try_get` for safe metadata extraction from parsed JSON.

Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.

#### More examples

##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```

##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```
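The snippets above cover `try_get`, `float_or_none` and `int_or_none`; the sketch below, in the same spirit, covers the two helpers they leave out (`url_or_none` and string-to-number conversion). The field names `thumb` and `commentCount` are hypothetical and used only for illustration.

##### Safely process an optional URL and a numeric string
```python
thumbnail = url_or_none(video.get('thumb'))              # None unless the value looks like a URL
comment_count = int_or_none(video.get('commentCount'))   # accepts an int or a numeric string, else None
```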
ChangeLog (202 lines changed)

@@ -1,3 +1,205 @@
version 2018.11.18

Extractors
+ [wwe] Extract subtitles
+ [wwe] Add support for playlists (#14781)
+ [wwe] Add support for wwe.com (#14781, #17450)
* [vk] Detect geo restriction (#17767)
* [openload] Use original host during extraction (#18211)
* [atvat] Fix extraction (#18041)
+ [rte] Add support for new API endpoint (#18206)
* [tnaflixnetwork:embed] Fix extraction (#18205)
* [picarto] Use API and add token support (#16518)
+ [zype] Add support for player.zype.com (#18143)
* [vivo] Fix extraction (#18139)
* [ruutu] Update API endpoint (#18138)


version 2018.11.07

Extractors
+ [youtube] Add another JS signature function name regex (#18091, #18093,
  #18094)
* [facebook] Fix tahoe request (#17171)
* [cliphunter] Fix extraction (#18083)
+ [youtube:playlist] Add support for invidio.us (#18077)
* [zattoo] Arrange API hosts for derived extractors (#18035)
+ [youtube] Add fallback metadata extraction from videoDetails (#18052)


version 2018.11.03

Core
* [extractor/common] Ensure response handle is not prematurely closed before
  it can be read if it matches expected_status (#17195, #17846, #17447)

Extractors
* [laola1tv:embed] Set correct stream access URL scheme (#16341)
+ [ehftv] Add support for ehftv.com (#15408)
* [azmedien] Adapt to major site redesign (#17745, #17746)
+ [twitcasting] Add support for twitcasting.tv (#17981)
* [orf:tvthek] Fix extraction (#17737, #17956, #18024)
+ [openload] Add support for oload.fun (#18045)
* [njpwworld] Fix authentication (#17427)
+ [linkedin:learning] Add support for linkedin.com/learning (#13545)
* [theplatform] Improve error detection (#13222)
* [cnbc] Simplify extraction (#14280, #17110)
+ [cnbc] Add support for new URL scheme (#14193)
* [aparat] Improve extraction and extract more metadata (#17445, #18008)
* [aparat] Fix extraction


version 2018.10.29

Core
+ [extractor/common] Add validation for JSON-LD URLs

Extractors
+ [sportbox] Add support for matchtv.ru
* [sportbox] Fix extraction (#17978)
* [screencast] Fix extraction (#14590, #14617, #17990)
+ [openload] Add support for oload.icu
+ [ivi] Add support for ivi.tv
* [crunchyroll] Improve extraction robustness (#17991)
* [dailymail] Fix formats extraction (#17976)
* [viewster] Reduce format requests
* [cwtv] Handle API errors (#17905)
+ [rutube] Use geo verification headers (#17897)
+ [brightcove:legacy] Add fallbacks to brightcove:new (#13912)
- [tv3] Remove extractor (#10461, #15339)
* [ted] Fix extraction for HTTP and RTMP formats (#5941, #17572, #17894)
+ [openload] Add support for oload.cc (#17823)
+ [patreon] Extract post_file URL (#17792)
* [patreon] Fix extraction (#14502, #10471)


version 2018.10.05

Extractors
* [pluralsight] Improve authentication (#17762)
* [dailymotion] Fix extraction (#17699)
* [crunchyroll] Switch to HTTPS for RpcApi (#17749)
+ [philharmoniedeparis] Add support for pad.philharmoniedeparis.fr (#17705)
* [philharmoniedeparis] Fix extraction (#17705)
+ [jamendo] Add support for licensing.jamendo.com (#17724)
+ [openload] Add support for oload.cloud (#17710)
* [pluralsight] Fix subtitles extraction (#17726, #17728)
+ [vimeo] Add another config regular expression (#17690)
* [spike] Fix Paramount Network extraction (#17677)
* [hotstar] Fix extraction (#14694, #14931, #17637)


version 2018.09.26

Extractors
* [pluralsight] Fix subtitles extraction (#17671)
* [mediaset] Improve embed support (#17668)
+ [youtube] Add support for invidio.us (#17613)
+ [zattoo] Add support for more zattoo platform sites
* [zattoo] Fix extraction (#17175, #17542)


version 2018.09.18

Core
+ [extractor/common] Introduce channel meta fields

Extractors
* [adobepass] Don't pollute default headers dict
* [udemy] Don't pollute default headers dict
* [twitch] Don't pollute default headers dict
* [youtube] Don't pollute default query dict (#17593)
* [crunchyroll] Prefer hardsubless formats and formats in locale language
* [vrv] Make format ids deterministic
* [vimeo] Fix ondemand playlist extraction (#14591)
+ [pornhub] Extract upload date (#17574)
+ [porntube] Extract channel meta fields
+ [vimeo] Extract channel meta fields
+ [youtube] Extract channel meta fields (#9676, #12939)
* [porntube] Fix extraction (#17541)
* [asiancrush] Fix extraction (#15630)
+ [twitch:clips] Extend URL regular expression (closes #17559)
+ [vzaar] Add support for HLS
* [tube8] Fix metadata extraction (#17520)
* [eporner] Extract JSON-LD (#17519)


version 2018.09.10

Core
+ [utils] Properly recognize AV1 codec (#17506)

Extractors
+ [iprima] Add support for prima.iprima.cz (#17514)
+ [tele5] Add support for tele5.de (#7805, #7922, #17331, #17414)
* [nbc] Fix extraction of percent encoded URLs (#17374)


version 2018.09.08

Extractors
* [youtube] Fix extraction (#17457, #17464)
+ [pornhub:uservideos] Add support for new URLs (#17388)
* [iprima] Confirm adult check (#17437)
* [slideslive] Make check for video service name case-insensitive (#17429)
* [radiojavan] Fix extraction (#17151)
* [generic] Skip unsuccessful jwplayer extraction (#16735)


version 2018.09.01

Core
* [utils] Skip remote IP addresses not matching the source address's IP version
  when creating a connection (#13422, #17362)

Extractors
+ [ard] Add support for one.ard.de (#17397)
* [niconico] Fix extraction on python3 (#17393, #17407)
* [ard] Extract f4m formats
* [crunchyroll] Parse vilos media data (#17343)
+ [ard] Add support for Beta ARD Mediathek
+ [bandcamp] Extract more metadata (#13197)
* [internazionale] Fix extraction of non-available-abroad videos (#17386)


version 2018.08.28

Extractors
+ [youtube:playlist] Add support for music album playlists (OLAK5uy_ prefix)
  (#17361)
* [bitchute] Fix extraction by passing a custom User-Agent (#17360)
* [webofstories:playlist] Fix extraction (#16914)
+ [tvplayhome] Add support for new tvplay URLs (#17344)
+ [generic] Allow relative src for videojs embeds (#17324)
+ [xfileshare] Add support for vidto.se (#17317)
+ [vidzi] Add support for vidzi.nu (#17316)
+ [nova:embed] Add support for media.cms.nova.cz (#17282)


version 2018.08.22

Core
* [utils] Use pure browser header for User-Agent (#17236)

Extractors
+ [kinopoisk] Add support for kinopoisk.ru (#17283)
+ [yourporn] Add support for yourporn.sexy (#17298)
+ [go] Add support for disneynow.go.com (#16299, #17264)
+ [6play] Add support for play.rtl.hr (#17249)
* [anvato] Fallback to generic API key for access-key-to-API-key lookup
  (#16788, #17254)
* [lci] Fix extraction (#17274)
* [bbccouk] Extend id URL regular expression (#17270)
* [cwtv] Fix extraction (#17256)
* [nova] Fix extraction (#17241)
+ [generic] Add support for expressen embeds
* [raywenderlich] Adapt to site redesign (#17225)
+ [redbulltv] Add support for redbull.com tv URLs (#17218)
+ [bitchute] Add support for bitchute.com (#14052)
+ [clyp] Add support for token protected media (#17184)
* [imdb] Fix extension extraction (#17167)


version 2018.08.04

Extractors
README.md (25 lines changed)

@@ -512,6 +512,8 @@ The basic usage is not to set any template arguments when downloading a single f
 - `timestamp` (numeric): UNIX timestamp of the moment the video became available
 - `upload_date` (string): Video upload date (YYYYMMDD)
 - `uploader_id` (string): Nickname or id of the video uploader
 - `channel` (string): Full name of the channel the video is uploaded on
 - `channel_id` (string): Id of the channel
 - `location` (string): Physical location where the video was filmed
 - `duration` (numeric): Length of the video in seconds
 - `view_count` (numeric): How many users have watched the video on the platform
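As an illustration of the two newly documented fields (a sketch; the directory layout is arbitrary), `channel` and `channel_id` can be referenced in an output template like any other field, e.g. `youtube-dl -o '%(channel)s/%(title)s-%(id)s.%(ext)s' URL` on the command line, or equivalently through the embedding API:

```python
from youtube_dl import YoutubeDL

# Sketch: the output template may use the new fields like any other;
# a field missing from the info dict falls back to 'NA'.
ydl_opts = {'outtmpl': '%(channel)s/%(title)s-%(id)s.%(ext)s'}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
```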
@@ -1167,7 +1169,28 @@ title = self._search_regex(

### Use safe conversion functions

-Wrap all extracted numeric data into safe functions from `utils`: `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
+Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

Use `url_or_none` for safe URL processing.

Use `try_get` for safe metadata extraction from parsed JSON.

Explore [`youtube_dl/utils.py`](https://github.com/rg3/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.

#### More examples

##### Safely extract optional description from parsed JSON
```python
description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
```

##### Safely extract more optional metadata
```python
video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
description = video.get('summary')
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

# EMBEDDING YOUTUBE-DL
@ -56,6 +56,7 @@
|
||||
- **archive.org**: archive.org videos
|
||||
- **ARD**
|
||||
- **ARD:mediathek**
|
||||
- **ARDBetaMediathek**
|
||||
- **Arkena**
|
||||
- **arte.tv**
|
||||
- **arte.tv:+7**
|
||||
@ -83,8 +84,6 @@
|
||||
- **awaan:season**
|
||||
- **awaan:video**
|
||||
- **AZMedien**: AZ Medien videos
|
||||
- **AZMedienPlaylist**: AZ Medien playlists
|
||||
- **AZMedienShowPlaylist**: AZ Medien show playlists
|
||||
- **BaiduVideo**: 百度视频
|
||||
- **bambuser**
|
||||
- **bambuser:channel**
|
||||
@ -97,6 +96,7 @@
|
||||
- **bbc.co.uk:article**: BBC articles
|
||||
- **bbc.co.uk:iplayer:playlist**
|
||||
- **bbc.co.uk:playlist**
|
||||
- **BBVTV**
|
||||
- **Beatport**
|
||||
- **Beeg**
|
||||
- **BehindKink**
|
||||
@ -108,6 +108,8 @@
|
||||
- **BiliBili**
|
||||
- **BioBioChileTV**
|
||||
- **BIQLE**
|
||||
- **BitChute**
|
||||
- **BitChuteChannel**
|
||||
- **BleacherReport**
|
||||
- **BleacherReportCMS**
|
||||
- **blinkx**
|
||||
@ -174,6 +176,7 @@
|
||||
- **Clyp**
|
||||
- **cmt.com**
|
||||
- **CNBC**
|
||||
- **CNBCVideo**
|
||||
- **CNN**
|
||||
- **CNNArticle**
|
||||
- **CNNBlogs**
|
||||
@ -189,7 +192,7 @@
|
||||
- **Crackle**
|
||||
- **Criterion**
|
||||
- **CrooksAndLiars**
|
||||
- **Crunchyroll**
|
||||
- **crunchyroll**
|
||||
- **crunchyroll:playlist**
|
||||
- **CSNNE**
|
||||
- **CSpan**: C-SPAN
|
||||
@ -247,7 +250,9 @@
|
||||
- **EchoMsk**
|
||||
- **egghead:course**: egghead.io course
|
||||
- **egghead:lesson**: egghead.io lesson
|
||||
- **ehftv**
|
||||
- **eHow**
|
||||
- **EinsUndEinsTV**
|
||||
- **Einthusan**
|
||||
- **eitb.tv**
|
||||
- **EllenTube**
|
||||
@ -265,6 +270,7 @@
|
||||
- **EsriVideo**
|
||||
- **Europa**
|
||||
- **EveryonesMixtape**
|
||||
- **EWETV**
|
||||
- **ExpoTV**
|
||||
- **Expressen**
|
||||
- **ExtremeTube**
|
||||
@ -324,6 +330,7 @@
|
||||
- **Gfycat**
|
||||
- **GiantBomb**
|
||||
- **Giga**
|
||||
- **GlattvisionTV**
|
||||
- **Glide**: Glide mobile video messages (glide.me)
|
||||
- **Globo**
|
||||
- **GloboArticle**
|
||||
@ -353,7 +360,7 @@
|
||||
- **HitRecord**
|
||||
- **HornBunny**
|
||||
- **HotNewHipHop**
|
||||
- **HotStar**
|
||||
- **hotstar**
|
||||
- **hotstar:playlist**
|
||||
- **Howcast**
|
||||
- **HowStuffWorks**
|
||||
@ -405,6 +412,7 @@
|
||||
- **Ketnet**
|
||||
- **KhanAcademy**
|
||||
- **KickStarter**
|
||||
- **KinoPoisk**
|
||||
- **KonserthusetPlay**
|
||||
- **kontrtube**: KontrTube.ru - Труба зовёт
|
||||
- **KrasView**: Красвью
|
||||
@ -437,6 +445,8 @@
|
||||
- **limelight:channel**
|
||||
- **limelight:channel_list**
|
||||
- **LineTV**
|
||||
- **linkedin:learning**
|
||||
- **linkedin:learning:course**
|
||||
- **LiTV**
|
||||
- **LiveLeak**
|
||||
- **LiveLeakEmbed**
|
||||
@ -490,6 +500,7 @@
|
||||
- **Mixer:vod**
|
||||
- **MLB**
|
||||
- **Mnet**
|
||||
- **MNetTV**
|
||||
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
|
||||
- **Mofosex**
|
||||
- **Mojvideo**
|
||||
@ -521,6 +532,7 @@
|
||||
- **Myvi**
|
||||
- **MyVidster**
|
||||
- **MyviEmbed**
|
||||
- **MyVisionTV**
|
||||
- **n-tv.de**
|
||||
- **natgeo**
|
||||
- **natgeo:episodeguide**
|
||||
@ -546,6 +558,7 @@
|
||||
- **netease:program**: 网易云音乐 - 电台节目
|
||||
- **netease:singer**: 网易云音乐 - 歌手
|
||||
- **netease:song**: 网易云音乐
|
||||
- **NetPlus**
|
||||
- **Netzkino**
|
||||
- **Newgrounds**
|
||||
- **NewgroundsPlaylist**
|
||||
@ -577,6 +590,7 @@
|
||||
- **Normalboots**
|
||||
- **NosVideo**
|
||||
- **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
|
||||
- **NovaEmbed**
|
||||
- **nowness**
|
||||
- **nowness:playlist**
|
||||
- **nowness:series**
|
||||
@ -621,6 +635,7 @@
|
||||
- **orf:iptv**: iptv.ORF.at
|
||||
- **orf:oe1**: Radio Österreich 1
|
||||
- **orf:tvthek**: ORF TVthek
|
||||
- **OsnatelTV**
|
||||
- **PacktPub**
|
||||
- **PacktPubCourse**
|
||||
- **PandaTV**: 熊猫TV
|
||||
@ -681,6 +696,7 @@
|
||||
- **qqmusic:playlist**: QQ音乐 - 歌单
|
||||
- **qqmusic:singer**: QQ音乐 - 歌手
|
||||
- **qqmusic:toplist**: QQ音乐 - 排行榜
|
||||
- **QuantumTV**
|
||||
- **Quickline**
|
||||
- **QuicklineLive**
|
||||
- **R7**
|
||||
@ -696,6 +712,7 @@
|
||||
- **RaiPlayLive**
|
||||
- **RaiPlayPlaylist**
|
||||
- **RayWenderlich**
|
||||
- **RayWenderlichCourse**
|
||||
- **RBMARadio**
|
||||
- **RDS**: RDS.ca
|
||||
- **RedBullTV**
|
||||
@ -747,6 +764,7 @@
|
||||
- **safari**: safaribooksonline.com online video
|
||||
- **safari:api**
|
||||
- **safari:course**: safaribooksonline.com online courses
|
||||
- **SAKTV**
|
||||
- **Sapo**: SAPO Vídeos
|
||||
- **savefrom.net**
|
||||
- **SBS**: sbs.com.au
|
||||
@ -802,7 +820,7 @@
|
||||
- **Spiegeltv**
|
||||
- **sport.francetvinfo.fr**
|
||||
- **Sport5**
|
||||
- **SportBoxEmbed**
|
||||
- **SportBox**
|
||||
- **SportDeutschland**
|
||||
- **SpringboardPlatform**
|
||||
- **Sprout**
|
||||
@ -841,6 +859,7 @@
|
||||
- **techtv.mit.edu**
|
||||
- **ted**
|
||||
- **Tele13**
|
||||
- **Tele5**
|
||||
- **TeleBruxelles**
|
||||
- **Telecinco**: telecinco.es, cuatro.com and mediaset.es
|
||||
- **Telegraaf**
|
||||
@ -892,7 +911,6 @@
|
||||
- **TV2**
|
||||
- **tv2.hu**
|
||||
- **TV2Article**
|
||||
- **TV3**
|
||||
- **TV4**: tv4.se and tv4play.se
|
||||
- **TV5MondePlus**: TV5MONDE+
|
||||
- **TVA**
|
||||
@ -912,7 +930,9 @@
|
||||
- **tvp:embed**: Telewizja Polska
|
||||
- **tvp:series**
|
||||
- **TVPlayer**
|
||||
- **TVPlayHome**
|
||||
- **Tweakers**
|
||||
- **TwitCasting**
|
||||
- **twitch:chapter**
|
||||
- **twitch:clips**
|
||||
- **twitch:profile**
|
||||
@ -1027,12 +1047,14 @@
|
||||
- **vrv**
|
||||
- **vrv:series**
|
||||
- **VShare**
|
||||
- **VTXTV**
|
||||
- **vube**: Vube.com
|
||||
- **VuClip**
|
||||
- **VVVVID**
|
||||
- **VyboryMos**
|
||||
- **Vzaar**
|
||||
- **Walla**
|
||||
- **WalyTV**
|
||||
- **washingtonpost**
|
||||
- **washingtonpost:article**
|
||||
- **wat.tv**
|
||||
@ -1058,6 +1080,7 @@
|
||||
- **wrzuta.pl:playlist**
|
||||
- **WSJ**: Wall Street Journal
|
||||
- **WSJArticle**
|
||||
- **WWE**
|
||||
- **XBef**
|
||||
- **XboxClips**
|
||||
- **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
|
||||
@ -1093,6 +1116,7 @@
|
||||
- **YouNowLive**
|
||||
- **YouNowMoment**
|
||||
- **YouPorn**
|
||||
- **YourPorn**
|
||||
- **YourUpload**
|
||||
- **youtube**: YouTube.com
|
||||
- **youtube:channel**: YouTube.com channels
|
||||
@ -1116,3 +1140,4 @@
|
||||
- **ZDF**
|
||||
- **ZDFChannel**
|
||||
- **zingmp3**: mp3.zing.vn
|
||||
- **Zype**
|
||||
|
@ -7,6 +7,7 @@ import json
|
||||
import os.path
|
||||
import re
|
||||
import types
|
||||
import ssl
|
||||
import sys
|
||||
|
||||
import youtube_dl.extractor
|
||||
@ -241,3 +242,12 @@ def expect_warnings(ydl, warnings_re):
|
||||
real_warning(w)
|
||||
|
||||
ydl.report_warning = _report_warning
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
@ -9,11 +9,30 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import FakeYDL, expect_dict, expect_value
|
||||
from youtube_dl.compat import compat_etree_fromstring
|
||||
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
|
||||
from youtube_dl.compat import compat_etree_fromstring, compat_http_server
|
||||
from youtube_dl.extractor.common import InfoExtractor
|
||||
from youtube_dl.extractor import YoutubeIE, get_info_extractor
|
||||
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
|
||||
import threading
|
||||
|
||||
|
||||
TEAPOT_RESPONSE_STATUS = 418
|
||||
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
|
||||
|
||||
|
||||
class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
if self.path == '/teapot':
|
||||
self.send_response(TEAPOT_RESPONSE_STATUS)
|
||||
self.send_header('Content-Type', 'text/html; charset=utf-8')
|
||||
self.end_headers()
|
||||
self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
|
||||
else:
|
||||
assert False
|
||||
|
||||
|
||||
class TestIE(InfoExtractor):
|
||||
@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
|
||||
for i in range(len(entries)):
|
||||
expect_dict(self, entries[i], expected_entries[i])
|
||||
|
||||
def test_response_with_expected_status_returns_content(self):
|
||||
# Checks for mitigations against the effects of
|
||||
# <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
|
||||
# manifest as `_download_webpage`, `_download_xml`, `_download_json`,
|
||||
# or the underlying `_download_webpage_handle` returning no content
|
||||
# when a response matches `expected_status`.
|
||||
|
||||
httpd = compat_http_server.HTTPServer(
|
||||
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
|
||||
port = http_server_port(httpd)
|
||||
server_thread = threading.Thread(target=httpd.serve_forever)
|
||||
server_thread.daemon = True
|
||||
server_thread.start()
|
||||
|
||||
(content, urlh) = self.ie._download_webpage_handle(
|
||||
'http://127.0.0.1:%d/teapot' % port, None,
|
||||
expected_status=TEAPOT_RESPONSE_STATUS)
|
||||
self.assertEqual(content, TEAPOT_RESPONSE_BODY)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    unittest.main()
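The teapot test above exercises the `expected_status` parameter of `_download_webpage_handle`. For context, here is a minimal sketch of how an extractor might rely on it to keep reading the body of an error response; the `ExampleIE` class, URL and JSON error shape are hypothetical and not part of this commit.

```python
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.utils import ExtractorError


class ExampleIE(InfoExtractor):  # hypothetical extractor, for illustration only
    _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # expected_status keeps a 404 response readable instead of raising,
        # so the (hypothetical) JSON error body can still be parsed.
        webpage, urlh = self._download_webpage_handle(
            'https://example.com/video/%s' % video_id, video_id,
            expected_status=404)
        if urlh.getcode() == 404:
            error = self._parse_json(webpage, video_id, fatal=False) or {}
            raise ExtractorError(
                error.get('message', 'Video not found'), expected=True)
        ...
```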
@ -9,26 +9,16 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import try_rm
|
||||
from test.helper import http_server_port, try_rm
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server
|
||||
from youtube_dl.downloader.http import HttpFD
|
||||
from youtube_dl.utils import encodeFilename
|
||||
import ssl
|
||||
import threading
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
TEST_SIZE = 10 * 1024
|
||||
|
||||
|
||||
|
@ -8,6 +8,7 @@ import sys
|
||||
import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
from test.helper import http_server_port
|
||||
from youtube_dl import YoutubeDL
|
||||
from youtube_dl.compat import compat_http_server, compat_urllib_request
|
||||
import ssl
|
||||
@ -16,15 +17,6 @@ import threading
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
|
||||
def http_server_port(httpd):
|
||||
if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
|
||||
# In Jython SSLSocket is not a subclass of socket.socket
|
||||
sock = httpd.socket.sock
|
||||
else:
|
||||
sock = httpd.socket
|
||||
return sock.getsockname()[1]
|
||||
|
||||
|
||||
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
|
||||
def log_message(self, format, *args):
|
||||
pass
|
||||
|
@ -785,6 +785,10 @@ class TestUtil(unittest.TestCase):
|
||||
'vcodec': 'h264',
|
||||
'acodec': 'aac',
|
||||
})
|
||||
self.assertEqual(parse_codecs('av01.0.05M.08'), {
|
||||
'vcodec': 'av01.0.05M.08',
|
||||
'acodec': 'none',
|
||||
})
|
||||
|
||||
def test_escape_rfc3986(self):
|
||||
reserved = "!*'();:@&=+$,/?#[]"
|
||||
|
@ -1325,8 +1325,8 @@ class AdobePassIE(InfoExtractor):
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
headers = kwargs.get('headers', {})
|
||||
headers.update(self.geo_verification_headers())
|
||||
headers = self.geo_verification_headers()
|
||||
headers.update(kwargs.get('headers', {}))
|
||||
kwargs['headers'] = headers
|
||||
return super(AdobePassIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
@ -43,10 +43,6 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
|
||||
video_data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
||||
@ -58,7 +54,18 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
(lambda x: x['episodeDetail']['content']['data'],
|
||||
lambda x: x['videoDetail']['content']['data']), dict)
|
||||
ep_meta = ep_data.get('full_video', {})
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
|
||||
zype_id = ep_meta.get('zype_id')
|
||||
if zype_id:
|
||||
embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
|
||||
ie_key = 'Zype'
|
||||
else:
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id')
|
||||
external_id = ep_data.get('external_id') or ep_meta['external_id']
|
||||
embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
|
||||
ie_key = 'Kaltura'
|
||||
|
||||
title = ep_data.get('title') or ep_meta.get('title')
|
||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
||||
@ -72,8 +79,8 @@ class AmericasTestKitchenIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, external_id),
|
||||
'ie_key': 'Kaltura',
|
||||
'url': embed_url,
|
||||
'ie_key': ie_key,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
|
@ -134,9 +134,33 @@ class AnvatoIE(InfoExtractor):
|
||||
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
|
||||
}
|
||||
|
||||
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
|
||||
|
||||
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
|
||||
_AUTH_KEY = b'\x31\xc2\x42\x84\x9e\x73\xa0\xce'
|
||||
|
||||
_TESTS = [{
|
||||
# from https://www.boston25news.com/news/watch-humpback-whale-breaches-right-next-to-fishing-boat-near-nh/817484874
|
||||
'url': 'anvato:8v9BEynrwx8EFLYpgfOWcG1qJqyXKlRM:4465496',
|
||||
'info_dict': {
|
||||
'id': '4465496',
|
||||
'ext': 'mp4',
|
||||
'title': 'VIDEO: Humpback whale breaches right next to NH boat',
|
||||
'description': 'VIDEO: Humpback whale breaches right next to NH boat. Footage courtesy: Zach Fahey.',
|
||||
'duration': 22,
|
||||
'timestamp': 1534855680,
|
||||
'upload_date': '20180821',
|
||||
'uploader': 'ANV',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# from https://sanfrancisco.cbslocal.com/2016/06/17/source-oakland-cop-on-leave-for-having-girlfriend-help-with-police-reports/
|
||||
'url': 'anvato:DVzl9QRzox3ZZsP9bNu5Li3X7obQOnqP:3417601',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(AnvatoIE, self).__init__(*args, **kwargs)
|
||||
self.__server_time = None
|
||||
@ -169,7 +193,8 @@ class AnvatoIE(InfoExtractor):
|
||||
'api': {
|
||||
'anvrid': anvrid,
|
||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
||||
access_key, anvrid, server_time, self._ANVACK_TABLE[access_key])),
|
||||
access_key, anvrid, server_time,
|
||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
||||
'anvts': server_time,
|
||||
},
|
||||
}
|
||||
@ -284,5 +309,6 @@ class AnvatoIE(InfoExtractor):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
access_key, video_id = mobj.group('access_key_or_mcp', 'id')
|
||||
if access_key not in self._ANVACK_TABLE:
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE[access_key]
|
||||
access_key = self._MCP_TO_ACCESS_KEY_TABLE.get(
|
||||
access_key) or access_key
|
||||
return self._get_anvato_videos(access_key, video_id)
|
||||
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
mimetype2ext,
|
||||
url_or_none,
|
||||
)
|
||||
@ -12,59 +13,83 @@ from ..utils import (
|
||||
class AparatIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P<id>[a-zA-Z0-9]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.aparat.com/v/wP8On',
|
||||
'md5': '131aca2e14fe7c4dcb3c4877ba300c89',
|
||||
'info_dict': {
|
||||
'id': 'wP8On',
|
||||
'ext': 'mp4',
|
||||
'title': 'تیم گلکسی 11 - زومیت',
|
||||
'age_limit': 0,
|
||||
'description': 'md5:096bdabcdcc4569f2b8a5e903a3b3028',
|
||||
'duration': 231,
|
||||
'timestamp': 1387394859,
|
||||
'upload_date': '20131218',
|
||||
'view_count': int,
|
||||
},
|
||||
# 'skip': 'Extremely unreliable',
|
||||
}
|
||||
}, {
|
||||
# multiple formats
|
||||
'url': 'https://www.aparat.com/v/8dflw/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
# Provides more metadata
|
||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||
|
||||
title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title')
|
||||
if not webpage:
|
||||
# Note: There is an easier-to-parse configuration at
|
||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
||||
# but the URL in there does not work
|
||||
webpage = self._download_webpage(
|
||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||
video_id)
|
||||
|
||||
file_list = self._parse_json(
|
||||
options = self._parse_json(
|
||||
self._search_regex(
|
||||
r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage,
|
||||
'file list'),
|
||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
||||
webpage, 'options', group='value'),
|
||||
video_id)
|
||||
|
||||
player = options['plugins']['sabaPlayerPlugin']
|
||||
|
||||
formats = []
|
||||
for item in file_list[0]:
|
||||
file_url = url_or_none(item.get('file'))
|
||||
if not file_url:
|
||||
continue
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': label or ext,
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height', default=None)),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
for sources in player['multiSRC']:
|
||||
for item in sources:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
file_url = url_or_none(item.get('src'))
|
||||
if not file_url:
|
||||
continue
|
||||
item_type = item.get('type')
|
||||
if item_type == 'application/vnd.apple.mpegurl':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
file_url, video_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls',
|
||||
fatal=False))
|
||||
else:
|
||||
ext = mimetype2ext(item.get('type'))
|
||||
label = item.get('label')
|
||||
formats.append({
|
||||
'url': file_url,
|
||||
'ext': ext,
|
||||
'format_id': 'http-%s' % (label or ext),
|
||||
'height': int_or_none(self._search_regex(
|
||||
r'(\d+)[pP]', label or '', 'height',
|
||||
default=None)),
|
||||
})
|
||||
self._sort_formats(
|
||||
formats, field_preference=('height', 'width', 'tbr', 'format_id'))
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False)
|
||||
info = self._search_json_ld(webpage, video_id, default={})
|
||||
|
||||
return {
|
||||
if not info.get('title'):
|
||||
info['title'] = player['title']
|
||||
|
||||
return merge_dicts(info, {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'age_limit': self._family_friendly_search(webpage),
|
||||
'thumbnail': url_or_none(options.get('poster')),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
@ -21,7 +21,7 @@ from ..compat import compat_etree_fromstring
|
||||
|
||||
class ARDMediathekIE(InfoExtractor):
|
||||
IE_NAME = 'ARD:mediathek'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
_VALID_URL = r'^https?://(?:(?:www\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
|
||||
|
||||
_TESTS = [{
|
||||
# available till 26.07.2022
|
||||
@ -37,6 +37,9 @@ class ARDMediathekIE(InfoExtractor):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# audio
|
||||
'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
|
||||
@ -282,3 +285,76 @@ class ARDIE(InfoExtractor):
|
||||
'upload_date': upload_date,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
||||
|
||||
|
||||
class ARDBetaMediathekIE(InfoExtractor):
|
||||
_VALID_URL = r'https://beta\.ardmediathek\.de/[a-z]+/player/(?P<video_id>[a-zA-Z0-9]+)/(?P<display_id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
|
||||
'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
|
||||
'info_dict': {
|
||||
'display_id': 'die-robuste-roswita',
|
||||
'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
||||
'title': 'Tatort: Die robuste Roswita',
|
||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
||||
'duration': 5316,
|
||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
|
||||
'upload_date': '20180826',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('video_id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
|
||||
data = self._parse_json(data_json, display_id)
|
||||
|
||||
res = {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
||||
formats = []
|
||||
for widget in data.values():
|
||||
if widget.get('_geoblocked'):
|
||||
raise ExtractorError('This video is not available due to geoblocking', expected=True)
|
||||
|
||||
if '_duration' in widget:
|
||||
res['duration'] = widget['_duration']
|
||||
if 'clipTitle' in widget:
|
||||
res['title'] = widget['clipTitle']
|
||||
if '_previewImage' in widget:
|
||||
res['thumbnail'] = widget['_previewImage']
|
||||
if 'broadcastedOn' in widget:
|
||||
res['upload_date'] = unified_strdate(widget['broadcastedOn'])
|
||||
if 'synopsis' in widget:
|
||||
res['description'] = widget['synopsis']
|
||||
if '_subtitleUrl' in widget:
|
||||
res['subtitles'] = {'de': [{
|
||||
'ext': 'ttml',
|
||||
'url': widget['_subtitleUrl'],
|
||||
}]}
|
||||
if '_quality' in widget:
|
||||
format_url = widget['_stream']['json'][0]
|
||||
|
||||
if format_url.endswith('.f4m'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
format_url + '?hdcore=3.11.0',
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
elif format_url.endswith('m3u8'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': 'http-' + widget['_quality'],
|
||||
'url': format_url,
|
||||
'preference': 10, # Plain HTTP, that's nice
|
||||
})
|
||||
|
||||
self._sort_formats(formats)
|
||||
res['formats'] = formats
|
||||
|
||||
return res
|
||||
|
@ -8,7 +8,6 @@ from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
remove_end,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -34,19 +33,40 @@ class AsianCrushIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
data = self._download_json(
|
||||
'https://www.asiancrush.com/wp-admin/admin-ajax.php', video_id,
|
||||
data=urlencode_postdata({
|
||||
'postid': video_id,
|
||||
'action': 'get_channel_kaltura_vars',
|
||||
}))
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
entry_id = data['entry_id']
|
||||
entry_id, partner_id, title = [None] * 3
|
||||
|
||||
vars = self._parse_json(
|
||||
self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False)
|
||||
if vars:
|
||||
entry_id = vars.get('entry_id')
|
||||
partner_id = vars.get('partner_id')
|
||||
title = vars.get('vid_label')
|
||||
|
||||
if not entry_id:
|
||||
entry_id = self._search_regex(
|
||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
||||
|
||||
player = self._download_webpage(
|
||||
'https://api.asiancrush.com/embeddedVideoPlayer', video_id,
|
||||
query={'id': entry_id})
|
||||
|
||||
kaltura_id = self._search_regex(
|
||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
||||
'kaltura id', group='id')
|
||||
|
||||
if not partner_id:
|
||||
partner_id = self._search_regex(
|
||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
||||
default='513551')
|
||||
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (data['partner_id'], entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id,
|
||||
video_title=data.get('vid_label'))
|
||||
'kaltura:%s:%s' % (partner_id, kaltura_id),
|
||||
ie=KalturaIE.ie_key(), video_id=kaltura_id,
|
||||
video_title=title)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(InfoExtractor):
|
||||
|
@ -28,8 +28,10 @@ class ATVAtIE(InfoExtractor):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_data = self._parse_json(unescapeHTML(self._search_regex(
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="([^"]+)"',
|
||||
webpage, 'player data')), display_id)['config']['initial_video']
|
||||
[r'flashPlayerOptions\s*=\s*(["\'])(?P<json>(?:(?!\1).)+)\1',
|
||||
r'class="[^"]*jsb_video/FlashPlayer[^"]*"[^>]+data-jsb="(?P<json>[^"]+)"'],
|
||||
webpage, 'player data', group='json')),
|
||||
display_id)['config']['initial_video']
|
||||
|
||||
video_id = video_data['id']
|
||||
video_title = video_data['title']
|
||||
|
@ -1,213 +1,90 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
get_element_by_class,
|
||||
get_element_by_id,
|
||||
strip_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class AZMedienBaseIE(InfoExtractor):
|
||||
def _kaltura_video(self, partner_id, entry_id):
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (partner_id, entry_id), ie=KalturaIE.ie_key(),
|
||||
video_id=entry_id)
|
||||
|
||||
|
||||
class AZMedienIE(AZMedienBaseIE):
|
||||
class AZMedienIE(InfoExtractor):
|
||||
IE_DESC = 'AZ Medien videos'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
(?P<host>
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
[0-9]+-show-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-segment-(?:[^/\#]+\#)?|
|
||||
\#
|
||||
)|
|
||||
\#
|
||||
[^/]+/
|
||||
(?P<id>
|
||||
[^/]+-(?P<article_id>\d+)
|
||||
)
|
||||
(?P<id>[^\#]+)
|
||||
(?:
|
||||
\#video=
|
||||
(?P<kaltura_id>
|
||||
[_0-9a-z]+
|
||||
)
|
||||
)?
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'segment'
|
||||
'url': 'http://www.telezueri.ch/62-show-zuerinews/13772-episode-sonntag-18-dezember-2016/32419-segment-massenabweisungen-beim-hiltl-club-wegen-pelzboom',
|
||||
'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
|
||||
'info_dict': {
|
||||
'id': '1_2444peh4',
|
||||
'id': '1_anruz3wy',
|
||||
'ext': 'mp4',
|
||||
'title': 'Massenabweisungen beim Hiltl Club wegen Pelzboom',
|
||||
'description': 'md5:9ea9dd1b159ad65b36ddcf7f0d7c76a8',
|
||||
'uploader_id': 'TeleZ?ri',
|
||||
'upload_date': '20161218',
|
||||
'timestamp': 1482084490,
|
||||
'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
|
||||
'description': 'md5:dd9f96751ec9c35e409a698a328402f3',
|
||||
'uploader_id': 'TVOnline',
|
||||
'upload_date': '20180930',
|
||||
'timestamp': 1538328802,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
# URL with 'segment' and fragment:
|
||||
'url': 'http://www.telebaern.tv/118-show-news/14240-episode-dienstag-17-januar-2017/33666-segment-achtung-gefahr#zu-wenig-pflegerinnen-und-pfleger',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'episode' and fragment:
|
||||
'url': 'http://www.telem1.ch/47-show-sonntalk/13986-episode-soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz#soldaten-fuer-grenzschutz-energiestrategie-obama-bilanz',
|
||||
'only_matching': True
|
||||
}, {
|
||||
# URL with 'show' and fragment:
|
||||
'url': 'http://www.telezueri.ch/66-show-sonntalk#burka-plakate-trump-putin-china-besuch',
|
||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||
'only_matching': True
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'<script[^>]+src=["\'](?:https?:)?//(?:[^/]+\.)?kaltura\.com(?:/[^/]+)*/(?:p|partner_id)/([0-9]+)',
|
||||
webpage, 'kaltura partner id')
|
||||
entry_id = self._html_search_regex(
|
||||
r'<a[^>]+data-id=(["\'])(?P<id>(?:(?!\1).)+)\1[^>]+data-slug=["\']%s'
|
||||
% re.escape(video_id), webpage, 'kaltura entry id', group='id')
|
||||
|
||||
return self._kaltura_video(partner_id, entry_id)
|
||||
|
||||
|
||||
class AZMedienPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?P<id>[0-9]+-
|
||||
(?:
|
||||
show|
|
||||
topic|
|
||||
themen
|
||||
)-[^/\#]+
|
||||
(?:
|
||||
/[0-9]+-episode-[^/\#]+
|
||||
)?
|
||||
)$
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
# URL with 'episode'
|
||||
'url': 'http://www.telebaern.tv/118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'info_dict': {
|
||||
'id': '118-show-news/13735-episode-donnerstag-15-dezember-2016',
|
||||
'title': 'News - Donnerstag, 15. Dezember 2016',
|
||||
},
|
||||
'playlist_count': 9,
|
||||
}, {
|
||||
# URL with 'themen'
|
||||
'url': 'http://www.telem1.ch/258-themen-tele-m1-classics',
|
||||
'info_dict': {
|
||||
'id': '258-themen-tele-m1-classics',
|
||||
'title': 'Tele M1 Classics',
|
||||
},
|
||||
'playlist_mincount': 15,
|
||||
}, {
|
||||
# URL with 'topic', contains nested playlists
|
||||
'url': 'http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# URL with 'show' only
|
||||
'url': 'http://www.telezueri.ch/86-show-talktaeglich',
|
||||
'only_matching': True
|
||||
}]
|
||||
_PARTNER_ID = '1719221'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, show_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
entry_id = mobj.group('kaltura_id')
|
||||
|
||||
entries = []
|
||||
if not entry_id:
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
api_path = self._search_regex(
|
||||
r'["\']apiPath["\']\s*:\s*["\']([^"^\']+)["\']',
|
||||
webpage, 'api path')
|
||||
api_url = 'https://www.%s%s' % (mobj.group('host'), api_path)
|
||||
payload = {
|
||||
'query': '''query VideoContext($articleId: ID!) {
|
||||
article: node(id: $articleId) {
|
||||
... on Article {
|
||||
mainAssetRelation {
|
||||
asset {
|
||||
... on VideoAsset {
|
||||
kalturaId
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}''',
|
||||
'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
|
||||
}
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
data=json.dumps(payload).encode())
|
||||
entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
|
||||
|
||||
partner_id = self._search_regex(
|
||||
r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
|
||||
webpage, 'kaltura partner id', default=None)
|
||||
|
||||
if partner_id:
|
||||
entries = [
|
||||
self._kaltura_video(partner_id, m.group('id'))
|
||||
for m in re.finditer(
|
||||
r'data-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(m.group('url'), ie=AZMedienIE.ie_key())
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+data-real=(["\'])(?P<url>http.+?)\1', webpage)]
|
||||
|
||||
if not entries:
|
||||
entries = [
|
||||
# May contain nested playlists (e.g. [1]) thus no explicit
|
||||
# ie_key
|
||||
# 1. http://www.telezueri.ch/219-topic-aera-trump-hat-offiziell-begonnen)
|
||||
self.url_result(urljoin(url, m.group('url')))
|
||||
for m in re.finditer(
|
||||
r'<a[^>]+name=[^>]+href=(["\'])(?P<url>/.+?)\1', webpage)]
|
||||
|
||||
title = self._search_regex(
|
||||
r'episodeShareTitle\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
|
||||
webpage, 'title',
|
||||
default=strip_or_none(get_element_by_id(
|
||||
'video-title', webpage)), group='title')
|
||||
|
||||
return self.playlist_result(entries, show_id, title)
|
||||
|
||||
|
||||
class AZMedienShowPlaylistIE(AZMedienBaseIE):
|
||||
IE_DESC = 'AZ Medien show playlists'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
telezueri\.ch|
|
||||
telebaern\.tv|
|
||||
telem1\.ch
|
||||
)/
|
||||
(?:
|
||||
all-episodes|
|
||||
alle-episoden
|
||||
)/
|
||||
(?P<id>[^/?#&]+)
|
||||
'''
|
||||
|
||||
_TEST = {
|
||||
'url': 'http://www.telezueri.ch/all-episodes/astrotalk',
|
||||
'info_dict': {
|
||||
'id': 'astrotalk',
|
||||
'title': 'TeleZüri: AstroTalk - alle episoden',
|
||||
'description': 'md5:4c0f7e7d741d906004266e295ceb4a26',
|
||||
},
|
||||
'playlist_mincount': 13,
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
playlist_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
episodes = get_element_by_class('search-mobile-box', webpage)
|
||||
entries = [self.url_result(
|
||||
urljoin(url, m.group('url'))) for m in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', episodes)]
|
||||
title = self._og_search_title(webpage, fatal=False)
|
||||
description = self._og_search_description(webpage)
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
return self.url_result(
|
||||
'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
|
||||
ie=KalturaIE.ie_key(), video_id=entry_id)
|
||||
|
@ -1,6 +1,5 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
@ -16,15 +15,18 @@ from ..utils import (
|
||||
int_or_none,
|
||||
KNOWN_EXTENSIONS,
|
||||
parse_filesize,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
unified_strdate,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BandcampIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://.*?\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
|
||||
'md5': 'c557841d5e50261777a6585648adf439',
|
||||
@ -36,13 +38,44 @@ class BandcampIE(InfoExtractor):
|
||||
},
|
||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||
}, {
|
||||
# free download
|
||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||
'md5': '0369ace6b939f0927e62c67a1a8d9fa7',
|
||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
||||
'info_dict': {
|
||||
'id': '2650410135',
|
||||
'ext': 'aiff',
|
||||
'title': 'Ben Prunty - Lanius (Battle)',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Ben Prunty',
|
||||
'timestamp': 1396508491,
|
||||
'upload_date': '20140403',
|
||||
'release_date': '20140403',
|
||||
'duration': 260.877,
|
||||
'track': 'Lanius (Battle)',
|
||||
'track_number': 1,
|
||||
'track_id': '2650410135',
|
||||
'artist': 'Ben Prunty',
|
||||
'album': 'FTL: Advanced Edition Soundtrack',
|
||||
},
|
||||
}, {
|
||||
# no free download, mp3 128
|
||||
'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
|
||||
'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
|
||||
'info_dict': {
|
||||
'id': '2584466013',
|
||||
'ext': 'mp3',
|
||||
'title': 'Mastodon - Hail to Fire',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Mastodon',
|
||||
'timestamp': 1322005399,
|
||||
'upload_date': '20111122',
|
||||
'release_date': '20040207',
|
||||
'duration': 120.79,
|
||||
'track': 'Hail to Fire',
|
||||
'track_number': 5,
|
||||
'track_id': '2584466013',
|
||||
'artist': 'Mastodon',
|
||||
'album': 'Call of the Mastodon',
|
||||
},
|
||||
}]
|
||||
|
||||
@ -51,19 +84,23 @@ class BandcampIE(InfoExtractor):
|
||||
title = mobj.group('title')
|
||||
webpage = self._download_webpage(url, title)
|
||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
||||
m_download = re.search(r'freeDownloadPage: "(.*?)"', webpage)
|
||||
if not m_download:
|
||||
m_trackinfo = re.search(r'trackinfo: (.+),\s*?\n', webpage)
|
||||
if m_trackinfo:
|
||||
json_code = m_trackinfo.group(1)
|
||||
data = json.loads(json_code)[0]
|
||||
track_id = compat_str(data['id'])
|
||||
|
||||
if not data.get('file'):
|
||||
raise ExtractorError('Not streamable', video_id=track_id, expected=True)
|
||||
track_id = None
|
||||
track = None
|
||||
track_number = None
|
||||
duration = None
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in data['file'].items():
|
||||
formats = []
|
||||
track_info = self._parse_json(
|
||||
self._search_regex(
|
||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
||||
webpage, 'track info', default='{}'), title)
|
||||
if track_info:
|
||||
file_ = track_info.get('file')
|
||||
if isinstance(file_, dict):
|
||||
for format_id, format_url in file_.items():
|
||||
if not url_or_none(format_url):
|
||||
continue
|
||||
ext, abr_str = format_id.split('-', 1)
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
@ -73,85 +110,110 @@ class BandcampIE(InfoExtractor):
|
||||
'acodec': ext,
|
||||
'abr': int_or_none(abr_str),
|
||||
})
|
||||
track = track_info.get('title')
|
||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
||||
track_number = int_or_none(track_info.get('track_num'))
|
||||
duration = float_or_none(track_info.get('duration'))
|
||||
|
||||
self._sort_formats(formats)
|
||||
def extract(key):
|
||||
return self._search_regex(
|
||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
||||
webpage, key, default=None, group='value')
|
||||
|
||||
return {
|
||||
'id': track_id,
|
||||
'title': data['title'],
|
||||
'thumbnail': thumbnail,
|
||||
'formats': formats,
|
||||
'duration': float_or_none(data.get('duration')),
|
||||
}
|
||||
else:
|
||||
raise ExtractorError('No free songs found')
|
||||
artist = extract('artist')
|
||||
album = extract('album_title')
|
||||
timestamp = unified_timestamp(
|
||||
extract('publish_date') or extract('album_publish_date'))
|
||||
release_date = unified_strdate(extract('album_release_date'))
|
||||
|
||||
download_link = m_download.group(1)
|
||||
video_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'video id')
|
||||
download_link = self._search_regex(
|
||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'download link', default=None, group='url')
|
||||
if download_link:
|
||||
track_id = self._search_regex(
|
||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
||||
webpage, 'track id')
|
||||
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, video_id, 'Downloading free downloads page')
|
||||
download_webpage = self._download_webpage(
|
||||
download_link, track_id, 'Downloading free downloads page')
|
||||
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
video_id, transform_source=unescapeHTML)
|
||||
blob = self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
||||
'blob', group='blob'),
|
||||
track_id, transform_source=unescapeHTML)
|
||||
|
||||
info = blob['digital_items'][0]
|
||||
info = try_get(
|
||||
blob, (lambda x: x['digital_items'][0],
|
||||
lambda x: x['download_items'][0]), dict)
|
||||
if info:
|
||||
downloads = info.get('downloads')
|
||||
if isinstance(downloads, dict):
|
||||
if not track:
|
||||
track = info.get('title')
|
||||
if not artist:
|
||||
artist = info.get('artist')
|
||||
if not thumbnail:
|
||||
thumbnail = info.get('thumb_url')
|
||||
|
||||
downloads = info['downloads']
|
||||
track = info['title']
|
||||
download_formats = {}
|
||||
download_formats_list = blob.get('download_formats')
|
||||
if isinstance(download_formats_list, list):
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
artist = info.get('artist')
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, track_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
|
||||
download_formats = {}
|
||||
for f in blob['download_formats']:
|
||||
name, ext = f.get('name'), f.get('file_extension')
|
||||
if all(isinstance(x, compat_str) for x in (name, ext)):
|
||||
download_formats[name] = ext.strip('.')
|
||||
|
||||
formats = []
|
||||
for format_id, f in downloads.items():
|
||||
format_url = f.get('url')
|
||||
if not format_url:
|
||||
continue
|
||||
# Stat URL generation algorithm is reverse engineered from
|
||||
# download_*_bundle_*.js
|
||||
stat_url = update_url_query(
|
||||
format_url.replace('/download/', '/statdownload/'), {
|
||||
'.rand': int(time.time() * 1000 * random.random()),
|
||||
})
|
||||
format_id = f.get('encoding_name') or format_id
|
||||
stat = self._download_json(
|
||||
stat_url, video_id, 'Downloading %s JSON' % format_id,
|
||||
transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
|
||||
fatal=False)
|
||||
if not stat:
|
||||
continue
|
||||
retry_url = url_or_none(stat.get('retry_url'))
|
||||
if not retry_url:
|
||||
continue
|
||||
formats.append({
|
||||
'url': self._proto_relative_url(retry_url, 'http:'),
|
||||
'ext': download_formats.get(format_id),
|
||||
'format_id': format_id,
|
||||
'format_note': f.get('description'),
|
||||
'filesize': parse_filesize(f.get('size_mb')),
|
||||
'vcodec': 'none',
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = '%s - %s' % (artist, track) if artist else track
|
||||
|
||||
if not duration:
|
||||
duration = float_or_none(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'id': track_id,
|
||||
'title': title,
|
||||
'thumbnail': info.get('thumb_url') or thumbnail,
|
||||
'uploader': info.get('artist'),
|
||||
'artist': artist,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': artist,
|
||||
'timestamp': timestamp,
|
||||
'release_date': release_date,
|
||||
'duration': duration,
|
||||
'track': track,
|
||||
'track_number': track_number,
|
||||
'track_id': track_id,
|
||||
'artist': artist,
|
||||
'album': album,
|
||||
'formats': formats,
|
||||
}
|
||||
|
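A standalone sketch of the statdownload trick used in the Bandcamp hunk above: the free-download URL is rewritten to its `/statdownload/` counterpart with a cache-busting `.rand` parameter, and the JSON body is cut out of the wrapper the endpoint returns before parsing. The sample URL and response string below are invented for illustration.

```python
import json
import random
import time

from youtube_dl.utils import update_url_query

format_url = 'https://popplers5.bandcamp.com/download/track?enc=mp3-320&id=123'  # hypothetical
stat_url = update_url_query(
    format_url.replace('/download/', '/statdownload/'), {
        '.rand': int(time.time() * 1000 * random.random()),
    })

# hypothetical response body: JSON wrapped in a short JS call
raw = 'OK({"result": "ok", "retry_url": "//example.com/track.mp3"});'
stat = json.loads(raw[raw.index('{'):raw.rindex('}') + 1])
print(stat_url, stat.get('retry_url'))
```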
@ -33,9 +33,11 @@ class BitChuteIE(InfoExtractor):
video_id = self._match_id(url)

webpage = self._download_webpage(
'https://www.bitchute.com/video/%s' % video_id, video_id)
'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
})

title = self._search_regex(
title = self._html_search_regex(
(r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
webpage, 'title', default=None) or self._html_search_meta(
'description', webpage, 'title',
@ -1,8 +1,10 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import struct
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .adobepass import AdobePassIE
|
||||
@ -310,6 +312,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
'Cannot find playerKey= variable. Did you forget quotes in a shell invocation?',
|
||||
expected=True)
|
||||
|
||||
def _brightcove_new_url_result(self, publisher_id, video_id):
|
||||
brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
|
||||
return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
|
||||
|
||||
def _get_video_info(self, video_id, query, referer=None):
|
||||
headers = {}
|
||||
linkBase = query.get('linkBaseURL')
|
||||
@ -323,6 +329,28 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage,
|
||||
'error message', default=None)
|
||||
if error_msg is not None:
|
||||
publisher_id = query.get('publisherId')
|
||||
if publisher_id and publisher_id[0].isdigit():
|
||||
publisher_id = publisher_id[0]
|
||||
if not publisher_id:
|
||||
player_key = query.get('playerKey')
|
||||
if player_key and ',' in player_key[0]:
|
||||
player_key = player_key[0]
|
||||
else:
|
||||
player_id = query.get('playerID')
|
||||
if player_id and player_id[0].isdigit():
|
||||
player_page = self._download_webpage(
|
||||
'http://link.brightcove.com/services/player/bcpid' + player_id[0],
|
||||
video_id, headers=headers, fatal=False)
|
||||
if player_page:
|
||||
player_key = self._search_regex(
|
||||
r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
|
||||
player_page, 'player key', fatal=False)
|
||||
if player_key:
|
||||
enc_pub_id = player_key.split(',')[1].replace('~', '=')
|
||||
publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
|
||||
if publisher_id:
|
||||
return self._brightcove_new_url_result(publisher_id, video_id)
|
||||
raise ExtractorError(
|
||||
'brightcove said: %s' % error_msg, expected=True)
|
||||
|
||||
@ -444,8 +472,12 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||
else:
|
||||
return ad_info
|
||||
|
||||
if 'url' not in info and not info.get('formats'):
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
if not info.get('url') and not info.get('formats'):
|
||||
uploader_id = info.get('uploader_id')
|
||||
if uploader_id:
|
||||
info.update(self._brightcove_new_url_result(uploader_id, video_id))
|
||||
else:
|
||||
raise ExtractorError('Unable to extract video url for %s' % video_id)
|
||||
return info
|
||||
|
||||
|
||||
|
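The playerKey handling added to BrightcoveLegacyIE above appears to treat the key's second comma-separated field as URL-safe base64 (with `~` standing in for `=` padding) that decodes to a big-endian 64-bit publisher id. Below is a self-contained round trip of that decoding; the playerKey is synthetic, built here only so the assertion can pass.

```python
import base64
import struct

publisher_id = 1234567890  # hypothetical value
encoded = base64.urlsafe_b64encode(struct.pack('>Q', publisher_id)).decode().replace('=', '~')
player_key = 'AQ,%s' % encoded  # synthetic playerKey-like string

enc_pub_id = player_key.split(',')[1].replace('~', '=')
assert struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0] == publisher_id
```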
@ -1,19 +1,10 @@
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


_translation_table = {
'a': 'h', 'd': 'e', 'e': 'v', 'f': 'o', 'g': 'f', 'i': 'd', 'l': 'n',
'm': 'a', 'n': 'm', 'p': 'u', 'q': 't', 'r': 's', 'v': 'p', 'x': 'r',
'y': 'l', 'z': 'i',
'$': ':', '&': '.', '(': '=', '^': '&', '=': '/',
}


def _decode(s):
return ''.join(_translation_table.get(c, c) for c in s)
from ..utils import (
int_or_none,
url_or_none,
)


class CliphunterIE(InfoExtractor):
@ -60,14 +51,14 @@ class CliphunterIE(InfoExtractor):

formats = []
for format_id, f in gexo_files.items():
video_url = f.get('url')
video_url = url_or_none(f.get('url'))
if not video_url:
continue
fmt = f.get('fmt')
height = f.get('h')
format_id = '%s_%sp' % (fmt, height) if fmt and height else format_id
formats.append({
'url': _decode(video_url),
'url': video_url,
'format_id': format_id,
'width': int_or_none(f.get('w')),
'height': int_or_none(height),
@ -1,6 +1,7 @@
# coding: utf-8
from __future__ import unicode_literals


from .common import InfoExtractor
from ..utils import smuggle_url

@ -34,3 +35,32 @@ class CNBCIE(InfoExtractor):
{'force_smil_url': True}),
'id': video_id,
}


class CNBCVideoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
_TEST = {
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
'info_dict': {
'id': '7000031301',
'ext': 'mp4',
'title': "Trump: I don't necessarily agree with raising rates",
'description': 'md5:878d8f0b4ebb5bb1dda3514b91b49de3',
'timestamp': 1531958400,
'upload_date': '20180719',
'uploader': 'NBCU-CNBC',
},
'params': {
'skip_download': True,
},
}

def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
'video id')
return self.url_result(
'http://video.cnbc.com/gallery/?video=%s' % video_id,
CNBCIE.ie_key())
@ -69,6 +69,7 @@ from ..utils import (
|
||||
update_url_query,
|
||||
urljoin,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
xpath_element,
|
||||
xpath_text,
|
||||
xpath_with_ns,
|
||||
@ -211,6 +212,11 @@ class InfoExtractor(object):
|
||||
If not explicitly set, calculated from timestamp.
|
||||
uploader_id: Nickname or id of the video uploader.
|
||||
uploader_url: Full URL to a personal webpage of the video uploader.
|
||||
channel: Full name of the channel the video is uploaded on.
|
||||
Note that channel fields may or may not repeat uploader
|
||||
fields. This depends on a particular extractor.
|
||||
channel_id: Id of the channel.
|
||||
channel_url: Full URL to a channel webpage.
|
||||
location: Physical location where the video was filmed.
|
||||
subtitles: The available subtitles as a dictionary in the format
|
||||
{tag: subformats}. "tag" is usually a language code, and
|
||||
@ -600,6 +606,11 @@ class InfoExtractor(object):
|
||||
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
|
||||
if isinstance(err, compat_urllib_error.HTTPError):
|
||||
if self.__can_accept_status_code(err, expected_status):
|
||||
# Retain reference to error to prevent file object from
|
||||
# being closed before it can be read. Works around the
|
||||
# effects of <https://bugs.python.org/issue15002>
|
||||
# introduced in Python 3.4.1.
|
||||
err.fp._error = err
|
||||
return err.fp
|
||||
|
||||
if errnote is False:
|
||||
@ -1205,10 +1216,10 @@ class InfoExtractor(object):
|
||||
def extract_video_object(e):
|
||||
assert e['@type'] == 'VideoObject'
|
||||
info.update({
|
||||
'url': e.get('contentUrl'),
|
||||
'url': url_or_none(e.get('contentUrl')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
|
||||
'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
'filesize': float_or_none(e.get('contentSize')),
|
||||
@ -1698,9 +1709,9 @@ class InfoExtractor(object):
|
||||
# However, this is not always respected, for example, [2]
|
||||
# contains EXT-X-STREAM-INF tag which references AUDIO
|
||||
# rendition group but does not have CODECS and despite
|
||||
# referencing audio group an audio group, it represents
|
||||
# a complete (with audio and video) format. So, for such cases
|
||||
# we will ignore references to rendition groups and treat them
|
||||
# referencing an audio group it represents a complete
|
||||
# (with audio and video) format. So, for such cases we will
|
||||
# ignore references to rendition groups and treat them
|
||||
# as complete formats.
|
||||
if audio_group_id and codecs and f.get('vcodec') != 'none':
|
||||
audio_group = groups.get(audio_group_id)
|
||||
|
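The common.py hunk above runs the JSON-LD `contentUrl` and `thumbnailUrl` values through `url_or_none()` so that non-URL values are dropped instead of being copied into the info dict. A quick illustration of what that filter keeps and rejects (sample values invented):

```python
from youtube_dl.utils import url_or_none

for value in ('https://example.com/video.mp4',   # kept
              '//cdn.example.com/thumb.jpg',      # protocol-relative, kept
              'about:blank',                      # not a usable URL, dropped
              42,                                 # not a string, dropped
              None):                              # dropped
    print(repr(value), '->', repr(url_or_none(value)))
```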
@ -3,11 +3,13 @@ from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import json
|
||||
import xml.etree.ElementTree as etree
|
||||
import zlib
|
||||
|
||||
from hashlib import sha1
|
||||
from math import pow, sqrt, floor
|
||||
from .common import InfoExtractor
|
||||
from .vrv import VRVIE
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_etree_fromstring,
|
||||
@ -18,6 +20,8 @@ from ..compat import (
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
bytes_to_intlist,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
intlist_to_bytes,
|
||||
int_or_none,
|
||||
lowercase_escape,
|
||||
@ -26,7 +30,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
xpath_text,
|
||||
extract_attributes,
|
||||
)
|
||||
from ..aes import (
|
||||
aes_cbc_decrypt,
|
||||
@ -43,7 +46,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
data['req'] = 'RpcApi' + method
|
||||
data = compat_urllib_parse_urlencode(data).encode('utf-8')
|
||||
return self._download_xml(
|
||||
'http://www.crunchyroll.com/xml/',
|
||||
'https://www.crunchyroll.com/xml/',
|
||||
video_id, note, fatal=False, data=data, headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
@ -139,7 +142,8 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||
|
||||
|
||||
class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||
IE_NAME = 'crunchyroll'
|
||||
_VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
|
||||
@ -148,7 +152,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
|
||||
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
|
||||
'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
|
||||
'upload_date': '20131013',
|
||||
'url': 're:(?!.*&)',
|
||||
@ -221,7 +225,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
|
||||
'info_dict': {
|
||||
'id': '535080',
|
||||
'ext': 'mp4',
|
||||
'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
|
||||
'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
|
||||
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
|
||||
'uploader': 'Marvelous AQL Inc.',
|
||||
'upload_date': '20091021',
|
||||
@ -395,7 +399,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'Downloading subtitles for ' + sub_name, data={
|
||||
'subtitle_script_id': sub_id,
|
||||
})
|
||||
if sub_doc is None:
|
||||
if not isinstance(sub_doc, etree.Element):
|
||||
continue
|
||||
sid = sub_doc.get('id')
|
||||
iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
|
||||
@ -437,13 +441,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
if 'To view this, please log in to verify you are 18 or older.' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'vilos\.config\.media\s*=\s*({.+?});',
|
||||
webpage, 'vilos media', default='{}'), video_id)
|
||||
media_metadata = media.get('metadata') or {}
|
||||
|
||||
language = self._search_regex(
|
||||
r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
|
||||
webpage, 'language', default=None, group='lang')
|
||||
|
||||
video_title = self._html_search_regex(
|
||||
r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
|
||||
webpage, 'video_title')
|
||||
video_title = re.sub(r' {2,}', ' ', video_title)
|
||||
video_description = self._parse_json(self._html_search_regex(
|
||||
video_description = (self._parse_json(self._html_search_regex(
|
||||
r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
|
||||
webpage, 'description', default='{}'), video_id).get('description')
|
||||
webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
|
||||
if video_description:
|
||||
video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
|
||||
video_upload_date = self._html_search_regex(
|
||||
@ -456,92 +469,113 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
[r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
|
||||
webpage, 'video_uploader', fatal=False)
|
||||
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
video_encode_ids = []
|
||||
formats = []
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if streamdata is not None:
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
for stream in media.get('streams', []):
|
||||
audio_lang = stream.get('audio_lang')
|
||||
hardsub_lang = stream.get('hardsub_lang')
|
||||
vrv_formats = self._extract_vrv_formats(
|
||||
stream.get('url'), video_id, stream.get('format'),
|
||||
audio_lang, hardsub_lang)
|
||||
for f in vrv_formats:
|
||||
if not hardsub_lang:
|
||||
f['preference'] = 1
|
||||
language_preference = 0
|
||||
if audio_lang == language:
|
||||
language_preference += 1
|
||||
if hardsub_lang == language:
|
||||
language_preference += 1
|
||||
if language_preference:
|
||||
f['language_preference'] = language_preference
|
||||
formats.extend(vrv_formats)
|
||||
if not formats:
|
||||
available_fmts = []
|
||||
for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
|
||||
attrs = extract_attributes(a)
|
||||
href = attrs.get('href')
|
||||
if href and '/freetrial' in href:
|
||||
continue
|
||||
available_fmts.append(fmt)
|
||||
if not available_fmts:
|
||||
for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
|
||||
available_fmts = re.findall(p, webpage)
|
||||
if available_fmts:
|
||||
break
|
||||
if not available_fmts:
|
||||
available_fmts = self._FORMAT_IDS.keys()
|
||||
video_encode_ids = []
|
||||
|
||||
for fmt in available_fmts:
|
||||
stream_quality, stream_format = self._FORMAT_IDS[fmt]
|
||||
video_format = fmt + 'p'
|
||||
stream_infos = []
|
||||
streamdata = self._call_rpc_api(
|
||||
'VideoPlayer_GetStandardConfig', video_id,
|
||||
'Downloading media info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_quality': stream_quality,
|
||||
'current_page': url,
|
||||
})
|
||||
if isinstance(streamdata, etree.Element):
|
||||
stream_info = streamdata.find('./{default}preload/stream_info')
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if isinstance(stream_info, etree.Element):
|
||||
stream_infos.append(stream_info)
|
||||
stream_info = self._call_rpc_api(
|
||||
'VideoEncode_GetStreamInfo', video_id,
|
||||
'Downloading stream info for %s' % video_format, data={
|
||||
'media_id': video_id,
|
||||
'video_format': stream_format,
|
||||
'video_encode_quality': stream_quality,
|
||||
})
|
||||
if stream_info is not None:
|
||||
stream_infos.append(stream_info)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
for stream_info in stream_infos:
|
||||
video_encode_id = xpath_text(stream_info, './video_encode_id')
|
||||
if video_encode_id in video_encode_ids:
|
||||
continue
|
||||
video_encode_ids.append(video_encode_id)
|
||||
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
video_file = xpath_text(stream_info, './file')
|
||||
if not video_file:
|
||||
continue
|
||||
if video_file.startswith('http'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
video_file, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
|
||||
video_url = xpath_text(stream_info, './host')
|
||||
if not video_url:
|
||||
continue
|
||||
metadata = stream_info.find('./metadata')
|
||||
format_info = {
|
||||
'format': video_format,
|
||||
'height': int_or_none(xpath_text(metadata, './height')),
|
||||
'width': int_or_none(xpath_text(metadata, './width')),
|
||||
}
|
||||
|
||||
if '.fplive.net/' in video_url:
|
||||
video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
|
||||
parsed_video_url = compat_urlparse.urlparse(video_url)
|
||||
direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
|
||||
netloc='v.lvlt.crcdn.net',
|
||||
path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
|
||||
if self._is_valid_url(direct_video_url, video_id, video_format):
|
||||
format_info.update({
|
||||
'format_id': 'http-' + video_format,
|
||||
'url': direct_video_url,
|
||||
})
|
||||
formats.append(format_info)
|
||||
continue
|
||||
|
||||
format_info.update({
|
||||
'format_id': 'rtmp-' + video_format,
|
||||
'url': video_url,
|
||||
'play_path': video_file,
|
||||
'ext': 'flv',
|
||||
})
|
||||
formats.append(format_info)
|
||||
self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
|
||||
|
||||
metadata = self._call_rpc_api(
|
||||
'VideoPlayer_GetMediaMetadata', video_id,
|
||||
@ -549,16 +583,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'media_id': video_id,
|
||||
})
|
||||
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
subtitles = {}
|
||||
for subtitle in media.get('subtitles', []):
|
||||
subtitle_url = subtitle.get('url')
|
||||
if not subtitle_url:
|
||||
continue
|
||||
subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
|
||||
'url': subtitle_url,
|
||||
'ext': subtitle.get('format', 'ass'),
|
||||
})
|
||||
if not subtitles:
|
||||
subtitles = self.extract_subtitles(video_id, webpage)
|
||||
|
||||
# webpage provide more accurate data than series_title from XML
|
||||
series = self._html_search_regex(
|
||||
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
|
||||
webpage, 'series', fatal=False)
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
season = episode = episode_number = duration = thumbnail = None
|
||||
|
||||
if isinstance(metadata, etree.Element):
|
||||
season = xpath_text(metadata, 'series_title')
|
||||
episode = xpath_text(metadata, 'episode_title')
|
||||
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
|
||||
duration = float_or_none(media_metadata.get('duration'), 1000)
|
||||
thumbnail = xpath_text(metadata, 'episode_image_url')
|
||||
|
||||
if not episode:
|
||||
episode = media_metadata.get('title')
|
||||
if not episode_number:
|
||||
episode_number = int_or_none(media_metadata.get('episode_number'))
|
||||
if not thumbnail:
|
||||
thumbnail = media_metadata.get('thumbnail', {}).get('url')
|
||||
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
|
||||
@ -568,7 +624,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'description': video_description,
|
||||
'thumbnail': xpath_text(metadata, 'episode_image_url'),
|
||||
'duration': duration,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': video_upload_date,
|
||||
'series': series,
|
||||
|
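The Crunchyroll changes above rank the new VRV streams by how well they match the page locale: hardsub-free streams get a `preference` bump, and streams whose audio or hardsub language equals the detected LOCALE get a `language_preference` score that the extended `_sort_formats()` field order then honours. A simplified, standalone version of that scoring with invented sample data:

```python
language = 'enUS'  # what vilos.config.player.language / LOCALE would yield
streams = [
    {'format_id': 'hls-jaJP', 'audio_lang': 'jaJP', 'hardsub_lang': None},
    {'format_id': 'hls-jaJP-enUS', 'audio_lang': 'jaJP', 'hardsub_lang': 'enUS'},
    {'format_id': 'hls-enUS', 'audio_lang': 'enUS', 'hardsub_lang': None},
]

for f in streams:
    if not f['hardsub_lang']:
        f['preference'] = 1
    language_preference = 0
    if f['audio_lang'] == language:
        language_preference += 1
    if f['hardsub_lang'] == language:
        language_preference += 1
    if language_preference:
        f['language_preference'] = language_preference

# highest preference / language_preference first, mirroring the sort order
streams.sort(key=lambda f: (f.get('preference', 0),
                            f.get('language_preference', 0)), reverse=True)
print([f['format_id'] for f in streams])
```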
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
parse_iso8601,
|
||||
@ -66,9 +67,12 @@ class CWTVIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
data = self._download_json(
|
||||
'http://images.cwtv.com/feed/mobileapp/video-meta/apiversion_8/guid_' + video_id,
|
||||
video_id)['video']
|
||||
video_id)
|
||||
if data.get('result') != 'ok':
|
||||
raise ExtractorError(data['msg'], expected=True)
|
||||
video_data = data['video']
|
||||
title = video_data['title']
|
||||
mpx_url = video_data.get('mpx_url') or 'http://link.theplatform.com/s/cwtv/media/guid/2703454149/%s?formats=M3U' % video_id
|
||||
|
||||
|
@ -49,6 +49,9 @@ class DailyMailIE(InfoExtractor):
|
||||
'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)
|
||||
|
||||
video_sources = self._download_json(sources_url, video_id)
|
||||
body = video_sources.get('body')
|
||||
if body:
|
||||
video_sources = body
|
||||
|
||||
formats = []
|
||||
for rendition in video_sources['renditions']:
|
||||
|
@ -22,7 +22,10 @@ from ..utils import (
|
||||
parse_iso8601,
|
||||
sanitized_Request,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -171,10 +174,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||
r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
|
||||
webpage, 'player v5', default=None)
|
||||
if player_v5:
|
||||
player = self._parse_json(player_v5, video_id)
|
||||
metadata = player['metadata']
|
||||
player = self._parse_json(player_v5, video_id, fatal=False) or {}
|
||||
metadata = try_get(player, lambda x: x['metadata'], dict)
|
||||
if not metadata:
|
||||
metadata_url = url_or_none(try_get(
|
||||
player, lambda x: x['context']['metadata_template_url1']))
|
||||
if metadata_url:
|
||||
metadata_url = metadata_url.replace(':videoId', video_id)
|
||||
else:
|
||||
metadata_url = update_url_query(
|
||||
'https://www.dailymotion.com/player/metadata/video/%s'
|
||||
% video_id, {
|
||||
'embedder': url,
|
||||
'integration': 'inline',
|
||||
'GK_PV5_NEON': '1',
|
||||
})
|
||||
metadata = self._download_json(
|
||||
metadata_url, video_id, 'Downloading metadata JSON')
|
||||
|
||||
if metadata.get('error', {}).get('type') == 'password_protected':
|
||||
if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
|
||||
password = self._downloader.params.get('videopassword')
|
||||
if password:
|
||||
r = int(metadata['id'][1:], 36)
|
||||
|
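The Dailymotion hunk above stops assuming `player['metadata']` is present: it is read with `try_get()`, and when missing, a metadata URL is derived from the player context template (or from the `/player/metadata/video/` endpoint) instead. A trimmed-down sketch of that decision path; the player config and video id below are invented:

```python
from youtube_dl.utils import try_get, url_or_none

video_id = 'x7xxxxx'  # hypothetical
player = {  # invented player config without embedded metadata
    'context': {
        'metadata_template_url1': 'https://www.dailymotion.com/player/metadata/video/:videoId',
    },
}

metadata = try_get(player, lambda x: x['metadata'], dict)
if not metadata:
    metadata_url = url_or_none(try_get(
        player, lambda x: x['context']['metadata_template_url1']))
    if metadata_url:
        metadata_url = metadata_url.replace(':videoId', video_id)
    print('would fetch metadata from', metadata_url)
```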
@ -59,7 +59,7 @@ class DTubeIE(InfoExtractor):
|
||||
try:
|
||||
self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
|
||||
self._downloader._opener.open(video_url, timeout=5).close()
|
||||
except timeout as e:
|
||||
except timeout:
|
||||
self.to_screen(
|
||||
'%s: %s URL is invalid, skipping' % (video_id, format_id))
|
||||
continue
|
||||
|
@ -9,6 +9,7 @@ from ..utils import (
|
||||
encode_base_n,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_duration,
|
||||
str_to_int,
|
||||
url_or_none,
|
||||
@ -25,10 +26,16 @@ class EpornerIE(InfoExtractor):
|
||||
'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
|
||||
'ext': 'mp4',
|
||||
'title': 'Infamous Tiffany Teen Strip Tease Video',
|
||||
'description': 'md5:764f39abf932daafa37485eb46efa152',
|
||||
'timestamp': 1232520922,
|
||||
'upload_date': '20090121',
|
||||
'duration': 1838,
|
||||
'view_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'proxy': '127.0.0.1:8118'
|
||||
}
|
||||
}, {
|
||||
# New (May 2016) URL layout
|
||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
|
||||
@ -104,12 +111,15 @@ class EpornerIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
duration = parse_duration(self._html_search_meta('duration', webpage))
|
||||
json_ld = self._search_json_ld(webpage, display_id, default={})
|
||||
|
||||
duration = parse_duration(self._html_search_meta(
|
||||
'duration', webpage, default=None))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
||||
webpage, 'view count', fatal=False))
|
||||
|
||||
return {
|
||||
return merge_dicts(json_ld, {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
@ -117,4 +127,4 @@ class EpornerIE(InfoExtractor):
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
'age_limit': 18,
|
||||
}
|
||||
})
|
||||
|
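The eporner change above merges the page's JSON-LD data with the manually scraped fields instead of returning only the latter. As implemented in the current utils, `merge_dicts()` gives the dictionaries listed first priority for keys they set, skips `None` values, and lets empty strings be filled in later, so the JSON-LD fields win here. A small illustration with invented data:

```python
from youtube_dl.utils import merge_dicts

json_ld = {'title': 'Title from JSON-LD', 'timestamp': 1232520922, 'description': None}
scraped = {'id': '95008', 'title': 'Scraped title', 'description': 'Scraped description', 'age_limit': 18}

info = merge_dicts(json_ld, scraped)
print(info['title'])        # JSON-LD value kept
print(info['description'])  # None in JSON-LD, so the scraped value fills it
```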
@ -54,6 +54,7 @@ from .appletrailers import (
|
||||
from .archiveorg import ArchiveOrgIE
|
||||
from .arkena import ArkenaIE
|
||||
from .ard import (
|
||||
ARDBetaMediathekIE,
|
||||
ARDIE,
|
||||
ARDMediathekIE,
|
||||
)
|
||||
@ -87,11 +88,7 @@ from .awaan import (
|
||||
AWAANLiveIE,
|
||||
AWAANSeasonIE,
|
||||
)
|
||||
from .azmedien import (
|
||||
AZMedienIE,
|
||||
AZMedienPlaylistIE,
|
||||
AZMedienShowPlaylistIE,
|
||||
)
|
||||
from .azmedien import AZMedienIE
|
||||
from .baidu import BaiduVideoIE
|
||||
from .bambuser import BambuserIE, BambuserChannelIE
|
||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||
@ -208,7 +205,10 @@ from .cloudy import CloudyIE
|
||||
from .clubic import ClubicIE
|
||||
from .clyp import ClypIE
|
||||
from .cmt import CMTIE
|
||||
from .cnbc import CNBCIE
|
||||
from .cnbc import (
|
||||
CNBCIE,
|
||||
CNBCVideoIE,
|
||||
)
|
||||
from .cnn import (
|
||||
CNNIE,
|
||||
CNNBlogsIE,
|
||||
@ -520,6 +520,7 @@ from .keezmovies import KeezMoviesIE
|
||||
from .ketnet import KetnetIE
|
||||
from .khanacademy import KhanAcademyIE
|
||||
from .kickstarter import KickStarterIE
|
||||
from .kinopoisk import KinoPoiskIE
|
||||
from .keek import KeekIE
|
||||
from .konserthusetplay import KonserthusetPlayIE
|
||||
from .kontrtube import KontrTubeIE
|
||||
@ -538,6 +539,7 @@ from .la7 import LA7IE
|
||||
from .laola1tv import (
|
||||
Laola1TvEmbedIE,
|
||||
Laola1TvIE,
|
||||
EHFTVIE,
|
||||
ITTFIE,
|
||||
)
|
||||
from .lci import LCIIE
|
||||
@ -567,6 +569,10 @@ from .limelight import (
|
||||
LimelightChannelListIE,
|
||||
)
|
||||
from .line import LineTVIE
|
||||
from .linkedin import (
|
||||
LinkedInLearningIE,
|
||||
LinkedInLearningCourseIE,
|
||||
)
|
||||
from .litv import LiTVIE
|
||||
from .liveleak import (
|
||||
LiveLeakIE,
|
||||
@ -740,7 +746,10 @@ from .nonktube import NonkTubeIE
|
||||
from .noovo import NoovoIE
|
||||
from .normalboots import NormalbootsIE
|
||||
from .nosvideo import NosVideoIE
|
||||
from .nova import NovaIE
|
||||
from .nova import (
|
||||
NovaEmbedIE,
|
||||
NovaIE,
|
||||
)
|
||||
from .novamov import (
|
||||
AuroraVidIE,
|
||||
CloudTimeIE,
|
||||
@ -1038,7 +1047,7 @@ from .spike import (
|
||||
)
|
||||
from .stitcher import StitcherIE
|
||||
from .sport5 import Sport5IE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .sportdeutschland import SportDeutschlandIE
|
||||
from .springboardplatform import SpringboardPlatformIE
|
||||
from .sprout import SproutIE
|
||||
@ -1081,6 +1090,7 @@ from .teachingchannel import TeachingChannelIE
|
||||
from .teamcoco import TeamcocoIE
|
||||
from .techtalks import TechTalksIE
|
||||
from .ted import TEDIE
|
||||
from .tele5 import Tele5IE
|
||||
from .tele13 import Tele13IE
|
||||
from .telebruxelles import TeleBruxellesIE
|
||||
from .telecinco import TelecincoIE
|
||||
@ -1147,7 +1157,6 @@ from .tv2 import (
|
||||
TV2ArticleIE,
|
||||
)
|
||||
from .tv2hu import TV2HuIE
|
||||
from .tv3 import TV3IE
|
||||
from .tv4 import TV4IE
|
||||
from .tv5mondeplus import TV5MondePlusIE
|
||||
from .tva import TVAIE
|
||||
@ -1177,12 +1186,14 @@ from .tvp import (
|
||||
from .tvplay import (
|
||||
TVPlayIE,
|
||||
ViafreeIE,
|
||||
TVPlayHomeIE,
|
||||
)
|
||||
from .tvplayer import TVPlayerIE
|
||||
from .tweakers import TweakersIE
|
||||
from .twentyfourvideo import TwentyFourVideoIE
|
||||
from .twentymin import TwentyMinutenIE
|
||||
from .twentythreevideo import TwentyThreeVideoIE
|
||||
from .twitcasting import TwitCastingIE
|
||||
from .twitch import (
|
||||
TwitchVideoIE,
|
||||
TwitchChapterIE,
|
||||
@ -1375,6 +1386,7 @@ from .wsj import (
|
||||
WSJIE,
|
||||
WSJArticleIE,
|
||||
)
|
||||
from .wwe import WWEIE
|
||||
from .xbef import XBefIE
|
||||
from .xboxclips import XboxClipsIE
|
||||
from .xfileshare import XFileShareIE
|
||||
@ -1424,6 +1436,7 @@ from .younow import (
|
||||
YouNowMomentIE,
|
||||
)
|
||||
from .youporn import YouPornIE
|
||||
from .yourporn import YourPornIE
|
||||
from .yourupload import YourUploadIE
|
||||
from .youtube import (
|
||||
YoutubeIE,
|
||||
@ -1447,10 +1460,23 @@ from .youtube import (
|
||||
from .zapiks import ZapiksIE
|
||||
from .zaq1 import Zaq1IE
|
||||
from .zattoo import (
|
||||
BBVTVIE,
|
||||
EinsUndEinsTVIE,
|
||||
EWETVIE,
|
||||
GlattvisionTVIE,
|
||||
MNetTVIE,
|
||||
MyVisionTVIE,
|
||||
NetPlusIE,
|
||||
OsnatelTVIE,
|
||||
QuantumTVIE,
|
||||
QuicklineIE,
|
||||
QuicklineLiveIE,
|
||||
SAKTVIE,
|
||||
VTXTVIE,
|
||||
WalyTVIE,
|
||||
ZattooIE,
|
||||
ZattooLiveIE,
|
||||
)
|
||||
from .zdf import ZDFIE, ZDFChannelIE
|
||||
from .zingmp3 import ZingMp3IE
|
||||
from .zype import ZypeIE
|
||||
|
@ -57,7 +57,7 @@ class FacebookIE(InfoExtractor):
|
||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
||||
|
||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true'
|
||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
|
||||
|
@ -3,15 +3,45 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urlparse
|
||||
from ..compat import (
|
||||
compat_b64decode,
|
||||
compat_str,
|
||||
compat_urllib_parse_unquote,
|
||||
compat_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class FourTubeBaseIE(InfoExtractor):
|
||||
_TKN_HOST = 'tkn.kodicdn.com'
|
||||
|
||||
def _extract_formats(self, url, video_id, media_id, sources):
|
||||
token_url = 'https://%s/%s/desktop/%s' % (
|
||||
self._TKN_HOST, media_id, '+'.join(sources))
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
tokens = self._download_json(token_url, video_id, data=b'', headers={
|
||||
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
|
||||
'Referer': url,
|
||||
})
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
'format_id': format + 'p',
|
||||
'resolution': format + 'p',
|
||||
'quality': int(format),
|
||||
} for format in sources]
|
||||
self._sort_formats(formats)
|
||||
return formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
|
||||
@ -68,21 +98,7 @@ class FourTubeBaseIE(InfoExtractor):
|
||||
media_id = params[0]
|
||||
sources = ['%s' % p for p in params[2]]
|
||||
|
||||
token_url = 'https://tkn.kodicdn.com/{0}/desktop/{1}'.format(
|
||||
media_id, '+'.join(sources))
|
||||
|
||||
parsed_url = compat_urlparse.urlparse(url)
|
||||
tokens = self._download_json(token_url, video_id, data=b'', headers={
|
||||
'Origin': '%s://%s' % (parsed_url.scheme, parsed_url.hostname),
|
||||
'Referer': url,
|
||||
})
|
||||
formats = [{
|
||||
'url': tokens[format]['token'],
|
||||
'format_id': format + 'p',
|
||||
'resolution': format + 'p',
|
||||
'quality': int(format),
|
||||
} for format in sources]
|
||||
self._sort_formats(formats)
|
||||
formats = self._extract_formats(url, video_id, media_id, sources)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
@ -164,6 +180,7 @@ class FuxIE(FourTubeBaseIE):
|
||||
class PornTubeIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
_URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
|
||||
_TKN_HOST = 'tkn.porntube.com'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
|
||||
'info_dict': {
|
||||
@ -171,13 +188,32 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': 'Teen couple doing anal',
|
||||
'uploader': 'Alexy',
|
||||
'uploader_id': 'Alexy',
|
||||
'uploader_id': '91488',
|
||||
'upload_date': '20150606',
|
||||
'timestamp': 1433595647,
|
||||
'duration': 5052,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'categories': list,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
|
||||
'info_dict': {
|
||||
'id': '1331406',
|
||||
'ext': 'mp4',
|
||||
'title': 'Squirting Teen Ballerina on ECG',
|
||||
'uploader': 'Exploited College Girls',
|
||||
'uploader_id': '665',
|
||||
'channel': 'Exploited College Girls',
|
||||
'channel_id': '665',
|
||||
'upload_date': '20130920',
|
||||
'timestamp': 1379685485,
|
||||
'duration': 851,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
@ -191,6 +227,55 @@ class PornTubeIE(FourTubeBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
video = self._parse_json(
|
||||
self._search_regex(
|
||||
r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
||||
webpage, 'data', group='value'), video_id,
|
||||
transform_source=lambda x: compat_urllib_parse_unquote(
|
||||
compat_b64decode(x).decode('utf-8')))['page']['video']
|
||||
|
||||
title = video['title']
|
||||
media_id = video['mediaId']
|
||||
sources = [compat_str(e['height'])
|
||||
for e in video['encodings'] if e.get('height')]
|
||||
formats = self._extract_formats(url, video_id, media_id, sources)
|
||||
|
||||
thumbnail = url_or_none(video.get('masterThumb'))
|
||||
uploader = try_get(video, lambda x: x['user']['username'], compat_str)
|
||||
uploader_id = str_or_none(try_get(
|
||||
video, lambda x: x['user']['id'], int))
|
||||
channel = try_get(video, lambda x: x['channel']['name'], compat_str)
|
||||
channel_id = str_or_none(try_get(
|
||||
video, lambda x: x['channel']['id'], int))
|
||||
like_count = int_or_none(video.get('likes'))
|
||||
dislike_count = int_or_none(video.get('dislikes'))
|
||||
view_count = int_or_none(video.get('playsQty'))
|
||||
duration = int_or_none(video.get('durationInSeconds'))
|
||||
timestamp = unified_timestamp(video.get('publishedAt'))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader': uploader or channel,
|
||||
'uploader_id': uploader_id or channel_id,
|
||||
'channel': channel,
|
||||
'channel_id': channel_id,
|
||||
'timestamp': timestamp,
|
||||
'like_count': like_count,
|
||||
'dislike_count': dislike_count,
|
||||
'view_count': view_count,
|
||||
'duration': duration,
|
||||
'age_limit': 18,
|
||||
}
|
||||
|
||||
|
||||
class PornerBrosIE(FourTubeBaseIE):
|
||||
_VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
|
||||
|
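The new PornTube code above reads the page state from an `INITIALSTATE` blob that is base64-encoded, percent-encoded JSON. A self-contained round trip of that decoding step, using the Python 3 standard library for brevity; the payload is invented:

```python
import base64
import json
from urllib.parse import quote, unquote

state = {'page': {'video': {'title': 'Sample clip', 'mediaId': 123}}}  # invented payload
blob = base64.b64encode(quote(json.dumps(state)).encode()).decode()  # what INITIALSTATE carries

video = json.loads(unquote(base64.b64decode(blob).decode('utf-8')))['page']['video']
print(video['title'], video['mediaId'])
```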
@ -32,7 +32,6 @@ from ..utils import (
|
||||
unified_strdate,
|
||||
unsmuggle_url,
|
||||
UnsupportedError,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from .commonprotocols import RtmpIE
|
||||
@ -48,7 +47,7 @@ from .nbc import NBCSportsVPlayerIE
|
||||
from .ooyala import OoyalaIE
|
||||
from .rutv import RUTVIE
|
||||
from .tvc import TVCIE
|
||||
from .sportbox import SportBoxEmbedIE
|
||||
from .sportbox import SportBoxIE
|
||||
from .smotri import SmotriIE
|
||||
from .myvi import MyviIE
|
||||
from .condenast import CondeNastIE
|
||||
@ -115,6 +114,7 @@ from .apa import APAIE
|
||||
from .foxnews import FoxNewsIE
|
||||
from .viqeo import ViqeoIE
|
||||
from .expressen import ExpressenIE
|
||||
from .zype import ZypeIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
@ -2071,6 +2071,35 @@ class GenericIE(InfoExtractor):
|
||||
},
|
||||
'playlist_count': 6,
|
||||
},
|
||||
{
|
||||
# Zype embed
|
||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||
'info_dict': {
|
||||
'id': '5b400b834b32992a310622b9',
|
||||
'ext': 'mp4',
|
||||
'title': 'Smoky Barbecue Favorites',
|
||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
||||
},
|
||||
'add_ie': [ZypeIE.ie_key()],
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# videojs embed
|
||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||
'info_dict': {
|
||||
'id': 'shell',
|
||||
'ext': 'mp4',
|
||||
'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
|
||||
'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'expected_warnings': ['Failed to download MPD manifest'],
|
||||
},
|
||||
# {
|
||||
# # TODO: find another test
|
||||
# # http://schema.org/VideoObject
|
||||
@ -2622,9 +2651,9 @@ class GenericIE(InfoExtractor):
|
||||
return self.url_result(tvc_url, 'TVC')
|
||||
|
||||
# Look for embedded SportBox player
|
||||
sportbox_urls = SportBoxEmbedIE._extract_urls(webpage)
|
||||
sportbox_urls = SportBoxIE._extract_urls(webpage)
|
||||
if sportbox_urls:
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie='SportBoxEmbed')
|
||||
return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
|
||||
|
||||
# Look for embedded XHamster player
|
||||
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
|
||||
@ -3009,7 +3038,7 @@ class GenericIE(InfoExtractor):
|
||||
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
|
||||
|
||||
# Look for Mediaset embeds
|
||||
mediaset_urls = MediasetIE._extract_urls(webpage)
|
||||
mediaset_urls = MediasetIE._extract_urls(self, webpage)
|
||||
if mediaset_urls:
|
||||
return self.playlist_from_matches(
|
||||
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
|
||||
@ -3098,7 +3127,7 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
|
||||
|
||||
sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
|
||||
sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
|
||||
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
|
||||
webpage)]
|
||||
if sharevideos_urls:
|
||||
@ -3115,6 +3144,11 @@ class GenericIE(InfoExtractor):
|
||||
return self.playlist_from_matches(
|
||||
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
|
||||
|
||||
zype_urls = ZypeIE._extract_urls(webpage)
|
||||
if zype_urls:
|
||||
return self.playlist_from_matches(
|
||||
zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
|
||||
|
||||
# Look for HTML5 media
|
||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||
if entries:
|
||||
@ -3136,9 +3170,13 @@ class GenericIE(InfoExtractor):
|
||||
jwplayer_data = self._find_jwplayer_data(
|
||||
webpage, video_id, transform_source=js_to_json)
|
||||
if jwplayer_data:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
try:
|
||||
info = self._parse_jwplayer_data(
|
||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||
return merge_dicts(info, info_dict)
|
||||
except ExtractorError:
|
||||
# See https://github.com/rg3/youtube-dl/pull/16735
|
||||
pass
|
||||
|
||||
# Video.js embed
|
||||
mobj = re.search(
|
||||
@ -3152,8 +3190,8 @@ class GenericIE(InfoExtractor):
|
||||
sources = [sources]
|
||||
formats = []
|
||||
for source in sources:
|
||||
src = url_or_none(source.get('src'))
|
||||
if not src:
|
||||
src = source.get('src')
|
||||
if not src or not isinstance(src, compat_str):
|
||||
continue
|
||||
src = compat_urlparse.urljoin(url, src)
|
||||
src_type = source.get('type')
|
||||
|
@ -36,7 +36,8 @@ class GoIE(AdobePassIE):
|
||||
'requestor_id': 'DisneyXD',
|
||||
}
|
||||
}
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))' % '|'.join(_SITE_INFO.keys())
|
||||
_VALID_URL = r'https?://(?:(?P<sub_domain>%s)\.)?go\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
|
||||
% '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'info_dict': {
|
||||
@ -62,6 +63,14 @@ class GoIE(AdobePassIE):
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_videos(self, brand, video_id='-1', show_id='-1'):
|
||||
@ -72,14 +81,23 @@ class GoIE(AdobePassIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
sub_domain, video_id, display_id = re.match(self._VALID_URL, url).groups()
|
||||
site_info = self._SITE_INFO[sub_domain]
|
||||
brand = site_info['brand']
|
||||
if not video_id:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
video_id = self._search_regex(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id', default=None)
|
||||
r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
|
||||
default=None)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
|
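The Go extractor above no longer assumes the sub-domain is a key of `_SITE_INFO`: when it is not, a numeric `data-brand` is scraped from the page and the matching site entry is looked up with `next()`. A reduced standalone version of that lookup; the table contents and markup below are invented, only the `'004'` default comes from the hunk above:

```python
import re

SITE_INFO = {  # invented stand-in for GoIE._SITE_INFO
    'abc': {'brand': '001'},
    'disneyxd': {'brand': '004'},
}
webpage = '<body data-brand="004" data-video-id="VDKA123"></body>'  # hypothetical markup

mobj = re.search(r'data-brand=\s*["\']\s*(\d+)', webpage)
brand = mobj.group(1) if mobj else '004'  # '004' is the default used above
site_info = next(si for _, si in SITE_INFO.items() if si.get('brand') == brand)
print(brand, site_info)
```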
@ -1,49 +1,55 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import hashlib
|
||||
import hmac
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class HotStarBaseIE(InfoExtractor):
|
||||
_GEO_COUNTRIES = ['IN']
|
||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||
|
||||
def _download_json(self, *args, **kwargs):
|
||||
response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)
|
||||
if response['resultCode'] != 'OK':
|
||||
if kwargs.get('fatal'):
|
||||
raise ExtractorError(
|
||||
response['errorDescription'], expected=True)
|
||||
return None
|
||||
return response['resultObj']
|
||||
|
||||
def _download_content_info(self, content_id):
|
||||
return self._download_json(
|
||||
'https://account.hotstar.com/AVS/besc', content_id, query={
|
||||
'action': 'GetAggregatedContentDetails',
|
||||
'appVersion': '5.0.40',
|
||||
'channel': 'PCTV',
|
||||
'contentId': content_id,
|
||||
})['contentInfo'][0]
|
||||
def _call_api(self, path, video_id, query_name='contentId'):
|
||||
st = int(time.time())
|
||||
exp = st + 6000
|
||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||
response = self._download_json(
|
||||
'https://api.hotstar.com/' + path,
|
||||
video_id, headers={
|
||||
'hotstarauth': auth,
|
||||
'x-country-code': 'IN',
|
||||
'x-platform-code': 'JIO',
|
||||
}, query={
|
||||
query_name: video_id,
|
||||
'tas': 10000,
|
||||
})
|
||||
if response['statusCode'] != 'OK':
|
||||
raise ExtractorError(
|
||||
response['body']['message'], expected=True)
|
||||
return response['body']['results']
|
||||
|
||||
|
||||
class HotStarIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
|
||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||
'info_dict': {
|
||||
'id': '1000076273',
|
||||
'ext': 'mp4',
|
||||
'title': 'On Air With AIB',
|
||||
'title': 'Can You Not Spread Rumours?',
|
||||
'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
|
||||
'timestamp': 1447227000,
|
||||
'timestamp': 1447248600,
|
||||
'upload_date': '20151111',
|
||||
'duration': 381,
|
||||
},
|
||||
@ -58,47 +64,47 @@ class HotStarIE(HotStarBaseIE):
|
||||
'url': 'http://www.hotstar.com/1000000515',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_GEO_BYPASS = False
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_data = self._download_content_info(video_id)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
app_state = self._parse_json(self._search_regex(
|
||||
r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
|
||||
webpage, 'app state'), video_id)
|
||||
video_data = {}
|
||||
for v in app_state.values():
|
||||
content = try_get(v, lambda x: x['initialState']['contentData']['content'], dict)
|
||||
if content and content.get('contentId') == video_id:
|
||||
video_data = content
|
||||
|
||||
title = video_data['episodeTitle']
|
||||
title = video_data['title']
|
||||
|
||||
if video_data.get('encrypted') == 'Y':
|
||||
if video_data.get('drmProtected'):
|
||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||
|
||||
formats = []
|
||||
for f in ('JIO',):
|
||||
format_data = self._download_json(
|
||||
'http://getcdn.hotstar.com/AVS/besc',
|
||||
video_id, 'Downloading %s JSON metadata' % f,
|
||||
fatal=False, query={
|
||||
'action': 'GetCDN',
|
||||
'asJson': 'Y',
|
||||
'channel': f,
|
||||
'id': video_id,
|
||||
'type': 'VOD',
|
||||
})
|
||||
if format_data:
|
||||
format_url = format_data.get('src')
|
||||
if not format_url:
|
||||
continue
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
continue
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'width': int_or_none(format_data.get('width')),
|
||||
'height': int_or_none(format_data.get('height')),
|
||||
})
|
||||
format_data = self._call_api('h/v1/play', video_id)['item']
|
||||
format_url = format_data['playbackUrl']
|
||||
ext = determine_ext(format_url)
|
||||
if ext == 'm3u8':
|
||||
try:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls'))
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||
self.raise_geo_restricted(countries=['IN'])
|
||||
raise
|
||||
elif ext == 'f4m':
|
||||
# produce broken files
|
||||
pass
|
||||
else:
|
||||
formats.append({
|
||||
'url': format_url,
|
||||
'width': int_or_none(format_data.get('width')),
|
||||
'height': int_or_none(format_data.get('height')),
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
@ -106,57 +112,43 @@ class HotStarIE(HotStarBaseIE):
|
||||
'title': title,
|
||||
'description': video_data.get('description'),
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate')),
|
||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||
'formats': formats,
|
||||
'channel': video_data.get('channelName'),
|
||||
'channel_id': video_data.get('channelId'),
|
||||
'series': video_data.get('showName'),
|
||||
'season': video_data.get('seasonName'),
|
||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||
'season_id': video_data.get('seasonId'),
|
||||
'episode': title,
|
||||
'episode_number': int_or_none(video_data.get('episodeNumber')),
|
||||
'series': video_data.get('contentTitle'),
|
||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||
}
|
||||
|
||||
|
||||
class HotStarPlaylistIE(HotStarBaseIE):
|
||||
IE_NAME = 'hotstar:playlist'
|
||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||
'info_dict': {
|
||||
'id': '14812',
|
||||
'id': '3_2_26',
|
||||
},
|
||||
'playlist_mincount': 75,
|
||||
'playlist_mincount': 20,
|
||||
}, {
|
||||
'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
|
||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_ITEM_TYPES = {
|
||||
'episodes': 'EPISODE',
|
||||
'popular-clips': 'CLIPS',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
base_url = mobj.group('url')
|
||||
content_id = mobj.group('content_id')
|
||||
playlist_type = mobj.group('type')
|
||||
playlist_id = self._match_id(url)
|
||||
|
||||
content_info = self._download_content_info(content_id)
|
||||
playlist_id = compat_str(content_info['categoryId'])
|
||||
|
||||
collection = self._download_json(
|
||||
'https://search.hotstar.com/AVS/besc', playlist_id, query={
|
||||
'action': 'SearchContents',
|
||||
'appVersion': '5.0.40',
|
||||
'channel': 'PCTV',
|
||||
'moreFilters': 'series:%s;' % playlist_id,
|
||||
'query': '*',
|
||||
'searchOrder': 'last_broadcast_date desc,year desc,title asc',
|
||||
'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
|
||||
})
|
||||
collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
|
||||
|
||||
entries = [
|
||||
self.url_result(
|
||||
'%s/_/%s' % (base_url, video['contentId']),
|
||||
'https://www.hotstar.com/%s' % video['contentId'],
|
||||
ie=HotStarIE.ie_key(), video_id=video['contentId'])
|
||||
for video in collection['response']['docs']
|
||||
for video in collection['assets']['items']
|
||||
if video.get('contentId')]
|
||||
|
||||
return self.playlist_result(entries, playlist_id)
|
||||
|
@ -7,7 +7,7 @@ from ..utils import unified_timestamp
|
||||
|
||||
class InternazionaleIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?internazionale\.it/video/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://www.internazionale.it/video/2015/02/19/richard-linklater-racconta-una-scena-di-boyhood',
|
||||
'md5': '3e39d32b66882c1218e305acbf8348ca',
|
||||
'info_dict': {
|
||||
@ -23,7 +23,23 @@ class InternazionaleIE(InfoExtractor):
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.internazionale.it/video/2018/08/29/telefono-stare-con-noi-stessi',
|
||||
'md5': '9db8663704cab73eb972d1cee0082c79',
|
||||
'info_dict': {
|
||||
'id': '761344',
|
||||
'display_id': 'telefono-stare-con-noi-stessi',
|
||||
'ext': 'mp4',
|
||||
'title': 'Usiamo il telefono per evitare di stare con noi stessi',
|
||||
'description': 'md5:75ccfb0d6bcefc6e7428c68b4aa1fe44',
|
||||
'timestamp': 1535528954,
|
||||
'upload_date': '20180829',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'format': 'bestvideo',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -40,8 +56,13 @@ class InternazionaleIE(InfoExtractor):
|
||||
DATA_RE % 'job-id', webpage, 'video id', group='value')
|
||||
video_path = self._search_regex(
|
||||
DATA_RE % 'video-path', webpage, 'video path', group='value')
|
||||
video_available_abroad = self._search_regex(
|
||||
DATA_RE % 'video-available_abroad', webpage,
|
||||
'video available aboard', default='1', group='value')
|
||||
video_available_abroad = video_available_abroad == '1'
|
||||
|
||||
video_base = 'https://video.internazionale.it/%s/%s.' % (video_path, video_id)
|
||||
video_base = 'https://video%s.internazionale.it/%s/%s.' % \
|
||||
('' if video_available_abroad else '-ita', video_path, video_id)
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
video_base + 'm3u8', display_id, 'mp4',
|
||||
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class IPrimaIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
_VALID_URL = r'https?://(?:play|prima)\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
|
||||
_GEO_BYPASS = False
|
||||
|
||||
_TESTS = [{
|
||||
@ -33,14 +33,27 @@ class IPrimaIE(InfoExtractor):
|
||||
# geo restricted
|
||||
'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe api.play-backend.iprima.cz
|
||||
'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# iframe prima.iprima.cz
|
||||
'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
|
||||
video_id = self._search_regex(
|
||||
(r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
|
||||
r'data-product="([^"]+)">'),
|
||||
webpage, 'real id')
|
||||
|
||||
playerpage = self._download_webpage(
|
||||
'http://play.iprima.cz/prehravac/init',
|
||||
|
@ -15,7 +15,7 @@ from ..utils import (
|
||||
class IviIE(InfoExtractor):
|
||||
IE_DESC = 'ivi.ru'
|
||||
IE_NAME = 'ivi'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
|
||||
_GEO_BYPASS = False
|
||||
_GEO_COUNTRIES = ['RU']
|
||||
|
||||
@ -65,7 +65,11 @@ class IviIE(InfoExtractor):
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'Only works from Russia',
|
||||
}
|
||||
},
|
||||
{
|
||||
'url': 'https://www.ivi.tv/watch/33560/',
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
|
||||
# Sorted by quality
|
||||
|
@ -26,8 +26,15 @@ class JamendoBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class JamendoIE(JamendoBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
licensing\.jamendo\.com/[^/]+|
|
||||
(?:www\.)?jamendo\.com
|
||||
)
|
||||
/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
|
||||
'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
|
||||
'info_dict': {
|
||||
@ -40,14 +47,19 @@ class JamendoIE(JamendoBaseIE):
|
||||
'duration': 210,
|
||||
'thumbnail': r're:^https?://.*\.jpg'
|
||||
}
|
||||
}
|
||||
}, {
|
||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._VALID_URL_RE.match(url)
|
||||
track_id = mobj.group('id')
|
||||
display_id = mobj.group('display_id')
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_webpage(
|
||||
'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
|
||||
display_id)
|
||||
|
||||
title, artist, track = self._extract_meta(webpage)
|
||||
|
||||
|
@ -192,6 +192,8 @@ class KalturaIE(InfoExtractor):
|
||||
'entryId': video_id,
|
||||
'service': 'baseentry',
|
||||
'ks': '{1:result:ks}',
|
||||
'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
|
||||
'responseProfile:type': 1,
|
||||
},
|
||||
{
|
||||
'action': 'getbyentryid',
|
||||
|
70
youtube_dl/extractor/kinopoisk.py
Normal file
@ -0,0 +1,70 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    dict_get,
    int_or_none,
)


class KinoPoiskIE(InfoExtractor):
    _GEO_COUNTRIES = ['RU']
    _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.kinopoisk.ru/film/81041/watch/',
        'md5': '4f71c80baea10dfa54a837a46111d326',
        'info_dict': {
            'id': '81041',
            'ext': 'mp4',
            'title': 'Алеша попович и тугарин змей',
            'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
            'thumbnail': r're:^https?://.*',
            'duration': 4533,
            'age_limit': 12,
        },
        'params': {
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://www.kinopoisk.ru/film/81041',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
            query={'kpId': video_id})

        data = self._parse_json(
            self._search_regex(
                r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
                webpage, 'data'),
            video_id)['models']

        film = data['filmStatus']
        title = film.get('title') or film['originalTitle']

        formats = self._extract_m3u8_formats(
            data['playlistEntity']['uri'], video_id, 'mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')
        self._sort_formats(formats)

        description = dict_get(
            film, ('descriptscription', 'description',
                   'shortDescriptscription', 'shortDescription'))
        thumbnail = film.get('coverUrl') or film.get('posterUrl')
        duration = int_or_none(film.get('duration'))
        age_limit = int_or_none(film.get('restrictionAge'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
@ -2,6 +2,7 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
@ -32,7 +33,8 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
|
||||
def _extract_token_url(self, stream_access_url, video_id, data):
|
||||
return self._download_json(
|
||||
stream_access_url, video_id, headers={
|
||||
self._proto_relative_url(stream_access_url, 'https:'), video_id,
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps(data).encode())['data']['stream-access'][0]
|
||||
|
||||
@ -119,9 +121,59 @@ class Laola1TvEmbedIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvEmbedIE):
|
||||
class Laola1TvBaseIE(Laola1TvEmbedIE):
|
||||
def _extract_video(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id,
|
||||
transform_source=lambda s: js_to_json(re.sub(r'shareurl:.+,', '', s)))
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
|
||||
class Laola1TvIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'laola1tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?laola1\.tv/[a-z]+-[a-z]+/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html',
|
||||
'info_dict': {
|
||||
@ -169,52 +221,30 @@ class Laola1TvIE(Laola1TvEmbedIE):
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._extract_video(url)
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
if 'Dieser Livestream ist bereits beendet.' in webpage:
|
||||
raise ExtractorError('This live stream has already finished.', expected=True)
|
||||
class EHFTVIE(Laola1TvBaseIE):
|
||||
IE_NAME = 'ehftv'
|
||||
_VALID_URL = r'https?://(?:www\.)?ehftv\.com/[a-z]+(?:-[a-z]+)?/[^/]+/(?P<id>[^/?#&]+)'
|
||||
|
||||
conf = self._parse_json(self._search_regex(
|
||||
r'(?s)conf\s*=\s*({.+?});', webpage, 'conf'),
|
||||
display_id, js_to_json)
|
||||
_TESTS = [{
|
||||
'url': 'https://www.ehftv.com/int/video/paris-saint-germain-handball-pge-vive-kielce/1166761',
|
||||
'info_dict': {
|
||||
'id': '1166761',
|
||||
'display_id': 'paris-saint-germain-handball-pge-vive-kielce',
|
||||
'ext': 'mp4',
|
||||
'title': 'Paris Saint-Germain Handball - PGE Vive Kielce',
|
||||
'is_live': False,
|
||||
'categories': ['Handball'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
video_id = conf['videoid']
|
||||
|
||||
config = self._download_json(conf['configUrl'], video_id, query={
|
||||
'videoid': video_id,
|
||||
'partnerid': conf['partnerid'],
|
||||
'language': conf.get('language', ''),
|
||||
'portal': conf.get('portalid', ''),
|
||||
})
|
||||
error = config.get('error')
|
||||
if error:
|
||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||
|
||||
video_data = config['video']
|
||||
title = video_data['title']
|
||||
is_live = video_data.get('isLivestream') and video_data.get('isLive')
|
||||
meta = video_data.get('metaInformation')
|
||||
sports = meta.get('sports')
|
||||
categories = sports.split(',') if sports else []
|
||||
|
||||
token_url = self._extract_token_url(
|
||||
video_data['streamAccess'], video_id,
|
||||
video_data['abo']['required'])
|
||||
|
||||
formats = self._extract_formats(token_url, video_id)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': self._live_title(title) if is_live else title,
|
||||
'description': video_data.get('description'),
|
||||
'thumbnail': video_data.get('image'),
|
||||
'categories': categories,
|
||||
'formats': formats,
|
||||
'is_live': is_live,
|
||||
}
|
||||
def _real_extract(self, url):
|
||||
return self._extract_video(url)
|
||||
|
||||
|
||||
class ITTFIE(InfoExtractor):
|
||||
|
175
youtube_dl/extractor/linkedin.py
Normal file
@ -0,0 +1,175 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    urlencode_postdata,
)


class LinkedInLearningBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'linkedin'

    def _call_api(self, course_slug, fields, video_slug=None, resolution=None):
        query = {
            'courseSlug': course_slug,
            'fields': fields,
            'q': 'slugs',
        }
        sub = ''
        if video_slug:
            query.update({
                'videoSlug': video_slug,
                'resolution': '_%s' % resolution,
            })
            sub = ' %dp' % resolution
        api_url = 'https://www.linkedin.com/learning-api/detailedCourses'
        return self._download_json(
            api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={
                'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value,
            }, query=query)['elements'][0]

    def _get_video_id(self, urn, course_slug, video_slug):
        if urn:
            mobj = re.search(r'urn:li:lyndaCourse:\d+,(\d+)', urn)
            if mobj:
                return mobj.group(1)
        return '%s/%s' % (course_slug, video_slug)

    def _real_initialize(self):
        email, password = self._get_login_info()
        if email is None:
            return

        login_page = self._download_webpage(
            'https://www.linkedin.com/uas/login?trk=learning',
            None, 'Downloading login page')
        action_url = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url',
            default='https://www.linkedin.com/uas/login-submit', group='url')
        data = self._hidden_inputs(login_page)
        data.update({
            'session_key': email,
            'session_password': password,
        })
        login_submit_page = self._download_webpage(
            action_url, None, 'Logging in',
            data=urlencode_postdata(data))
        error = self._search_regex(
            r'<span[^>]+class="error"[^>]*>\s*(.+?)\s*</span>',
            login_submit_page, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)


class LinkedInLearningIE(LinkedInLearningBaseIE):
    IE_NAME = 'linkedin:learning'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<course_slug>[^/]+)/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true',
        'md5': 'a1d74422ff0d5e66a792deb996693167',
        'info_dict': {
            'id': '90426',
            'ext': 'mp4',
            'title': 'Welcome',
            'timestamp': 1430396150.82,
            'upload_date': '20150430',
        },
    }

    def _real_extract(self, url):
        course_slug, video_slug = re.match(self._VALID_URL, url).groups()

        video_data = None
        formats = []
        for width, height in ((640, 360), (960, 540), (1280, 720)):
            video_data = self._call_api(
                course_slug, 'selectedVideo', video_slug, height)['selectedVideo']

            video_url_data = video_data.get('url') or {}
            progressive_url = video_url_data.get('progressiveUrl')
            if progressive_url:
                formats.append({
                    'format_id': 'progressive-%dp' % height,
                    'url': progressive_url,
                    'height': height,
                    'width': width,
                    'source_preference': 1,
                })

        title = video_data['title']

        audio_url = video_data.get('audio', {}).get('progressiveUrl')
        if audio_url:
            formats.append({
                'abr': 64,
                'ext': 'm4a',
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        streaming_url = video_url_data.get('streamingUrl')
        if streaming_url:
            formats.extend(self._extract_m3u8_formats(
                streaming_url, video_slug, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False))

        self._sort_formats(formats, ('width', 'height', 'source_preference', 'tbr', 'abr'))

        return {
            'id': self._get_video_id(video_data.get('urn'), course_slug, video_slug),
            'title': title,
            'formats': formats,
            'thumbnail': video_data.get('defaultThumbnail'),
            'timestamp': float_or_none(video_data.get('publishedOn'), 1000),
            'duration': int_or_none(video_data.get('durationInSeconds')),
        }


class LinkedInLearningCourseIE(LinkedInLearningBaseIE):
    IE_NAME = 'linkedin:learning:course'
    _VALID_URL = r'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals',
        'info_dict': {
            'id': 'programming-foundations-fundamentals',
            'title': 'Programming Foundations: Fundamentals',
            'description': 'md5:76e580b017694eb89dc8e8923fff5c86',
        },
        'playlist_mincount': 61,
    }

    @classmethod
    def suitable(cls, url):
        return False if LinkedInLearningIE.suitable(url) else super(LinkedInLearningCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        course_slug = self._match_id(url)
        course_data = self._call_api(course_slug, 'chapters,description,title')

        entries = []
        for chapter in course_data.get('chapters', []):
            chapter_title = chapter.get('title')
            for video in chapter.get('videos', []):
                video_slug = video.get('slug')
                if not video_slug:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'id': self._get_video_id(video.get('urn'), course_slug, video_slug),
                    'title': video.get('title'),
                    'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug, video_slug),
                    'chapter': chapter_title,
                    'ie_key': LinkedInLearningIE.ie_key(),
                })

        return self.playlist_result(
            entries, course_slug,
            course_data.get('title'),
            course_data.get('description'))
@ -4,6 +4,11 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .theplatform import ThePlatformBaseIE
|
||||
from ..compat import (
|
||||
compat_parse_qs,
|
||||
compat_str,
|
||||
compat_urllib_parse_urlparse,
|
||||
)
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE):
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return [
|
||||
mobj.group('url')
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>https?://(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid=\d+.*?)\1',
|
||||
webpage)]
|
||||
def _extract_urls(ie, webpage):
|
||||
def _qs(url):
|
||||
return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||
|
||||
def _program_guid(qs):
|
||||
return qs.get('programGuid', [None])[0]
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
|
||||
webpage):
|
||||
embed_url = mobj.group('url')
|
||||
embed_qs = _qs(embed_url)
|
||||
program_guid = _program_guid(embed_qs)
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
continue
|
||||
video_id = embed_qs.get('id', [None])[0]
|
||||
if not video_id:
|
||||
continue
|
||||
urlh = ie._request_webpage(
|
||||
embed_url, video_id, note='Following embed URL redirect')
|
||||
embed_url = compat_str(urlh.geturl())
|
||||
program_guid = _program_guid(_qs(embed_url))
|
||||
if program_guid:
|
||||
entries.append(embed_url)
|
||||
return entries
|
||||
|
||||
def _real_extract(self, url):
|
||||
guid = self._match_id(url)
|
||||
|
@ -167,9 +167,9 @@ class MotherlessGroupIE(InfoExtractor):
|
||||
if not entries:
|
||||
entries = [
|
||||
self.url_result(
|
||||
compat_urlparse.urljoin(base, '/' + video_id),
|
||||
ie=MotherlessIE.ie_key(), video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(
|
||||
compat_urlparse.urljoin(base, '/' + entry_id),
|
||||
ie=MotherlessIE.ie_key(), video_id=entry_id)
|
||||
for entry_id in orderedSet(re.findall(
|
||||
r'data-codename=["\']([A-Z0-9]+)', webpage))]
|
||||
return entries
|
||||
|
||||
|
@ -7,6 +7,7 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE
|
||||
from .adobepass import AdobePassIE
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from ..utils import (
|
||||
find_xpath_attr,
|
||||
smuggle_url,
|
||||
@ -75,11 +76,16 @@ class NBCIE(AdobePassIE):
|
||||
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# Percent escaped url
|
||||
'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = re.match(self._VALID_URL, url).groups()
|
||||
permalink = 'http' + permalink
|
||||
permalink = 'http' + compat_urllib_parse_unquote(permalink)
|
||||
response = self._download_json(
|
||||
'https://api.nbc.com/v3/videos', video_id, query={
|
||||
'filter[permalink]': permalink,
|
||||
|
@ -252,7 +252,7 @@ class NiconicoIE(InfoExtractor):
|
||||
},
|
||||
'timing_constraint': 'unlimited'
|
||||
}
|
||||
}))
|
||||
}).encode())
|
||||
|
||||
resolution = video_quality.get('resolution', {})
|
||||
|
||||
|
@ -31,6 +31,8 @@ class NJPWWorldIE(InfoExtractor):
|
||||
'skip': 'Requires login',
|
||||
}
|
||||
|
||||
_LOGIN_URL = 'https://front.njpwworld.com/auth/login'
|
||||
|
||||
def _real_initialize(self):
|
||||
self._login()
|
||||
|
||||
@ -40,13 +42,17 @@ class NJPWWorldIE(InfoExtractor):
|
||||
if not username:
|
||||
return True
|
||||
|
||||
# Setup session (will set necessary cookies)
|
||||
self._request_webpage(
|
||||
'https://njpwworld.com/', None, note='Setting up session')
|
||||
|
||||
webpage, urlh = self._download_webpage_handle(
|
||||
'https://njpwworld.com/auth/login', None,
|
||||
self._LOGIN_URL, None,
|
||||
note='Logging in', errnote='Unable to login',
|
||||
data=urlencode_postdata({'login_id': username, 'pw': password}),
|
||||
headers={'Referer': 'https://njpwworld.com/auth'})
|
||||
headers={'Referer': 'https://front.njpwworld.com/auth'})
|
||||
# /auth/login will return 302 for successful logins
|
||||
if urlh.geturl() == 'https://njpwworld.com/auth/login':
|
||||
if urlh.geturl() == self._LOGIN_URL:
|
||||
self.report_warning('unable to login')
|
||||
return False
|
||||
|
||||
|
@ -6,30 +6,90 @@ import re
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
qualities,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NovaEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
|
||||
_TEST = {
|
||||
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
|
||||
'md5': 'b3834f6de5401baabf31ed57456463f7',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
'title': '2180. díl',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2578,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
bitrates = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
||||
quality_key = qualities(QUALITIES)
|
||||
|
||||
formats = []
|
||||
for format_id, format_list in bitrates.items():
|
||||
if not isinstance(format_list, list):
|
||||
continue
|
||||
for format_url in format_list:
|
||||
format_url = url_or_none(format_url)
|
||||
if not format_url:
|
||||
continue
|
||||
f = {
|
||||
'url': format_url,
|
||||
}
|
||||
f_id = format_id
|
||||
for quality in QUALITIES:
|
||||
if '%s.mp4' % quality in format_url:
|
||||
f_id += '-%s' % quality
|
||||
f.update({
|
||||
'quality': quality_key(quality),
|
||||
'format_note': quality.upper(),
|
||||
})
|
||||
break
|
||||
f['format_id'] = f_id
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(
|
||||
webpage, default=None) or self._search_regex(
|
||||
(r'<value>(?P<title>[^<]+)',
|
||||
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'title', group='value')
|
||||
thumbnail = self._og_search_thumbnail(
|
||||
webpage, default=None) or self._search_regex(
|
||||
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', fatal=False, group='value')
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
|
||||
class NovaIE(InfoExtractor):
|
||||
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
|
||||
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
|
||||
_TESTS = [{
|
||||
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
|
||||
'info_dict': {
|
||||
'id': '1608920',
|
||||
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
|
||||
'ext': 'flv',
|
||||
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
|
||||
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
|
||||
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
|
||||
'info_dict': {
|
||||
@ -40,33 +100,6 @@ class NovaIE(InfoExtractor):
|
||||
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'info_dict': {
|
||||
'id': '1756825',
|
||||
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
|
||||
'ext': 'flv',
|
||||
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
|
||||
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
|
||||
'info_dict': {
|
||||
'id': '1756858',
|
||||
'ext': 'flv',
|
||||
'title': 'Televizní noviny - 30. 5. 2015',
|
||||
'thumbnail': r're:^https?://.*\.(?:jpg)',
|
||||
'upload_date': '20150530',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
|
||||
'info_dict': {
|
||||
@ -81,6 +114,20 @@ class NovaIE(InfoExtractor):
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
}, {
|
||||
# media.cms.nova.cz embed
|
||||
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
|
||||
'info_dict': {
|
||||
'id': '8o0n0r',
|
||||
'ext': 'mp4',
|
||||
'title': '2180. díl',
|
||||
'thumbnail': r're:^https?://.*\.jpg',
|
||||
'duration': 2578,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [NovaEmbedIE.ie_key()],
|
||||
}, {
|
||||
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
|
||||
'only_matching': True,
|
||||
@ -105,6 +152,15 @@ class NovaIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
# novaplus
|
||||
embed_id = self._search_regex(
|
||||
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
|
||||
webpage, 'embed url', default=None)
|
||||
if embed_id:
|
||||
return self.url_result(
|
||||
'https://media.cms.nova.cz/embed/%s' % embed_id,
|
||||
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
[r"(?:media|video_id)\s*:\s*'(\d+)'",
|
||||
r'media=(\d+)',
|
||||
|
@ -243,7 +243,18 @@ class PhantomJSwrapper(object):
|
||||
|
||||
|
||||
class OpenloadIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<host>
|
||||
(?:www\.)?
|
||||
(?:
|
||||
openload\.(?:co|io|link)|
|
||||
oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun)
|
||||
)
|
||||
)/
|
||||
(?:f|embed)/
|
||||
(?P<id>[a-zA-Z0-9-_]+)
|
||||
'''
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://openload.co/f/kUEfGclsU9o',
|
||||
@ -307,10 +318,22 @@ class OpenloadIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://oload.download/f/kUEfGclsU9o',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# Its title has not got its extension but url has it
|
||||
'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.cc/embed/5NEAbI2BDSk',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.icu/f/-_i4y_F_Hs8',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://oload.fun/f/gb6G1H4sHXY',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
|
||||
@ -322,8 +345,11 @@ class OpenloadIE(InfoExtractor):
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
url_pattern = 'https://openload.co/%%s/%s/' % video_id
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
|
||||
headers = {
|
||||
'User-Agent': self._USER_AGENT,
|
||||
}
|
||||
@ -356,7 +382,7 @@ class OpenloadIE(InfoExtractor):
|
||||
r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
|
||||
'stream URL'))
|
||||
|
||||
video_url = 'https://openload.co/stream/%s?mime=true' % decoded_id
|
||||
video_url = 'https://%s/stream/%s?mime=true' % (host, decoded_id)
|
||||
|
||||
title = self._og_search_title(webpage, default=None) or self._search_regex(
|
||||
r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
|
||||
@ -367,7 +393,7 @@ class OpenloadIE(InfoExtractor):
|
||||
entry = entries[0] if entries else {}
|
||||
subtitles = entry.get('subtitles')
|
||||
|
||||
info_dict = {
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
|
||||
@ -376,4 +402,3 @@ class OpenloadIE(InfoExtractor):
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
}
|
||||
return info_dict
|
||||
|
@ -15,6 +15,7 @@ from ..utils import (
|
||||
strip_jsonp,
|
||||
unescapeHTML,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
@ -68,26 +69,35 @@ class ORFTVthekIE(InfoExtractor):
|
||||
webpage, 'playlist', group='json'),
|
||||
playlist_id, transform_source=unescapeHTML)['playlist']['videos']
|
||||
|
||||
def quality_to_int(s):
|
||||
m = re.search('([0-9]+)', s)
|
||||
if m is None:
|
||||
return -1
|
||||
return int(m.group(1))
|
||||
|
||||
entries = []
|
||||
for sd in data_jsb:
|
||||
video_id, title = sd.get('id'), sd.get('title')
|
||||
if not video_id or not title:
|
||||
continue
|
||||
video_id = compat_str(video_id)
|
||||
formats = [{
|
||||
'preference': -10 if fd['delivery'] == 'hls' else None,
|
||||
'format_id': '%s-%s-%s' % (
|
||||
fd['delivery'], fd['quality'], fd['quality_string']),
|
||||
'url': fd['src'],
|
||||
'protocol': fd['protocol'],
|
||||
'quality': quality_to_int(fd['quality']),
|
||||
} for fd in sd['sources']]
|
||||
formats = []
|
||||
for fd in sd['sources']:
|
||||
src = url_or_none(fd.get('src'))
|
||||
if not src:
|
||||
continue
|
||||
format_id_list = []
|
||||
for key in ('delivery', 'quality', 'quality_string'):
|
||||
value = fd.get(key)
|
||||
if value:
|
||||
format_id_list.append(value)
|
||||
format_id = '-'.join(format_id_list)
|
||||
if determine_ext(fd['src']) == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
fd['src'], video_id, 'mp4', m3u8_id=format_id))
|
||||
elif determine_ext(fd['src']) == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
fd['src'], video_id, f4m_id=format_id))
|
||||
else:
|
||||
formats.append({
|
||||
'format_id': format_id,
|
||||
'url': src,
|
||||
'protocol': fd.get('protocol'),
|
||||
})
|
||||
|
||||
# Check for geoblocking.
|
||||
# There is a property is_geoprotection, but that's always false
|
||||
|
@ -2,52 +2,63 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import js_to_json
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
|
||||
|
||||
class PatreonIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||
'md5': 'e25505eec1053a6e6813b8ed369875cc',
|
||||
'info_dict': {
|
||||
'id': '743933',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 166: David Smalley of Dogma Debate',
|
||||
'uploader': 'Cognitive Dissonance Podcast',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
},
|
||||
_VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.patreon.com/creation?hid=743933',
|
||||
'md5': 'e25505eec1053a6e6813b8ed369875cc',
|
||||
'info_dict': {
|
||||
'id': '743933',
|
||||
'ext': 'mp3',
|
||||
'title': 'Episode 166: David Smalley of Dogma Debate',
|
||||
'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
|
||||
'uploader': 'Cognitive Dissonance Podcast',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'timestamp': 1406473987,
|
||||
'upload_date': '20140727',
|
||||
},
|
||||
{
|
||||
'url': 'http://www.patreon.com/creation?hid=754133',
|
||||
'md5': '3eb09345bf44bf60451b8b0b81759d0a',
|
||||
'info_dict': {
|
||||
'id': '754133',
|
||||
'ext': 'mp3',
|
||||
'title': 'CD 167 Extra',
|
||||
'uploader': 'Cognitive Dissonance Podcast',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://www.patreon.com/creation?hid=754133',
|
||||
'md5': '3eb09345bf44bf60451b8b0b81759d0a',
|
||||
'info_dict': {
|
||||
'id': '754133',
|
||||
'ext': 'mp3',
|
||||
'title': 'CD 167 Extra',
|
||||
'uploader': 'Cognitive Dissonance Podcast',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
},
|
||||
{
|
||||
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||
'info_dict': {
|
||||
'id': 'SU4fj_aEMVw',
|
||||
'ext': 'mp4',
|
||||
'title': 'I\'m on Patreon!',
|
||||
'uploader': 'TraciJHines',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'upload_date': '20150211',
|
||||
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||
'uploader_id': 'TraciJHines',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
'skip': 'Patron-only content',
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/creation?hid=1682498',
|
||||
'info_dict': {
|
||||
'id': 'SU4fj_aEMVw',
|
||||
'ext': 'mp4',
|
||||
'title': 'I\'m on Patreon!',
|
||||
'uploader': 'TraciJHines',
|
||||
'thumbnail': 're:^https?://.*$',
|
||||
'upload_date': '20150211',
|
||||
'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
|
||||
'uploader_id': 'TraciJHines',
|
||||
},
|
||||
'params': {
|
||||
'noplaylist': True,
|
||||
'skip_download': True,
|
||||
}
|
||||
]
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/posts/episode-166-of-743933',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.patreon.com/posts/743933',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
# Currently Patreon exposes download URL via hidden CSS, so login is not
|
||||
# needed. Keeping this commented for when this inevitably changes.
|
||||
@ -78,38 +89,48 @@ class PatreonIE(InfoExtractor):
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
title = self._og_search_title(webpage).strip()
|
||||
|
||||
attach_fn = self._html_search_regex(
|
||||
r'<div class="attach"><a target="_blank" href="([^"]+)">',
|
||||
webpage, 'attachment URL', default=None)
|
||||
embed = self._html_search_regex(
|
||||
r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
|
||||
webpage, 'embedded URL', default=None)
|
||||
|
||||
if attach_fn is not None:
|
||||
video_url = 'http://www.patreon.com' + attach_fn
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
uploader = self._html_search_regex(
|
||||
r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
|
||||
elif embed is not None:
|
||||
return self.url_result(embed)
|
||||
else:
|
||||
playlist = self._parse_json(self._search_regex(
|
||||
r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
|
||||
webpage, 'playlist JSON'),
|
||||
video_id, transform_source=js_to_json)
|
||||
data = playlist[0]
|
||||
video_url = self._proto_relative_url(data['mp3'])
|
||||
thumbnail = self._proto_relative_url(data.get('cover'))
|
||||
uploader = data.get('artist')
|
||||
|
||||
return {
|
||||
post = self._download_json(
|
||||
'https://www.patreon.com/api/posts/' + video_id, video_id)
|
||||
attributes = post['data']['attributes']
|
||||
title = attributes['title'].strip()
|
||||
image = attributes.get('image') or {}
|
||||
info = {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'ext': 'mp3',
|
||||
'title': title,
|
||||
'uploader': uploader,
|
||||
'thumbnail': thumbnail,
|
||||
'description': clean_html(attributes.get('content')),
|
||||
'thumbnail': image.get('large_url') or image.get('url'),
|
||||
'timestamp': parse_iso8601(attributes.get('published_at')),
|
||||
'like_count': int_or_none(attributes.get('like_count')),
|
||||
'comment_count': int_or_none(attributes.get('comment_count')),
|
||||
}
|
||||
|
||||
def add_file(file_data):
|
||||
file_url = file_data.get('url')
|
||||
if file_url:
|
||||
info.update({
|
||||
'url': file_url,
|
||||
'ext': determine_ext(file_data.get('name'), 'mp3'),
|
||||
})
|
||||
|
||||
for i in post.get('included', []):
|
||||
i_type = i.get('type')
|
||||
if i_type == 'attachment':
|
||||
add_file(i.get('attributes') or {})
|
||||
elif i_type == 'user':
|
||||
user_attributes = i.get('attributes')
|
||||
if user_attributes:
|
||||
info.update({
|
||||
'uploader': user_attributes.get('full_name'),
|
||||
'uploader_url': user_attributes.get('url'),
|
||||
})
|
||||
|
||||
if not info.get('url'):
|
||||
add_file(attributes.get('post_file') or {})
|
||||
|
||||
if not info.get('url'):
|
||||
info.update({
|
||||
'_type': 'url',
|
||||
'url': attributes['embed']['url'],
|
||||
})
|
||||
|
||||
return info
|
||||
|
@ -2,31 +2,38 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
xpath_text,
|
||||
try_get,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class PhilharmonieDeParisIE(InfoExtractor):
|
||||
IE_DESC = 'Philharmonie de Paris'
|
||||
_VALID_URL = r'https?://live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)(?P<id>\d+)'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|misc/Playlist\.ashx\?id=)|
|
||||
pad\.philharmoniedeparis\.fr/doc/CIMU/
|
||||
)
|
||||
(?P<id>\d+)
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'http://pad.philharmoniedeparis.fr/doc/CIMU/1086697/jazz-a-la-villette-knower',
|
||||
'md5': 'a0a4b195f544645073631cbec166a2c2',
|
||||
'info_dict': {
|
||||
'id': '1086697',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jazz à la Villette : Knower',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://live.philharmoniedeparis.fr/concert/1032066.html',
|
||||
'info_dict': {
|
||||
'id': '1032066',
|
||||
'ext': 'flv',
|
||||
'title': 'md5:d1f5585d87d041d07ce9434804bc8425',
|
||||
'timestamp': 1428179400,
|
||||
'upload_date': '20150404',
|
||||
'duration': 6592.278,
|
||||
'title': 'md5:0a031b81807b3593cffa3c9a87a167a0',
|
||||
},
|
||||
'params': {
|
||||
# rtmp download
|
||||
'skip_download': True,
|
||||
}
|
||||
'playlist_mincount': 2,
|
||||
}, {
|
||||
'url': 'http://live.philharmoniedeparis.fr/Concert/1030324.html',
|
||||
'only_matching': True,
|
||||
@ -34,45 +41,60 @@ class PhilharmonieDeParisIE(InfoExtractor):
|
||||
'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_LIVE_URL = 'https://live.philharmoniedeparis.fr'
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
concert = self._download_xml(
|
||||
'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=%s' % video_id,
|
||||
video_id).find('./concert')
|
||||
config = self._download_json(
|
||||
'%s/otoPlayer/config.ashx' % self._LIVE_URL, video_id, query={
|
||||
'id': video_id,
|
||||
'lang': 'fr-FR',
|
||||
})
|
||||
|
||||
formats = []
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'title': xpath_text(concert, './titre', 'title', fatal=True),
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
fichiers = concert.find('./fichiers')
|
||||
stream = fichiers.attrib['serveurstream']
|
||||
for fichier in fichiers.findall('./fichier'):
|
||||
info_dict['duration'] = float_or_none(fichier.get('timecodefin'))
|
||||
for quality, (format_id, suffix) in enumerate([('lq', ''), ('hq', '_hd')]):
|
||||
format_url = fichier.get('url%s' % suffix)
|
||||
if not format_url:
|
||||
def extract_entry(source):
|
||||
if not isinstance(source, dict):
|
||||
return
|
||||
title = source.get('title')
|
||||
if not title:
|
||||
return
|
||||
files = source.get('files')
|
||||
if not isinstance(files, dict):
|
||||
return
|
||||
format_urls = set()
|
||||
formats = []
|
||||
for format_id in ('mobile', 'desktop'):
|
||||
format_url = try_get(
|
||||
files, lambda x: x[format_id]['file'], compat_str)
|
||||
if not format_url or format_url in format_urls:
|
||||
continue
|
||||
formats.append({
|
||||
'url': stream,
|
||||
'play_path': format_url,
|
||||
'ext': 'flv',
|
||||
'format_id': format_id,
|
||||
'width': int_or_none(concert.get('largeur%s' % suffix)),
|
||||
'height': int_or_none(concert.get('hauteur%s' % suffix)),
|
||||
'quality': quality,
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
format_urls.add(format_url)
|
||||
m3u8_url = urljoin(self._LIVE_URL, format_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
if not formats:
|
||||
return
|
||||
self._sort_formats(formats)
|
||||
return {
|
||||
'title': title,
|
||||
'formats': formats,
|
||||
}
|
||||
|
||||
date, hour = concert.get('date'), concert.get('heure')
|
||||
if date and hour:
|
||||
info_dict['timestamp'] = parse_iso8601(
|
||||
'%s-%s-%sT%s:00' % (date[0:4], date[4:6], date[6:8], hour))
|
||||
elif date:
|
||||
info_dict['upload_date'] = date
|
||||
thumbnail = urljoin(self._LIVE_URL, config.get('image'))
|
||||
|
||||
return info_dict
|
||||
info = extract_entry(config)
|
||||
if info:
|
||||
info.update({
|
||||
'id': video_id,
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
return info
|
||||
|
||||
entries = []
|
||||
for num, chapter in enumerate(config['chapters'], start=1):
|
||||
entry = extract_entry(chapter)
|
||||
entry['id'] = '%s-%d' % (video_id, num)
|
||||
entries.append(entry)
|
||||
|
||||
return self.playlist_result(entries, video_id, config.get('title'))
|
||||
|
@ -1,6 +1,7 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
import time
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -15,7 +16,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class PicartoIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
||||
_TEST = {
|
||||
'url': 'https://picarto.tv/Setz',
|
||||
'info_dict': {
|
||||
@ -33,20 +34,14 @@ class PicartoIE(InfoExtractor):
|
||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
channel_id = self._match_id(url)
|
||||
stream_page = self._download_webpage(url, channel_id)
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
channel_id = mobj.group('id')
|
||||
|
||||
if '>This channel does not exist' in stream_page:
|
||||
raise ExtractorError(
|
||||
'Channel %s does not exist' % channel_id, expected=True)
|
||||
metadata = self._download_json(
|
||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
||||
channel_id)
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
|
||||
'player settings'),
|
||||
channel_id, transform_source=js_to_json)
|
||||
|
||||
if player.get('online') is False:
|
||||
if metadata.get('online') is False:
|
||||
raise ExtractorError('Stream is offline', expected=True)
|
||||
|
||||
cdn_data = self._download_json(
|
||||
@ -54,20 +49,13 @@ class PicartoIE(InfoExtractor):
|
||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
||||
note='Downloading load balancing info')
|
||||
|
||||
def get_event(key):
|
||||
return try_get(player, lambda x: x['event'][key], compat_str) or ''
|
||||
|
||||
token = mobj.group('token') or 'public'
|
||||
params = {
|
||||
'token': player.get('token') or '',
|
||||
'ticket': get_event('ticket'),
|
||||
'con': int(time.time() * 1000),
|
||||
'type': get_event('ticket'),
|
||||
'scope': get_event('scope'),
|
||||
'token': token,
|
||||
}
|
||||
|
||||
prefered_edge = cdn_data.get('preferedEdge')
|
||||
default_tech = player.get('defaultTech')
|
||||
|
||||
formats = []
|
||||
|
||||
for edge in cdn_data['edges']:
|
||||
@ -81,8 +69,6 @@ class PicartoIE(InfoExtractor):
|
||||
preference = 0
|
||||
if edge_id == prefered_edge:
|
||||
preference += 1
|
||||
if tech_type == default_tech:
|
||||
preference += 1
|
||||
format_id = []
|
||||
if edge_id:
|
||||
format_id.append(edge_id)
|
||||
@ -109,7 +95,7 @@ class PicartoIE(InfoExtractor):
|
||||
continue
|
||||
self._sort_formats(formats)
|
||||
|
||||
mature = player.get('mature')
|
||||
mature = metadata.get('adult')
|
||||
if mature is None:
|
||||
age_limit = None
|
||||
else:
|
||||
@ -117,9 +103,11 @@ class PicartoIE(InfoExtractor):
|
||||
|
||||
return {
|
||||
'id': channel_id,
|
||||
'title': self._live_title(channel_id),
|
||||
'title': self._live_title(metadata.get('title') or channel_id),
|
||||
'is_live': True,
|
||||
'thumbnail': player.get('vodThumb'),
|
||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
||||
'channel': channel_id,
|
||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||
'age_limit': age_limit,
|
||||
'formats': formats,
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import collections
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import (
|
||||
@ -196,7 +197,10 @@ query viewClip {
|
||||
if error:
|
||||
raise ExtractorError('Unable to login: %s' % error, expected=True)
|
||||
|
||||
if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
|
||||
if all(not re.search(p, response) for p in (
|
||||
r'__INITIAL_STATE__', r'["\']currentUser["\']',
|
||||
# new layout?
|
||||
r'>\s*Sign out\s*<')):
|
||||
BLOCKED = 'Your account has been blocked due to suspicious activity'
|
||||
if BLOCKED in response:
|
||||
raise ExtractorError(
|
||||
@ -210,18 +214,26 @@ query viewClip {
|
||||
|
||||
raise ExtractorError('Unable to log in')
|
||||
|
||||
def _get_subtitles(self, author, clip_idx, lang, name, duration, video_id):
|
||||
captions_post = {
|
||||
'a': author,
|
||||
'cn': clip_idx,
|
||||
'lc': lang,
|
||||
'm': name,
|
||||
}
|
||||
captions = self._download_json(
|
||||
'%s/player/retrieve-captions' % self._API_BASE, video_id,
|
||||
'Downloading captions JSON', 'Unable to download captions JSON',
|
||||
fatal=False, data=json.dumps(captions_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id):
|
||||
captions = None
|
||||
if clip_id:
|
||||
captions = self._download_json(
|
||||
'%s/transcript/api/v1/caption/json/%s/%s'
|
||||
% (self._API_BASE, clip_id, lang), video_id,
|
||||
'Downloading captions JSON', 'Unable to download captions JSON',
|
||||
fatal=False)
|
||||
if not captions:
|
||||
captions_post = {
|
||||
'a': author,
|
||||
'cn': int(clip_idx),
|
||||
'lc': lang,
|
||||
'm': name,
|
||||
}
|
||||
captions = self._download_json(
|
||||
'%s/player/retrieve-captions' % self._API_BASE, video_id,
|
||||
'Downloading captions JSON', 'Unable to download captions JSON',
|
||||
fatal=False, data=json.dumps(captions_post).encode('utf-8'),
|
||||
headers={'Content-Type': 'application/json;charset=utf-8'})
|
||||
if captions:
|
||||
return {
|
||||
lang: [{
|
||||
@ -413,7 +425,7 @@ query viewClip {
|
||||
|
||||
# TODO: other languages?
|
||||
subtitles = self.extract_subtitles(
|
||||
author, clip_idx, 'en', name, duration, display_id)
|
||||
author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id)
|
||||
|
||||
return {
|
||||
'id': clip_id,
|
||||
|
@ -58,8 +58,6 @@ class PopcornTVIE(InfoExtractor):
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
timestamp = unified_timestamp(self._html_search_meta(
|
||||
'uploadDate', webpage, 'timestamp'))
|
||||
print(self._html_search_meta(
|
||||
'duration', webpage))
|
||||
duration = int_or_none(self._html_search_meta(
|
||||
'duration', webpage), invscale=60)
|
||||
view_count = int_or_none(self._html_search_meta(
|
||||
|
@ -27,7 +27,7 @@ class PornHubIE(InfoExtractor):
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?:
|
||||
(?:[^/]+\.)?pornhub\.com/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||
(?:www\.)?thumbzilla\.com/video/
|
||||
)
|
||||
(?P<id>[\da-z]+)
|
||||
@ -40,6 +40,7 @@ class PornHubIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
|
||||
'uploader': 'Babes',
|
||||
'upload_date': '20130628',
|
||||
'duration': 361,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@ -57,6 +58,7 @@ class PornHubIE(InfoExtractor):
|
||||
'ext': 'mp4',
|
||||
'title': '重庆婷婷女王足交',
|
||||
'uploader': 'Unknown',
|
||||
'upload_date': '20150213',
|
||||
'duration': 1753,
|
||||
'view_count': int,
|
||||
'like_count': int,
|
||||
@ -119,6 +121,9 @@ class PornHubIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
@ -237,8 +242,14 @@ class PornHubIE(InfoExtractor):
|
||||
video_urls.append((video_url, None))
|
||||
video_urls_set.add(video_url)
|
||||
|
||||
upload_date = None
|
||||
formats = []
|
||||
for video_url, height in video_urls:
|
||||
if not upload_date:
|
||||
upload_date = self._search_regex(
|
||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||
if upload_date:
|
||||
upload_date = upload_date.replace('/', '')
|
||||
tbr = None
|
||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
||||
if mobj:
|
||||
@ -254,7 +265,7 @@ class PornHubIE(InfoExtractor):
|
||||
self._sort_formats(formats)
|
||||
|
||||
video_uploader = self._html_search_regex(
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:user|channel)s/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
|
||||
view_count = self._extract_count(
|
||||
@ -278,6 +289,7 @@ class PornHubIE(InfoExtractor):
|
||||
return {
|
||||
'id': video_id,
|
||||
'uploader': video_uploader,
|
||||
'upload_date': upload_date,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
@ -331,7 +343,7 @@ class PornHubPlaylistBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/playlist/(?P<id>\d+)'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/playlist/(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/playlist/4667351',
|
||||
'info_dict': {
|
||||
@ -346,7 +358,7 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
|
||||
|
||||
|
||||
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.com/(?:user|channel)s/(?P<id>[^/]+)/videos'
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'info_dict': {
|
||||
@ -378,6 +390,12 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
|
||||
}, {
|
||||
'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -4,8 +4,11 @@ import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
unified_strdate,
|
||||
parse_resolution,
|
||||
str_to_int,
|
||||
unified_strdate,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
@ -29,13 +32,26 @@ class RadioJavanIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
download_host = self._download_json(
|
||||
'https://www.radiojavan.com/videos/video_host', video_id,
|
||||
data=urlencode_postdata({'id': video_id}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
'Referer': url,
|
||||
}).get('host', 'https://host1.rjmusicmedia.com')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
formats = [{
|
||||
'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
|
||||
'format_id': '%sp' % height,
|
||||
'height': int(height),
|
||||
} for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
|
||||
formats = []
|
||||
for format_id, _, video_path in re.findall(
|
||||
r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2',
|
||||
webpage):
|
||||
f = parse_resolution(format_id)
|
||||
f.update({
|
||||
'url': urljoin(download_host, video_path),
|
||||
'format_id': format_id,
|
||||
})
|
||||
formats.append(f)
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._og_search_title(webpage)
|
||||
|
@@ -274,7 +274,6 @@ class RaiPlayPlaylistIE(InfoExtractor):
            ('programma', 'nomeProgramma'), webpage, 'title')
        description = unescapeHTML(self._html_search_meta(
            ('description', 'og:description'), webpage, 'description'))
        print(description)

        entries = []
        for mobj in re.finditer(
@ -8,7 +8,10 @@ from ..compat import compat_HTTPError
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
@ -17,65 +20,87 @@ class RteBaseIE(InfoExtractor):
|
||||
def _real_extract(self, url):
|
||||
item_id = self._match_id(url)
|
||||
|
||||
try:
|
||||
json_string = self._download_json(
|
||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id,
|
||||
item_id)
|
||||
except ExtractorError as ee:
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||
if error_info:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
# NB the string values in the JSON are stored using XML escaping(!)
|
||||
show = json_string['shows'][0]
|
||||
title = unescapeHTML(show['title'])
|
||||
description = unescapeHTML(show.get('description'))
|
||||
thumbnail = show.get('thumbnail')
|
||||
duration = float_or_none(show.get('duration'), 1000)
|
||||
timestamp = parse_iso8601(show.get('published'))
|
||||
|
||||
mg = show['media:group'][0]
|
||||
|
||||
info_dict = {}
|
||||
formats = []
|
||||
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
ENDPOINTS = (
|
||||
'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
|
||||
'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
|
||||
)
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
for num, ep_url in enumerate(ENDPOINTS, start=1):
|
||||
try:
|
||||
data = self._download_json(ep_url + item_id, item_id)
|
||||
except ExtractorError as ee:
|
||||
if num < len(ENDPOINTS) or formats:
|
||||
continue
|
||||
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404:
|
||||
error_info = self._parse_json(ee.cause.read().decode(), item_id, fatal=False)
|
||||
if error_info:
|
||||
raise ExtractorError(
|
||||
'%s said: %s' % (self.IE_NAME, error_info['message']),
|
||||
expected=True)
|
||||
raise
|
||||
|
||||
if mg.get('hds_server') and mg.get('hds_url'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
mg['hds_server'] + mg['hds_url'], item_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
# NB the string values in the JSON are stored using XML escaping(!)
|
||||
show = try_get(data, lambda x: x['shows'][0], dict)
|
||||
if not show:
|
||||
continue
|
||||
|
||||
if not info_dict:
|
||||
title = unescapeHTML(show['title'])
|
||||
description = unescapeHTML(show.get('description'))
|
||||
thumbnail = show.get('thumbnail')
|
||||
duration = float_or_none(show.get('duration'), 1000)
|
||||
timestamp = parse_iso8601(show.get('published'))
|
||||
info_dict = {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
}
|
||||
|
||||
mg = try_get(show, lambda x: x['media:group'][0], dict)
|
||||
if not mg:
|
||||
continue
|
||||
|
||||
if mg.get('url'):
|
||||
m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
|
||||
if m:
|
||||
m = m.groupdict()
|
||||
formats.append({
|
||||
'url': m['url'] + '/' + m['app'],
|
||||
'app': m['app'],
|
||||
'play_path': m['playpath'],
|
||||
'player_url': url,
|
||||
'ext': 'flv',
|
||||
'format_id': 'rtmp',
|
||||
})
|
||||
|
||||
if mg.get('hls_server') and mg.get('hls_url'):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
|
||||
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
if mg.get('hds_server') and mg.get('hds_url'):
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
mg['hds_server'] + mg['hds_url'], item_id,
|
||||
f4m_id='hds', fatal=False))
|
||||
|
||||
mg_rte_server = str_or_none(mg.get('rte:server'))
|
||||
mg_url = str_or_none(mg.get('url'))
|
||||
if mg_rte_server and mg_url:
|
||||
hds_url = url_or_none(mg_rte_server + mg_url)
|
||||
if hds_url:
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
hds_url, item_id, f4m_id='hds', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': item_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'timestamp': timestamp,
|
||||
'duration': duration,
|
||||
'formats': formats,
|
||||
}
|
||||
info_dict['formats'] = formats
|
||||
return info_dict
|
||||
|
||||
|
||||
class RteIE(RteBaseIE):
|
||||
|
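
The endpoint loop introduced in RteBaseIE above boils down to: try each endpoint in order and only let an error escape when it is the last endpoint and nothing usable has been collected yet. A control-flow sketch with a caller-supplied fetch function standing in for `_download_json`; the helper name and endpoint list are illustrative, not part of the diff.

```python
def download_from_endpoints(endpoints, item_id, fetch):
    collected = []
    for num, ep_url in enumerate(endpoints, start=1):
        try:
            data = fetch(ep_url + item_id)
        except Exception:
            # Swallow the error while there are endpoints left to try,
            # or if an earlier endpoint already produced something.
            if num < len(endpoints) or collected:
                continue
            raise
        if data:
            collected.append(data)
    return collected
```
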
@ -103,7 +103,8 @@ class RutubeIE(RutubeBaseIE):
|
||||
|
||||
options = self._download_json(
|
||||
'http://rutube.ru/api/play/options/%s/?format=json' % video_id,
|
||||
video_id, 'Downloading options JSON')
|
||||
video_id, 'Downloading options JSON',
|
||||
headers=self.geo_verification_headers())
|
||||
|
||||
formats = []
|
||||
for format_id, format_url in options['video_balancer'].items():
|
||||
|
@ -65,7 +65,8 @@ class RuutuIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
video_xml = self._download_xml(
|
||||
'http://gatling.ruutu.fi/media-xml-cache?id=%s' % video_id, video_id)
|
||||
'https://gatling.nelonenmedia.fi/media-xml-cache', video_id,
|
||||
query={'id': video_id})
|
||||
|
||||
formats = []
|
||||
processed_urls = []
|
||||
|
@ -90,6 +90,15 @@ class ScreencastIE(InfoExtractor):
|
||||
r'src=(.*?)(?:$|&)', video_meta,
|
||||
'meta tag video URL', default=None)
|
||||
|
||||
if video_url is None:
|
||||
video_url = self._html_search_regex(
|
||||
r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'video url', default=None, group='url')
|
||||
|
||||
if video_url is None:
|
||||
video_url = self._html_search_meta(
|
||||
'og:video', webpage, default=None)
|
||||
|
||||
if video_url is None:
|
||||
raise ExtractorError('Cannot find video')
|
||||
|
||||
|
@ -164,6 +164,6 @@ class SeznamZpravyArticleIE(InfoExtractor):
|
||||
description = info.get('description') or self._og_search_description(webpage)
|
||||
|
||||
return self.playlist_result([
|
||||
self.url_result(url, ie=SeznamZpravyIE.ie_key())
|
||||
for url in SeznamZpravyIE._extract_urls(webpage)],
|
||||
self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
|
||||
for entry_url in SeznamZpravyIE._extract_urls(webpage)],
|
||||
article_id, title, description)
|
||||
|
@ -5,6 +5,7 @@ from ..compat import compat_b64decode
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
@ -86,9 +87,16 @@ class VivoIE(SharedBaseIE):
|
||||
}
|
||||
|
||||
def _extract_video_url(self, webpage, video_id, *args):
|
||||
def decode_url(encoded_url):
|
||||
return compat_b64decode(encoded_url).decode('utf-8')
|
||||
|
||||
stream_url = url_or_none(decode_url(self._search_regex(
|
||||
r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'stream url', default=None, group='url')))
|
||||
if stream_url:
|
||||
return stream_url
|
||||
return self._parse_json(
|
||||
self._search_regex(
|
||||
r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
webpage, 'stream', group='url'),
|
||||
video_id,
|
||||
transform_source=lambda x: compat_b64decode(x).decode('utf-8'))[0]
|
||||
video_id, transform_source=decode_url)[0]
|
||||
|
@ -19,7 +19,7 @@ from ..utils import (
|
||||
|
||||
class SixPlayIE(InfoExtractor):
|
||||
IE_NAME = '6play'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay.be)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_VALID_URL = r'(?:6play:|https?://(?:www\.)?(?P<domain>6play\.fr|rtlplay\.be|play\.rtl\.hr)/.+?-c_)(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.6play.fr/minute-par-minute-p_9533/le-but-qui-a-marque-lhistoire-du-football-francais-c_12041051',
|
||||
'md5': '31fcd112637baa0c2ab92c4fcd8baf27',
|
||||
@ -32,6 +32,9 @@ class SixPlayIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://www.rtlplay.be/rtl-info-13h-p_8551/les-titres-du-rtlinfo-13h-c_12045869',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://play.rtl.hr/pj-masks-p_9455/epizoda-34-sezona-1-catboyevo-cudo-na-dva-kotaca-c_11984989',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -39,6 +42,7 @@ class SixPlayIE(InfoExtractor):
|
||||
service, consumer_name = {
|
||||
'6play.fr': ('6play', 'm6web'),
|
||||
'rtlplay.be': ('rtlbe_rtl_play', 'rtlbe'),
|
||||
'play.rtl.hr': ('rtlhr_rtl_play', 'rtlhr'),
|
||||
}.get(domain, ('6play', 'm6web'))
|
||||
|
||||
data = self._download_json(
|
||||
@ -60,7 +64,7 @@ class SixPlayIE(InfoExtractor):
|
||||
for asset in clip_data['assets']:
|
||||
asset_url = asset.get('full_physical_path')
|
||||
protocol = asset.get('protocol')
|
||||
if not asset_url or protocol == 'primetime' or asset_url in urls:
|
||||
if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls:
|
||||
continue
|
||||
urls.append(asset_url)
|
||||
container = asset.get('video_container')
|
||||
@ -77,19 +81,17 @@ class SixPlayIE(InfoExtractor):
|
||||
if not urlh:
|
||||
continue
|
||||
asset_url = urlh.geturl()
|
||||
asset_url = re.sub(r'/([^/]+)\.ism/[^/]*\.m3u8', r'/\1.ism/\1.m3u8', asset_url)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
asset_url.replace('.m3u8', '.f4m'),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
asset_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
formats.extend(self._extract_ism_formats(
|
||||
re.sub(r'/[^/]+\.m3u8', '/Manifest', asset_url),
|
||||
video_id, ism_id='mss', fatal=False))
|
||||
for i in range(3, 0, -1):
|
||||
asset_url = asset_url.replace('_sd1/', '_sd%d/' % i)
|
||||
m3u8_formats = self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False)
|
||||
formats.extend(m3u8_formats)
|
||||
formats.extend(self._extract_mpd_formats(
|
||||
asset_url.replace('.m3u8', '.mpd'),
|
||||
video_id, mpd_id='dash', fatal=False))
|
||||
if m3u8_formats:
|
||||
break
|
||||
else:
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
asset_url, video_id, 'mp4', 'm3u8_native',
|
||||
|
@ -8,6 +8,7 @@ from ..utils import ExtractorError
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
# video_service_name = YOUTUBE
|
||||
'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
|
||||
'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
|
||||
'info_dict': {
|
||||
@ -19,14 +20,18 @@ class SlidesLiveIE(InfoExtractor):
|
||||
'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
|
||||
'upload_date': '20170925',
|
||||
}
|
||||
}, {
|
||||
# video_service_name = youtube
|
||||
'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
url, video_id, headers={'Accept': 'application/json'})
|
||||
service_name = video_data['video_service_name']
|
||||
if service_name == 'YOUTUBE':
|
||||
service_name = video_data['video_service_name'].lower()
|
||||
if service_name == 'youtube':
|
||||
yt_video_id = video_data['video_service_id']
|
||||
return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
|
||||
else:
|
||||
|
@ -44,3 +44,10 @@ class ParamountNetworkIE(MTVServicesInfoExtractor):
|
||||
|
||||
_FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
|
||||
_GEO_COUNTRIES = ['US']
|
||||
|
||||
def _extract_mgid(self, webpage):
|
||||
cs = self._parse_json(self._search_regex(
|
||||
r'window\.__DATA__\s*=\s*({.+})',
|
||||
webpage, 'data'), None)['children']
|
||||
c = next(c for c in cs if c.get('type') == 'VideoPlayer')
|
||||
return c['props']['media']['video']['config']['uri']
|
||||
|
@ -8,20 +8,24 @@ from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
merge_dicts,
|
||||
)
|
||||
|
||||
|
||||
class SportBoxEmbedIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://news\.sportbox\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
|
||||
class SportBoxIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:news\.sportbox|matchtv)\.ru/vdl/player(?:/[^/]+/|\?.*?\bn?id=)(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://news.sportbox.ru/vdl/player/ci/211355',
|
||||
'info_dict': {
|
||||
'id': '211355',
|
||||
'id': '109158',
|
||||
'ext': 'mp4',
|
||||
'title': '211355',
|
||||
'title': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
|
||||
'description': 'В Новороссийске прошел детский турнир «Поле славы боевой»',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'duration': 292,
|
||||
'view_count': int,
|
||||
'timestamp': 1426237001,
|
||||
'upload_date': '20150313',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -33,12 +37,18 @@ class SportBoxEmbedIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://news.sportbox.ru/vdl/player/media/193095',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://news.sportbox.ru/vdl/player/media/109158',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://matchtv.ru/vdl/player/media/109158',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
r'<iframe[^>]+src="(https?://news\.sportbox\.ru/vdl/player[^"]+)"',
|
||||
r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
|
||||
webpage)
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -46,13 +56,14 @@ class SportBoxEmbedIE(InfoExtractor):
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
wjplayer_data = self._parse_json(
|
||||
sources = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)wjplayer\(({.+?})\);', webpage, 'wjplayer settings'),
|
||||
r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
|
||||
webpage, 'sources'),
|
||||
video_id, transform_source=js_to_json)
|
||||
|
||||
formats = []
|
||||
for source in wjplayer_data['sources']:
|
||||
for source in sources:
|
||||
src = source.get('src')
|
||||
if not src:
|
||||
continue
|
||||
@ -66,14 +77,23 @@ class SportBoxEmbedIE(InfoExtractor):
|
||||
})
|
||||
self._sort_formats(formats)
|
||||
|
||||
player = self._parse_json(
|
||||
self._search_regex(
|
||||
r'(?s)playerOptions\s*=\s*({.+?})\s*;\s*\n', webpage,
|
||||
'player options', default='{}'),
|
||||
video_id, transform_source=js_to_json)
|
||||
media_id = player['mediaId']
|
||||
|
||||
info = self._search_json_ld(webpage, media_id, default={})
|
||||
|
||||
view_count = int_or_none(self._search_regex(
|
||||
r'Просмотров\s*:\s*(\d+)', webpage, 'view count', default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_id,
|
||||
'thumbnail': wjplayer_data.get('poster'),
|
||||
'duration': int_or_none(wjplayer_data.get('duration')),
|
||||
return merge_dicts(info, {
|
||||
'id': media_id,
|
||||
'title': self._og_search_title(webpage, default=None) or media_id,
|
||||
'thumbnail': player.get('poster'),
|
||||
'duration': int_or_none(player.get('duration')),
|
||||
'view_count': view_count,
|
||||
'formats': formats,
|
||||
}
|
||||
})
|
||||
|
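
The new sources parsing in SportBoxIE above in miniature: `js_to_json` (from youtube_dl/utils.py) relaxes the JavaScript object literal so it can be fed to `json.loads`. The page snippet below is invented for illustration and assumes youtube-dl is importable.

```python
import json
import re

from youtube_dl.utils import js_to_json

webpage = """
playerOptions.sources = [
    {src: '/video/123.m3u8', type: 'application/x-mpegURL'},
];
"""

# Same regex shape as in the diff: grab the JS array assigned to playerOptions.sources.
raw = re.search(
    r'(?s)playerOptions\.sources(?:WithRes)?\s*=\s*(\[.+?\])\s*;\s*\n',
    webpage).group(1)
sources = json.loads(js_to_json(raw))
print(sources[0]['src'])  # /video/123.m3u8
```
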
@ -212,8 +212,6 @@ class TEDIE(InfoExtractor):
|
||||
|
||||
http_url = None
|
||||
for format_id, resources in resources_.items():
|
||||
if not isinstance(resources, dict):
|
||||
continue
|
||||
if format_id == 'h264':
|
||||
for resource in resources:
|
||||
h264_url = resource.get('file')
|
||||
@ -242,6 +240,8 @@ class TEDIE(InfoExtractor):
|
||||
'tbr': int_or_none(resource.get('bitrate')),
|
||||
})
|
||||
elif format_id == 'hls':
|
||||
if not isinstance(resources, dict):
|
||||
continue
|
||||
stream_url = url_or_none(resources.get('stream'))
|
||||
if not stream_url:
|
||||
continue
|
||||
|
youtube_dl/extractor/tele5.py (new file, 44 lines)
@@ -0,0 +1,44 @@
# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_urlparse


class Tele5IE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:mediathek|tv)/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
        'info_dict': {
            'id': '1549416',
            'ext': 'mp4',
            'upload_date': '20180814',
            'timestamp': 1534290623,
            'title': 'Pandorum',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.tele5.de/tv/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
        'only_matching': True,
    }, {
        'url': 'https://www.tele5.de/tv/dark-matter/videos',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
        video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]

        if not video_id:
            display_id = self._match_id(url)
            webpage = self._download_webpage(url, display_id)
            video_id = self._html_search_regex(
                r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
                webpage, 'video id')

        return self.url_result(
            'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
            ie=NexxIE.ie_key(), video_id=video_id)
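
The query-string lookup in Tele5IE._real_extract above, shown with the stdlib instead of youtube-dl's compat layer; the sample URL is taken from the test case.

```python
try:
    from urllib.parse import urlparse, parse_qs  # Python 3
except ImportError:
    from urlparse import urlparse, parse_qs      # Python 2


def tele5_video_id(url):
    qs = parse_qs(urlparse(url).query)
    # Prefer ?vid=..., fall back to ?ve_id=..., else None (scrape the page instead).
    return (qs.get('vid') or qs.get('ve_id') or [None])[0]


print(tele5_video_id('https://www.tele5.de/mediathek/filme-online/videos?vid=1549416'))  # 1549416
```
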
@ -39,9 +39,17 @@ class ThePlatformBaseIE(OnceIE):
|
||||
smil_url, video_id, note=note, query={'format': 'SMIL'},
|
||||
headers=self.geo_verification_headers())
|
||||
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
|
||||
if error_element is not None and error_element.attrib['src'].startswith(
|
||||
'http://link.theplatform.%s/s/errorFiles/Unavailable.' % self._TP_TLD):
|
||||
raise ExtractorError(error_element.attrib['abstract'], expected=True)
|
||||
if error_element is not None:
|
||||
exception = find_xpath_attr(
|
||||
error_element, _x('.//smil:param'), 'name', 'exception')
|
||||
if exception is not None:
|
||||
if exception.get('value') == 'GeoLocationBlocked':
|
||||
self.raise_geo_restricted(error_element.attrib['abstract'])
|
||||
elif error_element.attrib['src'].startswith(
|
||||
'http://link.theplatform.%s/s/errorFiles/Unavailable.'
|
||||
% self._TP_TLD):
|
||||
raise ExtractorError(
|
||||
error_element.attrib['abstract'], expected=True)
|
||||
|
||||
smil_formats = self._parse_smil_formats(
|
||||
meta, smil_url, video_id, namespace=default_ns,
|
||||
|
@ -18,8 +18,9 @@ from ..utils import (
|
||||
class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
# May be overridden in descendants if necessary
|
||||
_CONFIG_REGEX = [
|
||||
r'flashvars\.config\s*=\s*escape\("([^"]+)"',
|
||||
r'<input[^>]+name="config\d?" value="([^"]+)"',
|
||||
r'flashvars\.config\s*=\s*escape\("(?P<url>[^"]+)"',
|
||||
r'<input[^>]+name="config\d?" value="(?P<url>[^"]+)"',
|
||||
r'config\s*=\s*(["\'])(?P<url>(?:https?:)?//(?:(?!\1).)+)\1',
|
||||
]
|
||||
_HOST = 'tna'
|
||||
_VKEY_SUFFIX = ''
|
||||
@ -85,7 +86,8 @@ class TNAFlixNetworkBaseIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
cfg_url = self._proto_relative_url(self._html_search_regex(
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None), 'http:')
|
||||
self._CONFIG_REGEX, webpage, 'flashvars.config', default=None,
|
||||
group='url'), 'http:')
|
||||
|
||||
if not cfg_url:
|
||||
inputs = self._hidden_inputs(webpage)
|
||||
|
@ -45,7 +45,7 @@ class Tube8IE(KeezMoviesIE):
|
||||
r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
|
||||
|
||||
description = self._html_search_regex(
|
||||
r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
|
||||
r'(?s)Description:</dt>\s*<dd>(.+?)</dd>', webpage, 'description', fatal=False)
|
||||
uploader = self._html_search_regex(
|
||||
r'<span class="username">\s*(.+?)\s*<',
|
||||
webpage, 'uploader', fatal=False)
|
||||
@ -55,19 +55,19 @@ class Tube8IE(KeezMoviesIE):
|
||||
dislike_count = int_or_none(self._search_regex(
|
||||
r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
|
||||
view_count = str_to_int(self._search_regex(
|
||||
r'<strong>Views: </strong>([\d,\.]+)\s*</li>',
|
||||
r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
|
||||
webpage, 'view count', fatal=False))
|
||||
comment_count = str_to_int(self._search_regex(
|
||||
r'<span id="allCommentsCount">(\d+)</span>',
|
||||
webpage, 'comment count', fatal=False))
|
||||
|
||||
category = self._search_regex(
|
||||
r'Category:\s*</strong>\s*<a[^>]+href=[^>]+>([^<]+)',
|
||||
r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
|
||||
webpage, 'category', fatal=False)
|
||||
categories = [category] if category else None
|
||||
|
||||
tags_str = self._search_regex(
|
||||
r'(?s)Tags:\s*</strong>(.+?)</(?!a)',
|
||||
r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
|
||||
webpage, 'tags', fatal=False)
|
||||
tags = [t for t in re.findall(
|
||||
r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None
|
||||
|
@ -1,34 +0,0 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class TV3IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?tv3\.co\.nz/(?P<id>[^/]+)/tabid/\d+/articleID/\d+/MCat/\d+/Default\.aspx'
|
||||
_TEST = {
|
||||
'url': 'http://www.tv3.co.nz/MOTORSPORT-SRS-SsangYong-Hampton-Downs-Round-3/tabid/3692/articleID/121615/MCat/2915/Default.aspx',
|
||||
'info_dict': {
|
||||
'id': '4659127992001',
|
||||
'ext': 'mp4',
|
||||
'title': 'CRC Motorsport: SRS SsangYong Hampton Downs Round 3 - S2015 Ep3',
|
||||
'description': 'SsangYong Racing Series returns for Round 3 with drivers from New Zealand and Australia taking to the grid at Hampton Downs raceway.',
|
||||
'uploader_id': '3812193411001',
|
||||
'upload_date': '20151213',
|
||||
'timestamp': 1449975272,
|
||||
},
|
||||
'expected_warnings': [
|
||||
'Failed to download MPD manifest'
|
||||
],
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/3812193411001/default_default/index.html?videoId=%s'
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
brightcove_id = self._search_regex(r'<param\s*name="@videoPlayer"\s*value="(\d+)"', webpage, 'brightcove id')
|
||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
@ -32,12 +32,12 @@ class TVPlayIE(InfoExtractor):
|
||||
https?://
|
||||
(?:www\.)?
|
||||
(?:
|
||||
tvplay(?:\.skaties)?\.lv/parraides|
|
||||
(?:tv3play|play\.tv3)\.lt/programos|
|
||||
tvplay(?:\.skaties)?\.lv(?:/parraides)?|
|
||||
(?:tv3play|play\.tv3)\.lt(?:/programos)?|
|
||||
tv3play(?:\.tv3)?\.ee/sisu|
|
||||
(?:tv(?:3|6|8|10)play|viafree)\.se/program|
|
||||
(?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
|
||||
play\.novatv\.bg/programi
|
||||
play\.nova(?:tv)?\.bg/programi
|
||||
)
|
||||
/(?:[^/]+/)+
|
||||
)
|
||||
@ -203,10 +203,18 @@ class TVPlayIE(InfoExtractor):
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# views is null
|
||||
'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
|
||||
@ -288,6 +296,7 @@ class TVPlayIE(InfoExtractor):
|
||||
'url': m.group('url'),
|
||||
'app': m.group('app'),
|
||||
'play_path': m.group('playpath'),
|
||||
'preference': -1,
|
||||
})
|
||||
else:
|
||||
fmt.update({
|
||||
@ -447,3 +456,102 @@ class ViafreeIE(InfoExtractor):
|
||||
'skip_rtmp': True,
|
||||
}),
|
||||
ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
|
||||
class TVPlayHomeIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
|
||||
'info_dict': {
|
||||
'id': '366367',
|
||||
'ext': 'mp4',
|
||||
'title': 'Aferistai',
|
||||
'description': 'Aferistai. Kalėdinė pasaka.',
|
||||
'series': 'Aferistai [N-7]',
|
||||
'season': '1 sezonas',
|
||||
'season_number': 1,
|
||||
'duration': 464,
|
||||
'timestamp': 1394209658,
|
||||
'upload_date': '20140307',
|
||||
'age_limit': 18,
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': [TVPlayIE.ie_key()],
|
||||
}, {
|
||||
'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_id = self._search_regex(
|
||||
r'data-asset-id\s*=\s*["\'](\d{5,7})\b', webpage, 'video id',
|
||||
default=None)
|
||||
|
||||
if video_id:
|
||||
return self.url_result(
|
||||
'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'm3u8 url', group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
self._sort_formats(formats)
|
||||
|
||||
title = self._search_regex(
|
||||
r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
||||
'title', default=None, group='value') or self._html_search_meta(
|
||||
'title', webpage, default=None) or self._og_search_title(
|
||||
webpage)
|
||||
|
||||
description = self._html_search_meta(
|
||||
'description', webpage,
|
||||
default=None) or self._og_search_description(webpage)
|
||||
|
||||
thumbnail = self._search_regex(
|
||||
r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
||||
'thumbnail', default=None, group='url') or self._html_search_meta(
|
||||
'thumbnail', webpage, default=None) or self._og_search_thumbnail(
|
||||
webpage)
|
||||
|
||||
duration = int_or_none(self._search_regex(
|
||||
r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
|
||||
fatal=False))
|
||||
|
||||
season = self._search_regex(
|
||||
(r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
|
||||
r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||
'season', default=None, group='value')
|
||||
season_number = int_or_none(self._search_regex(
|
||||
r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
|
||||
default=None))
|
||||
episode = self._search_regex(
|
||||
r'(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'episode',
|
||||
default=None, group='value')
|
||||
episode_number = int_or_none(self._search_regex(
|
||||
r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
|
||||
default=None))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'duration': duration,
|
||||
'season': season,
|
||||
'season_number': season_number,
|
||||
'episode': episode,
|
||||
'episode_number': episode_number,
|
||||
'formats': formats,
|
||||
}
|
||||
|
youtube_dl/extractor/twitcasting.py (new file, 60 lines)
@@ -0,0 +1,60 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class TwitCastingIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'https://twitcasting.tv/ivetesangalo/movie/2357609',
|
||||
'md5': '745243cad58c4681dc752490f7540d7f',
|
||||
'info_dict': {
|
||||
'id': '2357609',
|
||||
'ext': 'mp4',
|
||||
'title': 'Recorded Live #2357609',
|
||||
'uploader_id': 'ivetesangalo',
|
||||
'description': "Moi! I'm live on TwitCasting from my iPhone.",
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
uploader_id = mobj.group('uploader_id')
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
|
||||
webpage, 'title', default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, fatal=True)
|
||||
|
||||
m3u8_url = self._search_regex(
|
||||
(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
||||
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
|
||||
webpage, 'm3u8 url', group='url')
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'uploader_id': uploader_id,
|
||||
'formats': formats,
|
||||
}
|
@ -51,7 +51,9 @@ class TwitchBaseIE(InfoExtractor):
|
||||
expected=True)
|
||||
|
||||
def _call_api(self, path, item_id, *args, **kwargs):
|
||||
kwargs.setdefault('headers', {})['Client-ID'] = self._CLIENT_ID
|
||||
headers = kwargs.get('headers', {}).copy()
|
||||
headers['Client-ID'] = self._CLIENT_ID
|
||||
kwargs['headers'] = headers
|
||||
response = self._download_json(
|
||||
'%s/%s' % (self._API_BASE, path), item_id,
|
||||
*args, **compat_kwargs(kwargs))
|
||||
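
The `_call_api` change above (and the matching `_download_webpage_handle` change in UdemyIE further down) replaces an in-place `setdefault` with a copy of the headers dict. A small sketch of why that matters, using a made-up Client-ID value:

```python
def call_api_mutating(path, **kwargs):
    # setdefault() returns the caller's own dict, so this write leaks out.
    kwargs.setdefault('headers', {})['Client-ID'] = 'abc123'
    return kwargs


def call_api_copying(path, **kwargs):
    # Copy first, then modify: the caller's dict stays untouched.
    headers = kwargs.get('headers', {}).copy()
    headers['Client-ID'] = 'abc123'
    kwargs['headers'] = headers
    return kwargs


shared = {'Referer': 'https://example.com'}
call_api_mutating('kraken/streams', headers=shared)
print('Client-ID' in shared)  # True: the caller's dict was modified

shared = {'Referer': 'https://example.com'}
call_api_copying('kraken/streams', headers=shared)
print('Client-ID' in shared)  # False: the caller's dict was left alone
```
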
@ -559,7 +561,8 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
TwitchAllVideosIE,
|
||||
TwitchUploadsIE,
|
||||
TwitchPastBroadcastsIE,
|
||||
TwitchHighlightsIE))
|
||||
TwitchHighlightsIE,
|
||||
TwitchClipsIE))
|
||||
else super(TwitchStreamIE, cls).suitable(url))
|
||||
|
||||
def _real_extract(self, url):
|
||||
@ -633,7 +636,7 @@ class TwitchStreamIE(TwitchBaseIE):
|
||||
|
||||
class TwitchClipsIE(TwitchBaseIE):
|
||||
IE_NAME = 'twitch:clips'
|
||||
_VALID_URL = r'https?://clips\.twitch\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
|
||||
@ -653,6 +656,9 @@ class TwitchClipsIE(TwitchBaseIE):
|
||||
# multiple formats
|
||||
'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -122,7 +122,9 @@ class UdemyIE(InfoExtractor):
|
||||
raise ExtractorError(error_str, expected=True)
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
|
||||
headers = kwargs.get('headers', {}).copy()
|
||||
headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
|
||||
kwargs['headers'] = headers
|
||||
return super(UdemyIE, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
|
||||
|
||||
|
@ -13,7 +13,7 @@ from ..utils import (
|
||||
|
||||
|
||||
class VidziIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://vidzi.tv/cghql9yq6emu.html',
|
||||
'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
|
||||
@ -35,6 +35,9 @@ class VidziIE(InfoExtractor):
|
||||
}, {
|
||||
'url': 'https://vidzi.si/rph9gztxj1et.html',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidzi.nu/cghql9yq6emu.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -130,16 +130,16 @@ class ViewsterIE(InfoExtractor):
|
||||
def concat(suffix, sep='-'):
|
||||
return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
|
||||
|
||||
for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
|
||||
media = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/video' % entry_id,
|
||||
video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
|
||||
'mediaType': media_type,
|
||||
'language': audio,
|
||||
'subtitle': subtitle,
|
||||
})
|
||||
if not media:
|
||||
continue
|
||||
medias = self._download_json(
|
||||
'https://public-api.viewster.com/movies/%s/videos' % entry_id,
|
||||
video_id, fatal=False, query={
|
||||
'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
|
||||
'language': audio,
|
||||
'subtitle': subtitle,
|
||||
})
|
||||
if not medias:
|
||||
continue
|
||||
for media in medias:
|
||||
video_url = media.get('Uri')
|
||||
if not video_url:
|
||||
continue
|
||||
|
@ -299,10 +299,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
|
||||
'uploader_id': 'atencio',
|
||||
'uploader': 'Peter Atencio',
|
||||
'channel_id': 'keypeele',
|
||||
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
|
||||
'timestamp': 1380339469,
|
||||
'upload_date': '20130928',
|
||||
'duration': 187,
|
||||
},
|
||||
'expected_warnings': ['Unable to download JSON metadata'],
|
||||
},
|
||||
{
|
||||
'url': 'http://vimeo.com/76979871',
|
||||
@ -355,11 +358,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'url': 'https://vimeo.com/channels/tributes/6213729',
|
||||
'info_dict': {
|
||||
'id': '6213729',
|
||||
'ext': 'mov',
|
||||
'ext': 'mp4',
|
||||
'title': 'Vimeo Tribute: The Shining',
|
||||
'uploader': 'Casey Donahue',
|
||||
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
|
||||
'uploader_id': 'caseydonahue',
|
||||
'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
|
||||
'channel_id': 'tributes',
|
||||
'timestamp': 1250886430,
|
||||
'upload_date': '20090821',
|
||||
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
|
||||
@ -465,6 +470,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if 'Referer' not in headers:
|
||||
headers['Referer'] = url
|
||||
|
||||
channel_id = self._search_regex(
|
||||
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
|
||||
|
||||
# Extract ID from URL
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
video_id = mobj.group('id')
|
||||
@ -543,6 +551,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
else:
|
||||
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
|
||||
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
|
||||
config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
|
||||
config = self._search_regex(config_re, webpage, 'info section',
|
||||
flags=re.DOTALL)
|
||||
config = json.loads(config)
|
||||
@ -563,19 +572,23 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
if config.get('view') == 4:
|
||||
config = self._verify_player_video_password(redirect_url, video_id)
|
||||
|
||||
vod = config.get('video', {}).get('vod', {})
|
||||
|
||||
def is_rented():
|
||||
if '>You rented this title.<' in webpage:
|
||||
return True
|
||||
if config.get('user', {}).get('purchased'):
|
||||
return True
|
||||
label = try_get(
|
||||
config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str)
|
||||
if label and label.startswith('You rented this'):
|
||||
return True
|
||||
for purchase_option in vod.get('purchase_options', []):
|
||||
if purchase_option.get('purchased'):
|
||||
return True
|
||||
label = purchase_option.get('label_string')
|
||||
if label and (label.startswith('You rented this') or label.endswith(' remaining')):
|
||||
return True
|
||||
return False
|
||||
|
||||
if is_rented():
|
||||
feature_id = config.get('video', {}).get('vod', {}).get('feature_id')
|
||||
if is_rented() and vod.get('is_trailer'):
|
||||
feature_id = vod.get('feature_id')
|
||||
if feature_id and not data.get('force_feature_id', False):
|
||||
return self.url_result(smuggle_url(
|
||||
'https://player.vimeo.com/player/%s' % feature_id,
|
||||
@ -652,6 +665,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
|
||||
webpage, 'license', default=None, group='license')
|
||||
|
||||
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
|
||||
|
||||
info_dict = {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
@ -662,6 +677,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||
'like_count': like_count,
|
||||
'comment_count': comment_count,
|
||||
'license': cc_license,
|
||||
'channel_id': channel_id,
|
||||
'channel_url': channel_url,
|
||||
}
|
||||
|
||||
info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
|
||||
|
@ -293,8 +293,12 @@ class VKIE(VKBaseIE):
|
||||
# This video is no longer available, because its author has been blocked.
|
||||
'url': 'https://vk.com/video-10639516_456240611',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
# The video is not available in your region.
|
||||
'url': 'https://vk.com/video-51812607_171445436',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
@ -354,6 +358,9 @@ class VKIE(VKBaseIE):
|
||||
|
||||
r'<!>This video is no longer available, because it has been deleted.':
|
||||
'Video %s is no longer available, because it has been deleted.',
|
||||
|
||||
r'<!>The video .+? is not available in your region.':
|
||||
'Video %s is not available in your region.',
|
||||
}
|
||||
|
||||
for error_re, error_msg in ERRORS.items():
|
||||
|
@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):
|
||||
class VRVIE(VRVBaseIE):
|
||||
IE_NAME = 'vrv'
|
||||
_VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
|
||||
'info_dict': {
|
||||
'id': 'GR9PNZ396',
|
||||
@ -85,7 +85,34 @@ class VRVIE(VRVBaseIE):
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
}]
|
||||
|
||||
def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
|
||||
if not url or stream_format not in ('hls', 'dash'):
|
||||
return []
|
||||
assert audio_lang or hardsub_lang
|
||||
stream_id_list = []
|
||||
if audio_lang:
|
||||
stream_id_list.append('audio-%s' % audio_lang)
|
||||
if hardsub_lang:
|
||||
stream_id_list.append('hardsub-%s' % hardsub_lang)
|
||||
stream_id = '-'.join(stream_id_list)
|
||||
format_id = '%s-%s' % (stream_format, stream_id)
|
||||
if stream_format == 'hls':
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
url, video_id, 'mp4', m3u8_id=format_id,
|
||||
note='Downloading %s m3u8 information' % stream_id,
|
||||
fatal=False)
|
||||
elif stream_format == 'dash':
|
||||
adaptive_formats = self._extract_mpd_formats(
|
||||
url, video_id, mpd_id=format_id,
|
||||
note='Downloading %s MPD information' % stream_id,
|
||||
fatal=False)
|
||||
if audio_lang:
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_lang
|
||||
return adaptive_formats
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
@ -115,26 +142,9 @@ class VRVIE(VRVBaseIE):
|
||||
for stream_type, streams in streams_json.get('streams', {}).items():
|
||||
if stream_type in ('adaptive_hls', 'adaptive_dash'):
|
||||
for stream in streams.values():
|
||||
stream_url = stream.get('url')
|
||||
if not stream_url:
|
||||
continue
|
||||
stream_id = stream.get('hardsub_locale') or audio_locale
|
||||
format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
|
||||
if stream_type == 'adaptive_hls':
|
||||
adaptive_formats = self._extract_m3u8_formats(
|
||||
stream_url, video_id, 'mp4', m3u8_id=format_id,
|
||||
note='Downloading %s m3u8 information' % stream_id,
|
||||
fatal=False)
|
||||
else:
|
||||
adaptive_formats = self._extract_mpd_formats(
|
||||
stream_url, video_id, mpd_id=format_id,
|
||||
note='Downloading %s MPD information' % stream_id,
|
||||
fatal=False)
|
||||
if audio_locale:
|
||||
for f in adaptive_formats:
|
||||
if f.get('acodec') != 'none':
|
||||
f['language'] = audio_locale
|
||||
formats.extend(adaptive_formats)
|
||||
formats.extend(self._extract_vrv_formats(
|
||||
stream.get('url'), video_id, stream_type.split('_')[1],
|
||||
audio_locale, stream.get('hardsub_locale')))
|
||||
self._sort_formats(formats)
|
||||
|
||||
subtitles = {}
|
||||
|
@ -4,15 +4,19 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
float_or_none,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class VzaarIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
# HTTP and HLS
|
||||
'url': 'https://vzaar.com/videos/1152805',
|
||||
'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
|
||||
'info_dict': {
|
||||
@ -40,24 +44,48 @@ class VzaarIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
|
||||
source_url = video_data['sourceUrl']
|
||||
|
||||
info = {
|
||||
title = video_data['videoTitle']
|
||||
|
||||
formats = []
|
||||
|
||||
source_url = url_or_none(video_data.get('sourceUrl'))
|
||||
if source_url:
|
||||
f = {
|
||||
'url': source_url,
|
||||
'format_id': 'http',
|
||||
}
|
||||
if 'audio' in source_url:
|
||||
f.update({
|
||||
'vcodec': 'none',
|
||||
'ext': 'mp3',
|
||||
})
|
||||
else:
|
||||
f.update({
|
||||
'width': int_or_none(video_data.get('width')),
|
||||
'height': int_or_none(video_data.get('height')),
|
||||
'ext': 'mp4',
|
||||
'fps': float_or_none(video_data.get('fps')),
|
||||
})
|
||||
formats.append(f)
|
||||
|
||||
video_guid = video_data.get('guid')
|
||||
usp = video_data.get('usp')
|
||||
if isinstance(video_guid, compat_str) and isinstance(usp, dict):
|
||||
m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
|
||||
% (video_guid, video_id)) + '&'.join(
|
||||
'%s=%s' % (k, v) for k, v in usp.items())
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
|
||||
self._sort_formats(formats)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': video_data['videoTitle'],
|
||||
'url': source_url,
|
||||
'title': title,
|
||||
'thumbnail': self._proto_relative_url(video_data.get('poster')),
|
||||
'duration': float_or_none(video_data.get('videoDuration')),
|
||||
'timestamp': unified_timestamp(video_data.get('ts')),
|
||||
'formats': formats,
|
||||
}
|
||||
if 'audio' in source_url:
|
||||
info.update({
|
||||
'vcodec': 'none',
|
||||
'ext': 'mp3',
|
||||
})
|
||||
else:
|
||||
info.update({
|
||||
'width': int_or_none(video_data.get('width')),
|
||||
'height': int_or_none(video_data.get('height')),
|
||||
'ext': 'mp4',
|
||||
})
|
||||
return info
|
||||
|
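
How the HLS manifest URL is assembled in VzaarIE above, shown in isolation; the fable.vzaar.com template comes from the diff, while the guid and usp values below are placeholders.

```python
def build_m3u8_url(video_id, video_guid, usp):
    # The usp dict carries the signed query parameters returned by the API.
    base = 'http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?' % (video_guid, video_id)
    return base + '&'.join('%s=%s' % (k, v) for k, v in usp.items())


print(build_m3u8_url('1152805', 'abc-def',
                     {'Policy': 'p0', 'Signature': 's1g', 'Key-Pair-Id': 'KP'}))
```
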
@ -4,7 +4,10 @@ from __future__ import unicode_literals
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
)
|
||||
|
||||
|
||||
class WebOfStoriesIE(InfoExtractor):
|
||||
@ -133,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor):
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = [
|
||||
self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
|
||||
for video_number in set(re.findall(r'href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
|
||||
self.url_result(
|
||||
'http://www.webofstories.com/play/%s' % video_id,
|
||||
'WebOfStories', video_id=video_id)
|
||||
for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
|
||||
]
|
||||
|
||||
title = self._search_regex(
|
||||
|
youtube_dl/extractor/wwe.py (new file, 140 lines)
@@ -0,0 +1,140 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class WWEBaseIE(InfoExtractor):
|
||||
_SUBTITLE_LANGS = {
|
||||
'English': 'en',
|
||||
'Deutsch': 'de',
|
||||
}
|
||||
|
||||
def _extract_entry(self, data, url, video_id=None):
|
||||
video_id = compat_str(video_id or data['nid'])
|
||||
title = data['title']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
data['file'], video_id, 'mp4', entry_protocol='m3u8_native',
|
||||
m3u8_id='hls')
|
||||
|
||||
description = data.get('description')
|
||||
thumbnail = urljoin(url, data.get('image'))
|
||||
series = data.get('show_name')
|
||||
episode = data.get('episode_name')
|
||||
|
||||
subtitles = {}
|
||||
tracks = data.get('tracks')
|
||||
if isinstance(tracks, list):
|
||||
for track in tracks:
|
||||
if not isinstance(track, dict):
|
||||
continue
|
||||
if track.get('kind') != 'captions':
|
||||
continue
|
||||
track_file = url_or_none(track.get('file'))
|
||||
if not track_file:
|
||||
continue
|
||||
label = track.get('label')
|
||||
lang = self._SUBTITLE_LANGS.get(label, label) or 'en'
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'url': track_file,
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'thumbnail': thumbnail,
|
||||
'series': series,
|
||||
'episode': episode,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class WWEIE(WWEBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*videos/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wwe.com/videos/daniel-bryan-vs-andrade-cien-almas-smackdown-live-sept-4-2018',
|
||||
'md5': '92811c6a14bfc206f7a6a9c5d9140184',
|
||||
'info_dict': {
|
||||
'id': '40048199',
|
||||
'ext': 'mp4',
|
||||
'title': 'Daniel Bryan vs. Andrade "Cien" Almas: SmackDown LIVE, Sept. 4, 2018',
|
||||
'description': 'md5:2d7424dbc6755c61a0e649d2a8677f67',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://de.wwe.com/videos/gran-metalik-vs-tony-nese-wwe-205-live-sept-4-2018',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
landing = self._parse_json(
|
||||
self._html_search_regex(
|
||||
r'(?s)Drupal\.settings\s*,\s*({.+?})\s*\)\s*;',
|
||||
webpage, 'drupal settings'),
|
||||
display_id)['WWEVideoLanding']
|
||||
|
||||
data = landing['initialVideo']['playlist'][0]
|
||||
video_id = landing.get('initialVideoId')
|
||||
|
||||
info = self._extract_entry(data, url, video_id)
|
||||
info['display_id'] = display_id
|
||||
return info
|
||||
|
||||
|
||||
class WWEPlaylistIE(WWEBaseIE):
|
||||
_VALID_URL = r'https?://(?:[^/]+\.)?wwe\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.wwe.com/shows/raw/2018-11-12',
|
||||
'info_dict': {
|
||||
'id': '2018-11-12',
|
||||
},
|
||||
'playlist_mincount': 11,
|
||||
}, {
|
||||
'url': 'http://www.wwe.com/article/walk-the-prank-wwe-edition',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.wwe.com/shows/wwenxt/article/matt-riddle-interview',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if WWEIE.suitable(url) else super(WWEPlaylistIE, cls).suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
entries = []
|
||||
for mobj in re.finditer(
|
||||
r'data-video\s*=\s*(["\'])(?P<data>{.+?})\1', webpage):
|
||||
video = self._parse_json(
|
||||
mobj.group('data'), display_id, transform_source=unescapeHTML,
|
||||
fatal=False)
|
||||
if not video:
|
||||
continue
|
||||
data = try_get(video, lambda x: x['playlist'][0], dict)
|
||||
if not data:
|
||||
continue
|
||||
try:
|
||||
entry = self._extract_entry(data, url)
|
||||
except Exception:
|
||||
continue
|
||||
entry['extractor_key'] = WWEIE.ie_key()
|
||||
entries.append(entry)
|
||||
|
||||
return self.playlist_result(entries, display_id)
|
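
A reduced, runnable version of the caption handling in WWEBaseIE._extract_entry above; the label-to-language table mirrors the diff and the sample track data is invented.

```python
_SUBTITLE_LANGS = {'English': 'en', 'Deutsch': 'de'}


def collect_subtitles(tracks):
    subtitles = {}
    for track in tracks or []:
        if not isinstance(track, dict) or track.get('kind') != 'captions':
            continue
        track_file = track.get('file')
        if not track_file:
            continue
        label = track.get('label')
        # Map the human-readable label to a language code, defaulting to 'en'.
        lang = _SUBTITLE_LANGS.get(label, label) or 'en'
        subtitles.setdefault(lang, []).append({'url': track_file})
    return subtitles


print(collect_subtitles([
    {'kind': 'captions', 'label': 'Deutsch', 'file': 'https://example.com/de.vtt'},
    {'kind': 'thumbnails', 'file': 'https://example.com/sprite.vtt'},
]))
# {'de': [{'url': 'https://example.com/de.vtt'}]}
```
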
@ -23,7 +23,7 @@ class XFileShareIE(InfoExtractor):
|
||||
(r'powerwatch\.pw', 'PowerWatch'),
|
||||
(r'rapidvideo\.ws', 'Rapidvideo.ws'),
|
||||
(r'thevideobee\.to', 'TheVideoBee'),
|
||||
(r'vidto\.me', 'Vidto'),
|
||||
(r'vidto\.(?:me|se)', 'Vidto'),
|
||||
(r'streamin\.to', 'Streamin.To'),
|
||||
(r'xvidstage\.com', 'XVIDSTAGE'),
|
||||
(r'vidabc\.com', 'Vid ABC'),
|
||||
@ -115,7 +115,10 @@ class XFileShareIE(InfoExtractor):
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
|
||||
'only_matching': True
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://vidto.se/1tx1pf6t12cg.html',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
|
youtube_dl/extractor/yourporn.py (new file, 41 lines)
@@ -0,0 +1,41 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import urljoin
|
||||
|
||||
|
||||
class YourPornIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?yourporn\.sexy/post/(?P<id>[^/?#&.]+)'
|
||||
_TEST = {
|
||||
'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
|
||||
'md5': '6f8682b6464033d87acaa7a8ff0c092e',
|
||||
'info_dict': {
|
||||
'id': '57ffcb2e1179b',
|
||||
'ext': 'mp4',
|
||||
'title': 'md5:c9f43630bd968267672651ba905a7d35',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
video_url = urljoin(url, self._parse_json(
|
||||
self._search_regex(
|
||||
r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
|
||||
group='data'),
|
||||
video_id)[video_id])
|
||||
|
||||
title = (self._search_regex(
|
||||
r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
|
||||
default=None) or self._og_search_description(webpage)).strip()
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'url': video_url,
|
||||
'title': title,
|
||||
'thumbnail': thumbnail,
|
||||
}
|
@ -41,6 +41,7 @@ from ..utils import (
|
||||
remove_quotes,
|
||||
remove_start,
|
||||
smuggle_url,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
@ -64,7 +65,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
# If True it will raise an error if no login info is provided
|
||||
_LOGIN_REQUIRED = False
|
||||
|
||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
|
||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
||||
|
||||
def _set_language(self):
|
||||
self._set_cookie(
|
||||
@ -259,7 +260,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
return True
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
kwargs.setdefault('query', {})['disable_polymer'] = 'true'
|
||||
query = kwargs.get('query', {}).copy()
|
||||
query['disable_polymer'] = 'true'
|
||||
kwargs['query'] = query
|
||||
return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
|
||||
*args, **compat_kwargs(kwargs))
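The change above stops mutating the caller's `query` dict: `setdefault` writes `disable_polymer` into whatever dict the caller passed in, while copying first keeps the caller's mapping untouched. A small illustration (the function names here are made up):

```python
def add_flag_mutating(**kwargs):
    kwargs.setdefault('query', {})['disable_polymer'] = 'true'
    return kwargs['query']

def add_flag_copying(**kwargs):
    query = kwargs.get('query', {}).copy()
    query['disable_polymer'] = 'true'
    return query

caller_query = {'hl': 'en'}
add_flag_mutating(query=caller_query)
print(caller_query)   # {'hl': 'en', 'disable_polymer': 'true'} -- caller's dict changed

caller_query = {'hl': 'en'}
add_flag_copying(query=caller_query)
print(caller_query)   # {'hl': 'en'} -- caller's dict left intact
```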

@ -347,6 +350,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:www\.)?hooktube\.com/|
(?:www\.)?yourepeat\.com/|
tube\.majestyc\.net/|
(?:www\.)?invidio\.us/|
youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
(?:.*?\#/)? # handle anchor (#/) redirect urls
(?: # the various things that can precede the ID:
@ -490,12 +494,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': 'Philipp Hagemeister',
'uploader_id': 'phihag',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
'upload_date': '20121002',
'license': 'Standard YouTube License',
'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
'dislike_count': int,
'start_time': 1,
@ -578,6 +585,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'view_count': int,
'like_count': int,
'dislike_count': int,
},
@ -1064,6 +1072,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
'only_matching': True,
},
{
'url': 'https://invidio.us/watch?v=BaW_jenozKc',
'only_matching': True,
},
]

def __init__(self, *args, **kwargs):
@ -1178,7 +1190,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _parse_sig_js(self, jscode):
funcname = self._search_regex(
(r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
jscode, 'Initial JS player signature function name', group='sig')

jsi = JSInterpreter(jscode)
@ -1527,6 +1542,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))

player_response = {}

# Get video info
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
@ -1569,6 +1586,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
sts = ytplayer_config.get('sts')
if not player_response:
pl_response = str_or_none(args.get('player_response'))
if pl_response:
pl_response = self._parse_json(pl_response, video_id, fatal=False)
if isinstance(pl_response, dict):
player_response = pl_response
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
@ -1597,6 +1620,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info_webpage:
continue
get_video_info = compat_parse_qs(video_info_webpage)
if not player_response:
pl_response = get_video_info.get('player_response', [None])[0]
if isinstance(pl_response, dict):
player_response = pl_response
add_dash_mpd(get_video_info)
if view_count is None:
view_count = extract_view_count(get_video_info)
@ -1642,9 +1669,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason',
video_id=video_id)

video_details = try_get(
player_response, lambda x: x['videoDetails'], dict) or {}

# title
if 'title' in video_info:
video_title = video_info['title'][0]
elif 'title' in player_response:
video_title = video_details['title']
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
@ -1707,6 +1739,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

if view_count is None:
view_count = extract_view_count(video_info)
if view_count is None and video_details:
view_count = int_or_none(video_details.get('viewCount'))

# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@ -1887,7 +1921,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')

# uploader
video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
video_uploader = try_get(
video_info, lambda x: x['author'][0],
compat_str) or str_or_none(video_details.get('author'))
if video_uploader:
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
else:
@ -1905,6 +1941,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
else:
self._downloader.report_warning('unable to extract uploader nickname')

channel_id = self._html_search_meta(
'channelId', video_webpage, 'channel id')
channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None

# thumbnail image
# We try first to get a high quality image:
m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@ -1996,12 +2036,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
like_count = _extract_count('like')
dislike_count = _extract_count('dislike')

if view_count is None:
view_count = str_to_int(self._search_regex(
r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
'view count', default=None))

# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)

video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0]))
if not video_duration:
video_duration = int_or_none(video_details.get('lengthSeconds'))
if not video_duration:
video_duration = parse_duration(self._html_search_meta(
'duration', video_webpage, 'video duration'))
@ -2076,6 +2123,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'uploader': video_uploader,
'uploader_id': video_uploader_id,
'uploader_url': video_uploader_url,
'channel_id': channel_id,
'channel_url': channel_url,
'upload_date': upload_date,
'license': video_license,
'creator': video_creator or artist,
@ -2114,7 +2163,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
(?:https?://)?
(?:\w+\.)?
(?:
youtube\.com/
(?:
youtube\.com|
invidio\.us
)
/
(?:
(?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
\? (?:.*?[&;])*? (?:p|a|list)=
@ -2123,7 +2176,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
)
(
(?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
# Top tracks, they can also include dots
|(?:MC)[\w\.]*
)
@ -2227,6 +2280,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
'categories': ['People & Blogs'],
'tags': list,
'view_count': int,
'like_count': int,
'dislike_count': int,
},
@ -2261,6 +2315,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
}, {
'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
'only_matching': True,
}, {
# music album playlist
'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
'only_matching': True,
}, {
'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
'only_matching': True,
}]
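To see what the broadened playlist-ID pattern from this diff accepts, here is a quick check against the new music-album id and an ordinary playlist id (both ids are taken from the tests above):

```python
import re

# The widened _PLAYLIST_ID_RE from the diff above.
PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'

for candidate in ('OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
                  'PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
                  'not-a-playlist-id'):
    print(candidate, bool(re.match(r'(?:%s)$' % PLAYLIST_ID_RE, candidate)))
# The first two match; the last one does not.
```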

def _real_initialize(self):
@ -2403,7 +2464,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):

class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
IE_DESC = 'YouTube.com channels'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
_VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
_TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
_VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
IE_NAME = 'youtube:channel'
@ -2424,6 +2485,9 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
'id': 'UUs0ifCMCm1icqRbqhUINa0w',
'title': 'Uploads from Deus Ex',
},
}, {
'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
'only_matching': True,
}]

@classmethod
@ -18,12 +18,12 @@ from ..utils import (
)


class ZattooBaseIE(InfoExtractor):
_NETRC_MACHINE = 'zattoo'
_HOST_URL = 'https://zattoo.com'

class ZattooPlatformBaseIE(InfoExtractor):
_power_guide_hash = None

def _host_url(self):
return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)

def _login(self):
username, password = self._get_login_info()
if not username or not password:
@ -33,13 +33,13 @@ class ZattooBaseIE(InfoExtractor):

try:
data = self._download_json(
'%s/zapi/v2/account/login' % self._HOST_URL, None, 'Logging in',
'%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
data=urlencode_postdata({
'login': username,
'password': password,
'remember': 'true',
}), headers={
'Referer': '%s/login' % self._HOST_URL,
'Referer': '%s/login' % self._host_url(),
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
})
except ExtractorError as e:
@ -53,7 +53,7 @@ class ZattooBaseIE(InfoExtractor):

def _real_initialize(self):
webpage = self._download_webpage(
self._HOST_URL, None, 'Downloading app token')
self._host_url(), None, 'Downloading app token')
app_token = self._html_search_regex(
r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1',
webpage, 'app token', group='token')
@ -62,7 +62,7 @@ class ZattooBaseIE(InfoExtractor):

# Will setup appropriate cookies
self._request_webpage(
'%s/zapi/v2/session/hello' % self._HOST_URL, None,
'%s/zapi/v2/session/hello' % self._host_url(), None,
'Opening session', data=urlencode_postdata({
'client_app_token': app_token,
'uuid': compat_str(uuid4()),
@ -75,7 +75,7 @@ class ZattooBaseIE(InfoExtractor):

def _extract_cid(self, video_id, channel_name):
channel_groups = self._download_json(
'%s/zapi/v2/cached/channels/%s' % (self._HOST_URL,
'%s/zapi/v2/cached/channels/%s' % (self._host_url(),
self._power_guide_hash),
video_id, 'Downloading channel list',
query={'details': False})['channel_groups']
@ -93,28 +93,30 @@ class ZattooBaseIE(InfoExtractor):

def _extract_cid_and_video_info(self, video_id):
data = self._download_json(
'%s/zapi/program/details' % self._HOST_URL,
'%s/zapi/v2/cached/program/power_details/%s' % (
self._host_url(), self._power_guide_hash),
video_id,
'Downloading video information',
query={
'program_id': video_id,
'complete': True
'program_ids': video_id,
'complete': True,
})

p = data['program']
p = data['programs'][0]
cid = p['cid']

info_dict = {
'id': video_id,
'title': p.get('title') or p['episode_title'],
'description': p.get('description'),
'thumbnail': p.get('image_url'),
'title': p.get('t') or p['et'],
'description': p.get('d'),
'thumbnail': p.get('i_url'),
'creator': p.get('channel_name'),
'episode': p.get('episode_title'),
'episode_number': int_or_none(p.get('episode_number')),
'season_number': int_or_none(p.get('season_number')),
'episode': p.get('et'),
'episode_number': int_or_none(p.get('e_no')),
'season_number': int_or_none(p.get('s_no')),
'release_year': int_or_none(p.get('year')),
'categories': try_get(p, lambda x: x['categories'], list),
'categories': try_get(p, lambda x: x['c'], list),
'tags': try_get(p, lambda x: x['g'], list)
}

return cid, info_dict
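The new power_details response uses compact keys (`t`, `et`, `d`, `i_url`, `e_no`, `s_no`, `c`, `g`), as mapped above. A rough sketch with a made-up program dict and a simplified stand-in for `int_or_none`:

```python
def int_or_none(v):
    # Simplified version of youtube_dl.utils.int_or_none.
    try:
        return int(v)
    except (TypeError, ValueError):
        return None

# Hypothetical entry, shaped like data['programs'][0].
p = {'cid': 'zdf', 't': 'Some Show', 'et': 'Pilot', 'd': 'Description text',
     'i_url': 'https://example.com/thumb.jpg', 'e_no': '3', 's_no': '1',
     'year': '2018', 'c': ['Movie'], 'g': ['Drama']}

info_dict = {
    'title': p.get('t') or p['et'],
    'description': p.get('d'),
    'thumbnail': p.get('i_url'),
    'episode': p.get('et'),
    'episode_number': int_or_none(p.get('e_no')),
    'season_number': int_or_none(p.get('s_no')),
    'release_year': int_or_none(p.get('year')),
    'categories': p.get('c'),
    'tags': p.get('g'),
}
print(info_dict['title'], info_dict['episode_number'], info_dict['season_number'])
# Some Show 3 1
```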
@ -126,11 +128,11 @@ class ZattooBaseIE(InfoExtractor):

if is_live:
postdata_common.update({'timeshift': 10800})
url = '%s/zapi/watch/live/%s' % (self._HOST_URL, cid)
url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
elif record_id:
url = '%s/zapi/watch/recording/%s' % (self._HOST_URL, record_id)
url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
else:
url = '%s/zapi/watch/recall/%s/%s' % (self._HOST_URL, cid, video_id)
url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id)

formats = []
for stream_type in ('dash', 'hls', 'hls5', 'hds'):
@ -201,13 +203,13 @@ class ZattooBaseIE(InfoExtractor):
return info_dict


class QuicklineBaseIE(ZattooBaseIE):
class QuicklineBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'quickline'
_HOST_URL = 'https://mobiltv.quickline.com'
_HOST = 'mobiltv.quickline.com'


class QuicklineIE(QuicklineBaseIE):
_VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST)

_TEST = {
'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste',
@ -220,7 +222,7 @@ class QuicklineIE(QuicklineBaseIE):


class QuicklineLiveIE(QuicklineBaseIE):
_VALID_URL = r'https?://(?:www\.)?mobiltv\.quickline\.com/watch/(?P<id>[^/]+)'
_VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST)

_TEST = {
'url': 'https://mobiltv.quickline.com/watch/srf1',
@ -236,8 +238,18 @@ class QuicklineLiveIE(QuicklineBaseIE):
return self._extract_video(channel_name, video_id, is_live=True)


class ZattooBaseIE(ZattooPlatformBaseIE):
_NETRC_MACHINE = 'zattoo'
_HOST = 'zattoo.com'


def _make_valid_url(tmpl, host):
return tmpl % re.escape(host)


class ZattooIE(ZattooBaseIE):
_VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
_VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
_VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST)

# Since regular videos are only available for 7 days and recorded videos
# are only available for a specific user, we cannot have detailed tests.
@ -269,3 +281,142 @@ class ZattooLiveIE(ZattooBaseIE):
def _real_extract(self, url):
channel_name = video_id = self._match_id(url)
return self._extract_video(channel_name, video_id, is_live=True)


class NetPlusIE(ZattooIE):
_NETRC_MACHINE = 'netplus'
_HOST = 'netplus.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.netplus.tv/watch/abc/123-abc',
'only_matching': True,
}]


class MNetTVIE(ZattooIE):
_NETRC_MACHINE = 'mnettv'
_HOST = 'tvplus.m-net.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
'only_matching': True,
}]


class WalyTVIE(ZattooIE):
_NETRC_MACHINE = 'walytv'
_HOST = 'player.waly.tv'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://player.waly.tv/watch/abc/123-abc',
'only_matching': True,
}]


class BBVTVIE(ZattooIE):
_NETRC_MACHINE = 'bbvtv'
_HOST = 'bbv-tv.net'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
'only_matching': True,
}]


class VTXTVIE(ZattooIE):
_NETRC_MACHINE = 'vtxtv'
_HOST = 'vtxtv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
'only_matching': True,
}]


class MyVisionTVIE(ZattooIE):
_NETRC_MACHINE = 'myvisiontv'
_HOST = 'myvisiontv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.myvisiontv.ch/watch/abc/123-abc',
'only_matching': True,
}]


class GlattvisionTVIE(ZattooIE):
_NETRC_MACHINE = 'glattvisiontv'
_HOST = 'iptv.glattvision.ch'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
'only_matching': True,
}]


class SAKTVIE(ZattooIE):
_NETRC_MACHINE = 'saktv'
_HOST = 'saktv.ch'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.saktv.ch/watch/abc/123-abc',
'only_matching': True,
}]


class EWETVIE(ZattooIE):
_NETRC_MACHINE = 'ewetv'
_HOST = 'tvonline.ewe.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
'only_matching': True,
}]


class QuantumTVIE(ZattooIE):
_NETRC_MACHINE = 'quantumtv'
_HOST = 'quantum-tv.com'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
'only_matching': True,
}]


class OsnatelTVIE(ZattooIE):
_NETRC_MACHINE = 'osnateltv'
_HOST = 'tvonline.osnatel.de'
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
'only_matching': True,
}]


class EinsUndEinsTVIE(ZattooIE):
_NETRC_MACHINE = '1und1tv'
_HOST = '1und1.tv'
_API_HOST = 'www.%s' % _HOST
_VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST)

_TESTS = [{
'url': 'https://www.1und1.tv/watch/abc/123-abc',
'only_matching': True,
}]
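The `_make_valid_url` helper above lets every Zattoo-platform site reuse one URL template. A standalone sketch of the same idea, tested against a made-up watch URL:

```python
import re

VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'

def make_valid_url(tmpl, host):
    # Escape the host so its dots are matched literally, as _make_valid_url does.
    return tmpl % re.escape(host)

pattern = make_valid_url(VALID_URL_TEMPLATE, 'tvonline.ewe.de')
m = re.match(pattern, 'https://tvonline.ewe.de/watch/zdf/123456789-some-show')
print(m.group('channel'), m.group('id'))  # zdf 123456789
```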

57
youtube_dl/extractor/zype.py
Normal file
@ -0,0 +1,57 @@
# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class ZypeIE(InfoExtractor):
_VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
_TEST = {
'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
'md5': 'eaee31d474c76a955bdaba02a505c595',
'info_dict': {
'id': '5b400b834b32992a310622b9',
'ext': 'mp4',
'title': 'Smoky Barbecue Favorites',
'thumbnail': r're:^https?://.*\.jpe?g',
},
}

@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url')
for mobj in re.finditer(
r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
webpage)]

def _real_extract(self, url):
video_id = self._match_id(url)

webpage = self._download_webpage(url, video_id)

title = self._search_regex(
r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'title', group='value')

m3u8_url = self._search_regex(
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
'm3u8 url', group='url')

formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)

thumbnail = self._search_regex(
r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
default=False, group='url')

return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'formats': formats,
}
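A standalone sketch of the embed detection used by `_extract_urls` above; the host page markup and api_key value are invented:

```python
import re

ZYPE_EMBED_RE = (r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com'
                 r'/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1')

# Hypothetical host page embedding a Zype player.
page = ('<script src="https://player.zype.com/embed/5b400b834b32992a310622b9.js'
        '?api_key=FAKEKEY&autoplay=false"></script>')

urls = [m.group('url') for m in re.finditer(ZYPE_EMBED_RE, page)]
print(urls)
# ['https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=FAKEKEY&autoplay=false']
```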
@ -49,7 +49,6 @@ from .compat import (
compat_os_name,
compat_parse_qs,
compat_shlex_quote,
compat_socket_create_connection,
compat_str,
compat_struct_pack,
compat_struct_unpack,
@ -882,13 +881,51 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
kwargs['strict'] = True
hc = http_class(*args, **compat_kwargs(kwargs))
source_address = ydl_handler._params.get('source_address')

if source_address is not None:
# This is to workaround _create_connection() from socket where it will try all
# address data from getaddrinfo() including IPv6. This filters the result from
# getaddrinfo() based on the source_address value.
# This is based on the cpython socket.create_connection() function.
# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
host, port = address
err = None
addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
ip_addrs = [addr for addr in addrs if addr[0] == af]
if addrs and not ip_addrs:
ip_version = 'v4' if af == socket.AF_INET else 'v6'
raise socket.error(
"No remote IP%s addresses available for connect, can't use '%s' as source address"
% (ip_version, source_address[0]))
for res in ip_addrs:
af, socktype, proto, canonname, sa = res
sock = None
try:
sock = socket.socket(af, socktype, proto)
if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
sock.settimeout(timeout)
sock.bind(source_address)
sock.connect(sa)
err = None # Explicitly break reference cycle
return sock
except socket.error as _:
err = _
if sock is not None:
sock.close()
if err is not None:
raise err
else:
raise socket.error('getaddrinfo returns an empty list')
if hasattr(hc, '_create_connection'):
hc._create_connection = _create_connection
sa = (source_address, 0)
if hasattr(hc, 'source_address'): # Python 2.7+
hc.source_address = sa
else: # Python 2.6
def _hc_connect(self, *args, **kwargs):
sock = compat_socket_create_connection(
sock = _create_connection(
(self.host, self.port), self.timeout, sa)
if is_https:
self.sock = ssl.wrap_socket(
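The core of the workaround above is filtering `getaddrinfo()` results down to the address family of the configured source address, so a bind to an IPv4 address never gets paired with an IPv6 target. A minimal stdlib-only illustration (the source address is an assumed example from the documentation range 192.0.2.0/24):

```python
import socket

source_address = ('192.0.2.10', 0)  # assumed example source address
af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6

addrs = socket.getaddrinfo('localhost', 80, 0, socket.SOCK_STREAM)
ip_addrs = [addr for addr in addrs if addr[0] == af]

# Only these results could be connected to while binding to the IPv4 source address.
print([sa for _, _, _, _, sa in ip_addrs])
```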
@ -2442,7 +2479,7 @@ def parse_codecs(codecs_str):
vcodec, acodec = None, None
for full_codec in splited_codecs:
codec = full_codec.split('.')[0]
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01'):
if not vcodec:
vcodec = full_codec
elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
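For context on the `av01` addition above, here is a cut-down sketch of how a `codecs=` string gets split into a video and an audio codec (this helper is simplified, not the real `parse_codecs`):

```python
def split_codecs(codecs_str):
    # Simplified illustration: first matching entry wins for each slot.
    vcodec = acodec = None
    for full_codec in (c.strip() for c in codecs_str.split(',')):
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'vp9', 'hev1', 'av01'):
            vcodec = vcodec or full_codec
        elif codec in ('mp4a', 'opus', 'vorbis'):
            acodec = acodec or full_codec
    return vcodec, acodec

print(split_codecs('av01.0.05M.08,mp4a.40.2'))  # ('av01.0.05M.08', 'mp4a.40.2')
```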
@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = '2018.08.04'
__version__ = '2018.11.18'