diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md deleted file mode 100644 index 5469c73cf..000000000 --- a/.github/ISSUE_TEMPLATE.md +++ /dev/null @@ -1,61 +0,0 @@ -## Please follow the guide below - -- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) -- Use the *Preview* tab to see what your issue will actually look like - ---- - -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2019.04.07*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **2019.04.07** - -### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones -- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser - -### What is the purpose of your *issue*? -- [ ] Bug report (encountered problems with youtube-dl) -- [ ] Site support request (request for adding support for a new site) -- [ ] Feature request (request for a new functionality) -- [ ] Question -- [ ] Other - ---- - -### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* - ---- - -### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: - -Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v `), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): - -``` -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version 2019.04.07 -[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 -[debug] Proxy map: {} -... - -``` - ---- - -### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - ---- - -### Description of your *issue*, suggested solution and other information - -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md new file mode 100644 index 000000000..fb0d33b8f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1_broken_site.md @@ -0,0 +1,63 @@ +--- +name: Broken site support +about: Report broken or misfunctioning site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar issues including closed ones + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md new file mode 100644 index 000000000..3c95565a6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md @@ -0,0 +1,54 @@ +--- +name: Site support request +about: Request support for a new site +title: '' +labels: 'site-support-request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a new site support request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that none of provided URLs violate any copyrights +- [ ] I've searched the bugtracker for similar site support requests including closed ones + + +## Example URLs + + + +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md new file mode 100644 index 000000000..7410776d7 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md @@ -0,0 +1,37 @@ +--- +name: Site feature request +about: Request a new functionality for a site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a site feature request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've searched the bugtracker for similar site feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md new file mode 100644 index 000000000..cc52bcca6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/4_bug_report.md @@ -0,0 +1,65 @@ +--- +name: Bug report +about: Report a bug unrelated to any particular site or extractor +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support issue +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar bug reports including closed ones +- [ ] I've read bugs section in FAQ + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md new file mode 100644 index 000000000..bbd421b1a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/5_feature_request.md @@ -0,0 +1,38 @@ +--- +name: Feature request +about: Request a new functionality unrelated to any particular site or extractor +title: '' +labels: 'request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a feature request +- [ ] I've verified that I'm running youtube-dl version **2019.07.02** +- [ ] I've searched the bugtracker for similar feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE/6_question.md b/.github/ISSUE_TEMPLATE/6_question.md new file mode 100644 index 000000000..1fd7cd5dc --- /dev/null +++ b/.github/ISSUE_TEMPLATE/6_question.md @@ -0,0 +1,38 @@ +--- +name: Ask question +about: Ask youtube-dl related question +title: '' +labels: 'question' +--- + + + + +## Checklist + + + +- [ ] I'm asking a question +- [ ] I've looked through the README and FAQ for similar questions +- [ ] I've searched the bugtracker for similar questions including closed ones + + +## Question + + + +WRITE QUESTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl.md b/.github/ISSUE_TEMPLATE_tmpl.md deleted file mode 100644 index 8b7e73417..000000000 --- a/.github/ISSUE_TEMPLATE_tmpl.md +++ /dev/null @@ -1,61 +0,0 @@ -## Please follow the guide below - -- You will be asked some questions and requested to provide some information, please read them **carefully** and answer honestly -- Put an `x` into all the boxes [ ] relevant to your *issue* (like this: `[x]`) -- Use the *Preview* tab to see what your issue will actually look like - ---- - -### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *%(version)s*. If it's not, read [this FAQ entry](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with outdated version will be rejected. -- [ ] I've **verified** and **I assure** that I'm running youtube-dl **%(version)s** - -### Before submitting an *issue* make sure you have: -- [ ] At least skimmed through the [README](https://github.com/ytdl-org/youtube-dl/blob/master/README.md), **most notably** the [FAQ](https://github.com/ytdl-org/youtube-dl#faq) and [BUGS](https://github.com/ytdl-org/youtube-dl#bugs) sections -- [ ] [Searched](https://github.com/ytdl-org/youtube-dl/search?type=Issues) the bugtracker for similar issues including closed ones -- [ ] Checked that provided video/audio/playlist URLs (if any) are alive and playable in a browser - -### What is the purpose of your *issue*? -- [ ] Bug report (encountered problems with youtube-dl) -- [ ] Site support request (request for adding support for a new site) -- [ ] Feature request (request for a new functionality) -- [ ] Question -- [ ] Other - ---- - -### The following sections concretize particular purposed issues, you can erase any section (the contents between triple ---) not applicable to your *issue* - ---- - -### If the purpose of this *issue* is a *bug report*, *site support request* or you are not completely sure provide the full verbose output as follows: - -Add the `-v` flag to **your command line** you run youtube-dl with (`youtube-dl -v `), copy the **whole** output and insert it here. It should look similar to one below (replace it with **your** log inserted between triple ```): - -``` -[debug] System config: [] -[debug] User config: [] -[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] -[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 -[debug] youtube-dl version %(version)s -[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 -[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 -[debug] Proxy map: {} -... - -``` - ---- - -### If the purpose of this *issue* is a *site support request* please provide all kinds of example URLs support for which should be included (replace following example URLs by **yours**): -- Single video: https://www.youtube.com/watch?v=BaW_jenozKc -- Single video: https://youtu.be/BaW_jenozKc -- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc - -Note that **youtube-dl does not support sites dedicated to [copyright infringement](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. In order for site support request to be accepted all provided example URLs should not violate any copyrights. - ---- - -### Description of your *issue*, suggested solution and other information - -Explanation of your *issue* in arbitrary form goes here. Please make sure the [description is worded well enough to be understood](https://github.com/ytdl-org/youtube-dl#is-the-description-of-the-issue-itself-sufficient). Provide as much context and examples as possible. -If work on your *issue* requires account credentials please provide them or explain how one can obtain them. diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md new file mode 100644 index 000000000..c7600d5b5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.md @@ -0,0 +1,63 @@ +--- +name: Broken site support +about: Report broken or misfunctioning site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar issues including closed ones + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md new file mode 100644 index 000000000..d4988e639 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md @@ -0,0 +1,54 @@ +--- +name: Site support request +about: Request support for a new site +title: '' +labels: 'site-support-request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a new site support request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that none of provided URLs violate any copyrights +- [ ] I've searched the bugtracker for similar site support requests including closed ones + + +## Example URLs + + + +- Single video: https://www.youtube.com/watch?v=BaW_jenozKc +- Single video: https://youtu.be/BaW_jenozKc +- Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md new file mode 100644 index 000000000..65f0a32f3 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md @@ -0,0 +1,37 @@ +--- +name: Site feature request +about: Request a new functionality for a site +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a site feature request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've searched the bugtracker for similar site feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md new file mode 100644 index 000000000..41fb14b72 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.md @@ -0,0 +1,65 @@ +--- +name: Bug report +about: Report a bug unrelated to any particular site or extractor +title: '' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a broken site support issue +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've checked that all provided URLs are alive and playable in a browser +- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped +- [ ] I've searched the bugtracker for similar bug reports including closed ones +- [ ] I've read bugs section in FAQ + + +## Verbose log + + + +``` +PASTE VERBOSE LOG HERE +``` + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md new file mode 100644 index 000000000..b3431a7f0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.md @@ -0,0 +1,38 @@ +--- +name: Feature request +about: Request a new functionality unrelated to any particular site or extractor +title: '' +labels: 'request' +--- + + + + +## Checklist + + + +- [ ] I'm reporting a feature request +- [ ] I've verified that I'm running youtube-dl version **%(version)s** +- [ ] I've searched the bugtracker for similar feature requests including closed ones + + +## Description + + + +WRITE DESCRIPTION HERE diff --git a/.travis.yml b/.travis.yml index 82e81d078..6d16c2955 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ python: - "3.6" - "pypy" - "pypy3" +dist: trusty env: - YTDL_TEST_SET=core - YTDL_TEST_SET=download diff --git a/ChangeLog b/ChangeLog index 421f247fd..5ce78b07a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,204 @@ +version 2019.07.02 + +Core ++ [utils] Introduce random_user_agent and use as default User-Agent (#21546) + +Extractors ++ [vevo] Add support for embed.vevo.com URLs (#21565) ++ [openload] Add support for oload.biz (#21574) +* [xiami] Update API base URL (#21575) +* [yourporn] Fix extraction (#21585) ++ [acast] Add support for URLs with episode id (#21444) ++ [dailymotion] Add support for DM.player embeds +* [soundcloud] Update client id + + +version 2019.06.27 + +Extractors ++ [go] Add support for disneynow.com (#21528) +* [mixer:vod] Relax URL regular expression (#21531, #21536) +* [drtv] Relax URL regular expression +* [fusion] Fix extraction (#17775, #21269) +- [nfb] Remove extractor (#21518) ++ [beeg] Add support for api/v6 v2 URLs (#21511) ++ [brightcove:new] Add support for playlists (#21331) ++ [openload] Add support for oload.life (#21495) +* [vimeo:channel,group] Make title extraction non fatal +* [vimeo:likes] Implement extrator in terms of channel extractor (#21493) ++ [pornhub] Add support for more paged video sources ++ [pornhub] Add support for downloading single pages and search pages (#15570) +* [pornhub] Rework extractors (#11922, #16078, #17454, #17936) ++ [youtube] Add another signature function pattern +* [tf1] Fix extraction (#21365, #21372) +* [crunchyroll] Move Accept-Language workaround to video extractor since + it causes playlists not to list any videos +* [crunchyroll:playlist] Fix and relax title extraction (#21291, #21443) + + +version 2019.06.21 + +Core +* [utils] Restrict parse_codecs and add theora as known vcodec (#21381) + +Extractors +* [youtube] Update signature function patterns (#21469, #21476) +* [youtube] Make --write-annotations non fatal (#21452) ++ [sixplay] Add support for rtlmost.hu (#21405) +* [youtube] Hardcode codec metadata for av01 video only formats (#21381) +* [toutv] Update client key (#21370) ++ [biqle] Add support for new embed domain +* [cbs] Improve DRM protected videos detection (#21339) + + +version 2019.06.08 + +Core +* [downloader/common] Improve rate limit (#21301) +* [utils] Improve strip_or_none +* [extractor/common] Strip src attribute for HTML5 entries code (#18485, + #21169) + +Extractors +* [ted] Fix playlist extraction (#20844, #21032) +* [vlive:playlist] Fix video extraction when no playlist is found (#20590) ++ [vlive] Add CH+ support (#16887, #21209) ++ [openload] Add support for oload.website (#21329) ++ [tvnow] Extract HD formats (#21201) ++ [redbulltv] Add support for rrn:content URLs (#21297) +* [youtube] Fix average rating extraction (#21304) ++ [bitchute] Extract HTML5 formats (#21306) +* [cbsnews] Fix extraction (#9659, #15397) +* [vvvvid] Relax URL regular expression (#21299) ++ [prosiebensat1] Add support for new API (#21272) ++ [vrv] Extract adaptive_hls formats (#21243) +* [viki] Switch to HTTPS (#21001) +* [LiveLeak] Check if the original videos exist (#21206, #21208) +* [rtp] Fix extraction (#15099) +* [youtube] Improve DRM protected videos detection (#1774) ++ [srgssrplay] Add support for popupvideoplayer URLs (#21155) ++ [24video] Add support for porno.24video.net (#21194) ++ [24video] Add support for 24video.site (#21193) +- [pornflip] Remove extractor +- [criterion] Remove extractor (#21195) +* [pornhub] Use HTTPS (#21061) +* [bitchute] Fix uploader extraction (#21076) +* [streamcloud] Reduce waiting time to 6 seconds (#21092) +- [novamov] Remove extractors (#21077) ++ [openload] Add support for oload.press (#21135) +* [vivo] Fix extraction (#18906, #19217) + + +version 2019.05.20 + +Core ++ [extractor/common] Move workaround for applying first Set-Cookie header + into a separate _apply_first_set_cookie_header method + +Extractors +* [safari] Fix authentication (#21090) +* [vk] Use _apply_first_set_cookie_header +* [vrt] Fix extraction (#20527) ++ [canvas] Add support for vrtnieuws and sporza site ids and extract + AES HLS formats ++ [vrv] Extract captions (#19238) +* [tele5] Improve video id extraction +* [tele5] Relax URL regular expression (#21020, #21063) +* [svtplay] Update API URL (#21075) ++ [yahoo:gyao] Add X-User-Agent header to dam proxy requests (#21071) + + +version 2019.05.11 + +Core +* [utils] Transliterate "þ" as "th" (#20897) + +Extractors ++ [cloudflarestream] Add support for videodelivery.net (#21049) ++ [byutv] Add support for DVR videos (#20574, #20676) ++ [gfycat] Add support for URLs with tags (#20696, #20731) ++ [openload] Add support for verystream.com (#20701, #20967) +* [youtube] Use sp field value for signature field name (#18841, #18927, + #21028) ++ [yahoo:gyao] Extend URL regular expression (#21008) +* [youtube] Fix channel id extraction (#20982, #21003) ++ [sky] Add support for news.sky.com (#13055) ++ [youtube:entrylistbase] Retry on 5xx HTTP errors (#20965) ++ [francetvinfo] Extend video id extraction (#20619, #20740) +* [4tube] Update token hosts (#20918) +* [hotstar] Move to API v2 (#20931) +* [fox] Fix API error handling under python 2 (#20925) ++ [redbulltv] Extend URL regular expression (#20922) + + +version 2019.04.30 + +Extractors +* [openload] Use real Chrome versions (#20902) +- [youtube] Remove info el for get_video_info request +* [youtube] Improve extraction robustness +- [dramafever] Remove extractor (#20868) +* [adn] Fix subtitle extraction (#12724) ++ [ccc] Extract creator (#20355) ++ [ccc:playlist] Add support for media.ccc.de playlists (#14601, #20355) ++ [sverigesradio] Add support for sverigesradio.se (#18635) ++ [cinemax] Add support for cinemax.com +* [sixplay] Try extracting non-DRM protected manifests (#20849) ++ [youtube] Extract Youtube Music Auto-generated metadata (#20599, #20742) +- [wrzuta] Remove extractor (#20684, #20801) +* [twitch] Prefer source format (#20850) ++ [twitcasting] Add support for private videos (#20843) +* [reddit] Validate thumbnail URL (#20030) +* [yandexmusic] Fix track URL extraction (#20820) + + +version 2019.04.24 + +Extractors +* [youtube] Fix extraction (#20758, #20759, #20761, #20762, #20764, #20766, + #20767, #20769, #20771, #20768, #20770) +* [toutv] Fix extraction and extract series info (#20757) ++ [vrv] Add support for movie listings (#19229) ++ [youtube] Print error when no data is available (#20737) ++ [soundcloud] Add support for new rendition and improve extraction (#20699) ++ [ooyala] Add support for geo verification proxy ++ [nrl] Add support for nrl.com (#15991) ++ [vimeo] Extract live archive source format (#19144) ++ [vimeo] Add support for live streams and improve info extraction (#19144) ++ [ntvcojp] Add support for cu.ntv.co.jp ++ [nhk] Extract RTMPT format ++ [nhk] Add support for audio URLs ++ [udemy] Add another course id extraction pattern (#20491) ++ [openload] Add support for oload.services (#20691) ++ [openload] Add support for openloed.co (#20691, #20693) +* [bravotv] Fix extraction (#19213) + + +version 2019.04.17 + +Extractors +* [openload] Randomize User-Agent (closes #20688) ++ [openload] Add support for oladblock domains (#20471) +* [adn] Fix subtitle extraction (#12724) ++ [aol] Add support for localized websites ++ [yahoo] Add support GYAO episode URLs ++ [yahoo] Add support for streaming.yahoo.co.jp (#5811, #7098) ++ [yahoo] Add support for gyao.yahoo.co.jp +* [aenetworks] Fix history topic extraction and extract more formats ++ [cbs] Extract smpte and vtt subtitles ++ [streamango] Add support for streamcherry.com (#20592) ++ [yourporn] Add support for sxyprn.com (#20646) +* [mgtv] Fix extraction (#20650) +* [linkedin:learning] Use urljoin for form action URL (#20431) ++ [gdc] Add support for kaltura embeds (#20575) +* [dispeak] Improve mp4 bitrate extraction +* [kaltura] Sanitize embed URLs +* [jwplatfom] Do not match manifest URLs (#20596) +* [aol] Restrict URL regular expression and improve format extraction ++ [tiktok] Add support for new URL schema (#20573) ++ [stv:player] Add support for player.stv.tv (#20586) + + version 2019.04.07 Core diff --git a/Makefile b/Makefile index 4a62f44bc..3e17365b8 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites clean: - rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp ISSUE_TEMPLATE.md.tmp youtube-dl youtube-dl.exe + rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe find . -name "*.pyc" -delete find . -name "*.class" -delete @@ -78,8 +78,12 @@ README.md: youtube_dl/*.py youtube_dl/*/*.py CONTRIBUTING.md: README.md $(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md -.github/ISSUE_TEMPLATE.md: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md youtube_dl/version.py - $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl.md .github/ISSUE_TEMPLATE.md +issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dl/version.py + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE/4_bug_report.md + $(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md .github/ISSUE_TEMPLATE/5_feature_request.md supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md diff --git a/README.md b/README.md index 92c3a92a1..8c48a3012 100644 --- a/README.md +++ b/README.md @@ -700,7 +700,7 @@ Note that on Windows you may need to use double quotes instead of single. # Download best mp4 format available or any other best if no mp4 available $ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best' -# Download best format available but not better that 480p +# Download best format available but no better than 480p $ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]' # Download best video only format but no bigger than 50 MB diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py index 72b2ee422..740f04de0 100644 --- a/devscripts/check-porn.py +++ b/devscripts/check-porn.py @@ -45,12 +45,12 @@ for test in gettestcases(): RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) - if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or - test['info_dict']['age_limit'] != 18): + if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] + or test['info_dict']['age_limit'] != 18): print('\nPotential missing age_limit check: {0}'.format(test['name'])) - elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and - test['info_dict']['age_limit'] == 18): + elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] + and test['info_dict']['age_limit'] == 18): print('\nPotential false negative: {0}'.format(test['name'])) else: diff --git a/devscripts/release.sh b/devscripts/release.sh index 4c413bf6d..f2411c927 100755 --- a/devscripts/release.sh +++ b/devscripts/release.sh @@ -78,8 +78,8 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py sed -i "s//$version/" ChangeLog /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." -make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites -git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py ChangeLog +make README.md CONTRIBUTING.md issuetemplates supportedsites +git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dl/version.py ChangeLog git commit $gpg_sign_commits -m "release $version" /bin/echo -e "\n### Now tagging, signing and pushing..." diff --git a/docs/supportedsites.md b/docs/supportedsites.md index df272c479..55ae43144 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -46,6 +46,7 @@ - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **AnimeOnDemand** - **Anvato** + - **aol.com** - **APA** - **Aparat** - **AppleConnect** @@ -77,7 +78,6 @@ - **AudioBoom** - **audiomack** - **audiomack:album** - - **auroravid**: AuroraVid - **AWAAN** - **awaan:live** - **awaan:season** @@ -149,6 +149,7 @@ - **CBSInteractive** - **CBSLocal** - **cbsnews**: CBS News + - **cbsnews:embed** - **cbsnews:livevideo**: CBS News Live Videos - **CBSSports** - **CCMA** @@ -163,6 +164,7 @@ - **chirbit** - **chirbit:profile** - **Cinchcast** + - **Cinemax** - **CiscoLiveSearch** - **CiscoLiveSession** - **CJSW** @@ -172,7 +174,6 @@ - **Clipsyndicate** - **CloserToTruth** - **CloudflareStream** - - **cloudtime**: CloudTime - **Cloudy** - **Clubic** - **Clyp** @@ -192,7 +193,6 @@ - **Coub** - **Cracked** - **Crackle** - - **Criterion** - **CrooksAndLiars** - **crunchyroll** - **crunchyroll:playlist** @@ -200,6 +200,7 @@ - **CSpan**: C-SPAN - **CtsNews**: 華視新聞 - **CTVNews** + - **cu.ntv.co.jp**: Nippon Television Network - **Culturebox** - **CultureUnplugged** - **curiositystream** @@ -235,8 +236,6 @@ - **DouyuTV**: 斗鱼 - **DPlay** - **DPlayIt** - - **dramafever** - - **dramafever:series** - **DRBonanza** - **Dropbox** - **DrTuber** @@ -486,6 +485,7 @@ - **MatchTV** - **MDR**: MDR.DE and KiKA - **media.ccc.de** + - **media.ccc.de:lists** - **Medialaan** - **Mediaset** - **Mediasite** @@ -581,7 +581,6 @@ - **NextTV**: 壹電視 - **Nexx** - **NexxEmbed** - - **nfb**: National Film Board of Canada - **nfl.com** - **NhkVod** - **nhl.com** @@ -607,7 +606,6 @@ - **nowness** - **nowness:playlist** - **nowness:series** - - **nowvideo**: NowVideo - **Noz** - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **npo.nl:live** @@ -623,6 +621,7 @@ - **NRKTVEpisodes** - **NRKTVSeason** - **NRKTVSeries** + - **NRLTV** - **ntv.ru** - **Nuvid** - **NYTimes** @@ -632,7 +631,6 @@ - **OdaTV** - **Odnoklassniki** - **OktoberfestTV** - - **on.aol.com** - **OnDemandKorea** - **onet.pl** - **onet.tv** @@ -691,11 +689,11 @@ - **PopcornTV** - **PornCom** - **PornerBros** - - **PornFlip** - **PornHd** - **PornHub**: PornHub and Thumbzilla - - **PornHubPlaylist** - - **PornHubUserVideos** + - **PornHubPagedVideoList** + - **PornHubUser** + - **PornHubUserVideosUpload** - **Pornotube** - **PornoVoisines** - **PornoXO** @@ -732,6 +730,7 @@ - **RBMARadio** - **RDS**: RDS.ca - **RedBullTV** + - **RedBullTVRrnContent** - **Reddit** - **RedditR** - **RedTube** @@ -803,6 +802,7 @@ - **ShowRoomLive** - **Sina** - **SkylineWebcams** + - **SkyNews** - **skynewsarabia:article** - **skynewsarabia:video** - **SkySports** @@ -853,7 +853,10 @@ - **StreamCZ** - **StreetVoice** - **StretchInternet** + - **stv:player** - **SunPorno** + - **sverigesradio:episode** + - **sverigesradio:publication** - **SVT** - **SVTPage** - **SVTPlay**: SVT Play and Öppet arkiv @@ -994,6 +997,7 @@ - **Vbox7** - **VeeHD** - **Veoh** + - **verystream** - **Vessel** - **Vesti**: Вести.Ru - **Vevo** @@ -1017,7 +1021,6 @@ - **videomore:video** - **VideoPremium** - **VideoPress** - - **videoweed**: VideoWeed - **Vidio** - **VidLii** - **vidme** @@ -1064,7 +1067,7 @@ - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **Vrak** - - **VRT**: deredactie.be, sporza.be, cobra.be and cobra.canvas.be + - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - **VrtNU**: VrtNU.be - **vrv** - **vrv:series** @@ -1094,13 +1097,10 @@ - **Weibo** - **WeiboMobile** - **WeiqiTV**: WQTV - - **wholecloud**: WholeCloud - **Wimp** - **Wistia** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** - - **wrzuta.pl** - - **wrzuta.pl:playlist** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** @@ -1124,6 +1124,8 @@ - **XVideos** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies + - **yahoo:gyao** + - **yahoo:gyao:player** - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист diff --git a/setup.cfg b/setup.cfg index af9a554c6..da78a9c47 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,4 +3,4 @@ universal = True [flake8] exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv -ignore = E402,E501,E731,E741 +ignore = E402,E501,E731,E741,W503 diff --git a/test/test_aes.py b/test/test_aes.py index 78a28751b..cc89fb6ab 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -44,16 +44,16 @@ class TestAES(unittest.TestCase): def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + - b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' + intlist_to_bytes(self.iv[:8]) + + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) password = intlist_to_bytes(self.key).decode('utf-8') encrypted = base64.b64encode( - intlist_to_bytes(self.iv[:8]) + - b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' + intlist_to_bytes(self.iv[:8]) + + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' ).decode('utf-8') decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py index f1e899819..9f18055e6 100644 --- a/test/test_swfinterp.py +++ b/test/test_swfinterp.py @@ -34,8 +34,8 @@ def _make_testfunc(testfile): def test_func(self): as_file = os.path.join(TEST_DIR, testfile) swf_file = os.path.join(TEST_DIR, test_id + '.swf') - if ((not os.path.exists(swf_file)) or - os.path.getmtime(swf_file) < os.path.getmtime(as_file)): + if ((not os.path.exists(swf_file)) + or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): # Recompile try: subprocess.check_call([ diff --git a/test/test_utils.py b/test/test_utils.py index ca6d832a4..659c6ece5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -73,6 +73,7 @@ from youtube_dl.utils import ( smuggle_url, str_to_int, strip_jsonp, + strip_or_none, timeconvert, unescapeHTML, unified_strdate, @@ -183,7 +184,7 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename( 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True), - 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy') + 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy') def test_sanitize_ids(self): self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') @@ -752,6 +753,18 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) + def test_strip_or_none(self): + self.assertEqual(strip_or_none(' abc'), 'abc') + self.assertEqual(strip_or_none('abc '), 'abc') + self.assertEqual(strip_or_none(' abc '), 'abc') + self.assertEqual(strip_or_none('\tabc\t'), 'abc') + self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') + self.assertEqual(strip_or_none('abc'), 'abc') + self.assertEqual(strip_or_none(''), '') + self.assertEqual(strip_or_none(None), None) + self.assertEqual(strip_or_none(42), None) + self.assertEqual(strip_or_none([]), None) + def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') @@ -809,6 +822,15 @@ class TestUtil(unittest.TestCase): 'vcodec': 'av01.0.05M.08', 'acodec': 'none', }) + self.assertEqual(parse_codecs('theora, vorbis'), { + 'vcodec': 'theora', + 'acodec': 'vorbis', + }) + self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { + 'vcodec': 'unknownvcodec', + 'acodec': 'unknownacodec', + }) + self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 57f52f888..3e832fec2 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -400,9 +400,9 @@ class YoutubeDL(object): else: raise - if (sys.platform != 'win32' and - sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and - not params.get('restrictfilenames', False)): + if (sys.platform != 'win32' + and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] + and not params.get('restrictfilenames', False)): # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' @@ -440,9 +440,9 @@ class YoutubeDL(object): if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] if idxs: correct_argv = ( - ['youtube-dl'] + - [a for i, a in enumerate(argv) if i not in idxs] + - ['--'] + [argv[i] for i in idxs] + ['youtube-dl'] + + [a for i, a in enumerate(argv) if i not in idxs] + + ['--'] + [argv[i] for i in idxs] ) self.report_warning( 'Long argument string detected. ' @@ -850,8 +850,8 @@ class YoutubeDL(object): if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url(ie_result['url']) extract_flat = self.params.get('extract_flat', False) - if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or - extract_flat is True): + if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) + or extract_flat is True): if self.params.get('forcejson', False): self.to_stdout(json.dumps(ie_result)) return ie_result @@ -1619,9 +1619,9 @@ class YoutubeDL(object): # https://github.com/ytdl-org/youtube-dl/issues/10083). incomplete_formats = ( # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) # all formats are audio-only - all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) ctx = { 'formats': formats, @@ -1947,8 +1947,8 @@ class YoutubeDL(object): else: assert fixup_policy in ('ignore', 'never') - if (info_dict.get('requested_formats') is None and - info_dict.get('container') == 'm4a_dash'): + if (info_dict.get('requested_formats') is None + and info_dict.get('container') == 'm4a_dash'): if fixup_policy == 'warn': self.report_warning( '%s: writing DASH m4a. ' @@ -1967,9 +1967,9 @@ class YoutubeDL(object): else: assert fixup_policy in ('ignore', 'never') - if (info_dict.get('protocol') == 'm3u8_native' or - info_dict.get('protocol') == 'm3u8' and - self.params.get('hls_prefer_native')): + if (info_dict.get('protocol') == 'm3u8_native' + or info_dict.get('protocol') == 'm3u8' + and self.params.get('hls_prefer_native')): if fixup_policy == 'warn': self.report_warning('%s: malformed AAC bitstream detected.' % ( info_dict['id'])) @@ -1995,10 +1995,10 @@ class YoutubeDL(object): def download(self, url_list): """Download a given list of URLs.""" outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) - if (len(url_list) > 1 and - outtmpl != '-' and - '%' not in outtmpl and - self.params.get('max_downloads') != 1): + if (len(url_list) > 1 + and outtmpl != '-' + and '%' not in outtmpl + and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: @@ -2143,8 +2143,8 @@ class YoutubeDL(object): if res: res += ', ' res += '%s container' % fdict['container'] - if (fdict.get('vcodec') is not None and - fdict.get('vcodec') != 'none'): + if (fdict.get('vcodec') is not None + and fdict.get('vcodec') != 'none'): if res: res += ', ' res += fdict['vcodec'] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9d4859bcf..165c975dd 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -230,14 +230,14 @@ def _real_main(argv=None): if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True - outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or - (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or - (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or - (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or - (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or - (opts.useid and '%(id)s.%(ext)s') or - (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or - DEFAULT_OUTTMPL) + outtmpl = ((opts.outtmpl is not None and opts.outtmpl) + or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') + or (opts.useid and '%(id)s.%(ext)s') + or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') + or DEFAULT_OUTTMPL) if not os.path.splitext(outtmpl)[1] and opts.extractaudio: parser.error('Cannot download a video and extract audio into the same' ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 7992a23ca..c75ab131b 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -2649,9 +2649,9 @@ else: try: args = shlex.split('中文') - assert (isinstance(args, list) and - isinstance(args[0], compat_str) and - args[0] == '中文') + assert (isinstance(args, list) + and isinstance(args[0], compat_str) + and args[0] == '中文') compat_shlex_split = shlex.split except (AssertionError, UnicodeEncodeError): # Working around shlex issue with unicode strings on some python 2 diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 5979833c0..1cdba89cd 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -176,7 +176,9 @@ class FileDownloader(object): return speed = float(byte_counter) / elapsed if speed > rate_limit: - time.sleep(max((byte_counter // rate_limit) - elapsed, 0)) + sleep_time = float(byte_counter) / rate_limit - elapsed + if sleep_time > 0: + time.sleep(sleep_time) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" @@ -330,15 +332,15 @@ class FileDownloader(object): """ nooverwrites_and_exists = ( - self.params.get('nooverwrites', False) and - os.path.exists(encodeFilename(filename)) + self.params.get('nooverwrites', False) + and os.path.exists(encodeFilename(filename)) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( - self.params.get('continuedl', True) and - os.path.isfile(encodeFilename(filename)) and - not self.params.get('nopart', False) + self.params.get('continuedl', True) + and os.path.isfile(encodeFilename(filename)) + and not self.params.get('nopart', False) ) # Check file already present diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 9b15a0e15..8dd3c2eeb 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -238,8 +238,8 @@ def write_metadata_tag(stream, metadata): def remove_encrypted_media(media): - return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and - 'drmAdditionalHeaderSetId' not in e.attrib, + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib + and 'drmAdditionalHeaderSetId' not in e.attrib, media)) @@ -267,8 +267,8 @@ class F4mFD(FragmentFD): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') - for e in (doc.findall(_add_ns('drmAdditionalHeader')) + - doc.findall(_add_ns('drmAdditionalHeaderSet'))): + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): # If id attribute is missing it's valid for all media nodes # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 917f6dc01..f2e5733b6 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -219,8 +219,8 @@ class FragmentFD(FileDownloader): frag_total_bytes = s.get('total_bytes') or 0 if not ctx['live']: estimated_size = ( - (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['fragment_index'] + 1) * total_frags) + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) + / (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 419e73576..b59aad73f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -76,12 +76,12 @@ class HlsFD(FragmentFD): return fd.real_download(filename, info_dict) def is_ad_fragment_start(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or - s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) def is_ad_fragment_end(s): - return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or - s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s + or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) media_frags = 0 ad_frags = 0 diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index 08670ee3c..3c72ea18b 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -46,8 +46,8 @@ class HttpFD(FileDownloader): is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( - info_dict.get('downloader_options', {}).get('http_chunk_size') or - self.params.get('http_chunk_size') or 0) + info_dict.get('downloader_options', {}).get('http_chunk_size') + or self.params.get('http_chunk_size') or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 @@ -123,11 +123,11 @@ class HttpFD(FileDownloader): content_len = int_or_none(content_range_m.group(3)) accept_content_len = ( # Non-chunked download - not ctx.chunk_size or + not ctx.chunk_size # Chunked download and requested piece or # its part is promised to be served - content_range_end == range_end or - content_len < range_end) + or content_range_end == range_end + or content_len < range_end) if accept_content_len: ctx.data_len = content_len return @@ -152,8 +152,8 @@ class HttpFD(FileDownloader): raise else: # Examine the reported length - if (content_length is not None and - (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): + if (content_length is not None + and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, diff --git a/youtube_dl/extractor/acast.py b/youtube_dl/extractor/acast.py index c4362be88..b17c792d2 100644 --- a/youtube_dl/extractor/acast.py +++ b/youtube_dl/extractor/acast.py @@ -7,6 +7,7 @@ import functools from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + clean_html, float_or_none, int_or_none, try_get, @@ -27,7 +28,7 @@ class ACastIE(InfoExtractor): ''' _TESTS = [{ 'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna', - 'md5': 'a02393c74f3bdb1801c3ec2695577ce0', + 'md5': '16d936099ec5ca2d5869e3a813ee8dc4', 'info_dict': { 'id': '2a92b283-1a75-4ad8-8396-499c641de0d9', 'ext': 'mp3', @@ -46,28 +47,37 @@ class ACastIE(InfoExtractor): }, { 'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22', 'only_matching': True, + }, { + 'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9', + 'only_matching': True, }] def _real_extract(self, url): channel, display_id = re.match(self._VALID_URL, url).groups() s = self._download_json( - 'https://play-api.acast.com/stitch/%s/%s' % (channel, display_id), - display_id)['result'] + 'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id), + display_id) media_url = s['url'] + if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id): + episode_url = s.get('episodeUrl') + if episode_url: + display_id = episode_url + else: + channel, display_id = re.match(self._VALID_URL, s['link']).groups() cast_data = self._download_json( 'https://play-api.acast.com/splash/%s/%s' % (channel, display_id), display_id)['result'] e = cast_data['episode'] - title = e['name'] + title = e.get('name') or s['title'] return { 'id': compat_str(e['id']), 'display_id': display_id, 'url': media_url, 'title': title, - 'description': e.get('description') or e.get('summary'), + 'description': e.get('summary') or clean_html(e.get('description') or s.get('description')), 'thumbnail': e.get('image'), - 'timestamp': unified_timestamp(e.get('publishingDate')), - 'duration': float_or_none(s.get('duration') or e.get('duration')), + 'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')), + 'duration': float_or_none(e.get('duration') or s.get('duration')), 'filesize': int_or_none(e.get('contentLength')), 'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str), 'series': try_get(cast_data, lambda x: x['show']['name'], compat_str), diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py index 9f8a71262..5e7c0724e 100644 --- a/youtube_dl/extractor/addanime.py +++ b/youtube_dl/extractor/addanime.py @@ -59,9 +59,9 @@ class AddAnimeIE(InfoExtractor): parsed_url = compat_urllib_parse_urlparse(url) av_val = av_res + len(parsed_url.netloc) confirm_url = ( - parsed_url.scheme + '://' + parsed_url.netloc + - action + '?' + - compat_urllib_parse_urlencode({ + parsed_url.scheme + '://' + parsed_url.netloc + + action + '?' + + compat_urllib_parse_urlencode({ 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)})) self._download_webpage( confirm_url, video_id, diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py index 1e04a55a6..c95ad2173 100644 --- a/youtube_dl/extractor/adn.py +++ b/youtube_dl/extractor/adn.py @@ -60,14 +60,20 @@ class ADNIE(InfoExtractor): enc_subtitles = self._download_webpage( urljoin(self._BASE_URL, sub_path), - video_id, 'Downloading subtitles data', fatal=False) + video_id, 'Downloading subtitles location', fatal=False) or '{}' + subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location') + if subtitle_location: + enc_subtitles = self._download_webpage( + urljoin(self._BASE_URL, subtitle_location), + video_id, 'Downloading subtitles data', fatal=False, + headers={'Origin': 'https://animedigitalnetwork.fr'}) if not enc_subtitles: return None # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = intlist_to_bytes(aes_cbc_decrypt( bytes_to_intlist(compat_b64decode(enc_subtitles[24:])), - bytes_to_intlist(binascii.unhexlify(self._K + '083db5aebd9353b4')), + bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')), bytes_to_intlist(compat_b64decode(enc_subtitles[:24])) )) subtitles_json = self._parse_json( diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py index 1cf2dcbf3..38dca1b0a 100644 --- a/youtube_dl/extractor/adobepass.py +++ b/youtube_dl/extractor/adobepass.py @@ -25,6 +25,11 @@ MSO_INFO = { 'username_field': 'username', 'password_field': 'password', }, + 'ATT': { + 'name': 'AT&T U-verse', + 'username_field': 'userid', + 'password_field': 'password', + }, 'ATTOTT': { 'name': 'DIRECTV NOW', 'username_field': 'email', diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 85ec6392d..611b948f5 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -1,14 +1,15 @@ +# coding: utf-8 from __future__ import unicode_literals import re from .theplatform import ThePlatformIE from ..utils import ( + extract_attributes, + ExtractorError, + int_or_none, smuggle_url, update_url_query, - unescapeHTML, - extract_attributes, - get_element_by_attribute, ) from ..compat import ( compat_urlparse, @@ -19,6 +20,43 @@ class AENetworksBaseIE(ThePlatformIE): _THEPLATFORM_KEY = 'crazyjava' _THEPLATFORM_SECRET = 's3cr3t' + def _extract_aen_smil(self, smil_url, video_id, auth=None): + query = {'mbr': 'true'} + if auth: + query['auth'] = auth + TP_SMIL_QUERY = [{ + 'assetTypes': 'high_video_ak', + 'switch': 'hls_high_ak' + }, { + 'assetTypes': 'high_video_s3' + }, { + 'assetTypes': 'high_video_s3', + 'switch': 'hls_ingest_fastly' + }] + formats = [] + subtitles = {} + last_e = None + for q in TP_SMIL_QUERY: + q.update(query) + m_url = update_url_query(smil_url, q) + m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET) + try: + tp_formats, tp_subtitles = self._extract_theplatform_smil( + m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes'])) + except ExtractorError as e: + last_e = e + continue + formats.extend(tp_formats) + subtitles = self._merge_subtitles(subtitles, tp_subtitles) + if last_e and not formats: + raise last_e + self._sort_formats(formats) + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + } + class AENetworksIE(AENetworksBaseIE): IE_NAME = 'aenetworks' @@ -33,22 +71,25 @@ class AENetworksIE(AENetworksBaseIE): (?: shows/(?P[^/]+(?:/[^/]+){0,2})| movies/(?P[^/]+)(?:/full-movie)?| - specials/(?P[^/]+)/full-special| + specials/(?P[^/]+)/(?:full-special|preview-)| collections/[^/]+/(?P[^/]+) ) ''' _TESTS = [{ 'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', - 'md5': 'a97a65f7e823ae10e9244bc5433d5fe6', 'info_dict': { 'id': '22253814', 'ext': 'mp4', - 'title': 'Winter Is Coming', + 'title': 'Winter is Coming', 'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', 'timestamp': 1338306241, 'upload_date': '20120529', 'uploader': 'AENE-NEW', }, + 'params': { + # m3u8 download + 'skip_download': True, + }, 'add_ie': ['ThePlatform'], }, { 'url': 'http://www.history.com/shows/ancient-aliens/season-1', @@ -84,6 +125,9 @@ class AENetworksIE(AENetworksBaseIE): }, { 'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward', 'only_matching': True + }, { + 'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story', + 'only_matching': True }] _DOMAIN_TO_REQUESTOR_ID = { 'history.com': 'HISTORY', @@ -124,11 +168,6 @@ class AENetworksIE(AENetworksBaseIE): return self.playlist_result( entries, self._html_search_meta('aetn:SeasonId', webpage)) - query = { - 'mbr': 'true', - 'assetTypes': 'high_video_ak', - 'switch': 'hls_high_ak', - } video_id = self._html_search_meta('aetn:VideoID', webpage) media_url = self._search_regex( [r"media_url\s*=\s*'(?P[^']+)'", @@ -138,64 +177,39 @@ class AENetworksIE(AENetworksBaseIE): theplatform_metadata = self._download_theplatform_metadata(self._search_regex( r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id) info = self._parse_theplatform_metadata(theplatform_metadata) + auth = None if theplatform_metadata.get('AETN$isBehindWall'): requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] resource = self._get_mvpd_resource( requestor_id, theplatform_metadata['title'], theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), theplatform_metadata['ratings'][0]['rating']) - query['auth'] = self._extract_mvpd_auth( + auth = self._extract_mvpd_auth( url, video_id, requestor_id, resource) info.update(self._search_json_ld(webpage, video_id, fatal=False)) - media_url = update_url_query(media_url, query) - media_url = self._sign_url(media_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET) - formats, subtitles = self._extract_theplatform_smil(media_url, video_id) - self._sort_formats(formats) - info.update({ - 'id': video_id, - 'formats': formats, - 'subtitles': subtitles, - }) + info.update(self._extract_aen_smil(media_url, video_id, auth)) return info class HistoryTopicIE(AENetworksBaseIE): IE_NAME = 'history:topic' IE_DESC = 'History.com Topic' - _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P[^/]+)(?:/[^/]+(?:/(?P[^/?#]+))?)?' + _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P[\w+-]+?)-video' _TESTS = [{ - 'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', + 'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video', 'info_dict': { 'id': '40700995724', 'ext': 'mp4', - 'title': "Bet You Didn't Know: Valentine's Day", + 'title': "History of Valentine’s Day", 'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', 'timestamp': 1375819729, 'upload_date': '20130806', - 'uploader': 'AENE-NEW', }, 'params': { # m3u8 download 'skip_download': True, }, 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos', - 'info_dict': - { - 'id': 'world-war-i-history', - 'title': 'World War I History', - }, - 'playlist_mincount': 23, - }, { - 'url': 'http://www.history.com/topics/world-war-i-history/videos', - 'only_matching': True, - }, { - 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history', - 'only_matching': True, - }, { - 'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/speeches', - 'only_matching': True, }] def theplatform_url_result(self, theplatform_url, video_id, query): @@ -215,27 +229,19 @@ class HistoryTopicIE(AENetworksBaseIE): } def _real_extract(self, url): - topic_id, video_display_id = re.match(self._VALID_URL, url).groups() - if video_display_id: - webpage = self._download_webpage(url, video_display_id) - release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() - release_url = unescapeHTML(release_url) - - return self.theplatform_url_result( - release_url, video_id, { - 'mbr': 'true', - 'switch': 'hls', - 'assetTypes': 'high_video_ak', - }) - else: - webpage = self._download_webpage(url, topic_id) - entries = [] - for episode_item in re.findall(r']*>', webpage): - video_attributes = extract_attributes(episode_item) - entries.append(self.theplatform_url_result( - video_attributes['data-release-url'], video_attributes['data-id'], { - 'mbr': 'true', - 'switch': 'hls', - 'assetTypes': 'high_video_ak', - })) - return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + r']+src="[^"]+\btpid=(\d+)', webpage, 'tpid') + result = self._download_json( + 'https://feeds.video.aetnd.com/api/v2/history/videos', + video_id, query={'filter[id]': video_id})['results'][0] + title = result['title'] + info = self._extract_aen_smil(result['publicUrl'], video_id) + info.update({ + 'title': title, + 'description': result.get('description'), + 'duration': int_or_none(result.get('duration')), + 'timestamp': int_or_none(result.get('added'), 1000), + }) + return info diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index dffa9733d..e87994a6a 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -17,7 +17,7 @@ from ..utils import ( class AolIE(InfoExtractor): IE_NAME = 'aol.com' - _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.com/video/(?:[^/]+/)*)(?P[0-9a-f]+)' + _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P[0-9a-f]+)' _TESTS = [{ # video with 5min ID @@ -64,6 +64,18 @@ class AolIE(InfoExtractor): }, { 'url': 'https://www.aol.com/video/playlist/PL8245/5ca79d19d21f1a04035db606/', 'only_matching': True, + }, { + 'url': 'https://www.aol.ca/video/view/u-s-woman-s-family-arrested-for-murder-first-pinned-on-panhandler-police/5c7ccf45bc03931fa04b2fe1/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.co.uk/video/view/-one-dead-and-22-hurt-in-bus-crash-/5cb3a6f3d21f1a072b457347/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.de/video/view/eva-braun-privataufnahmen-von-hitlers-geliebter-werden-digitalisiert/5cb2d49de98ab54c113d3d5d/', + 'only_matching': True, + }, { + 'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index ffc321821..2bd3bfe8a 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -4,17 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_parse_qs, - compat_str, - compat_urllib_parse_urlparse, -) +from ..compat import compat_str from ..utils import ( ExtractorError, - find_xpath_attr, - get_element_by_attribute, int_or_none, - NO_DEFAULT, qualities, try_get, unified_strdate, @@ -25,59 +18,7 @@ from ..utils import ( # add tests. -class ArteTvIE(InfoExtractor): - _VALID_URL = r'https?://videos\.arte\.tv/(?Pfr|de|en|es)/.*-(?P.*?)\.html' - IE_NAME = 'arte.tv' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - lang = mobj.group('lang') - video_id = mobj.group('id') - - ref_xml_url = url.replace('/videos/', '/do_delegate/videos/') - ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml') - ref_xml_doc = self._download_xml( - ref_xml_url, video_id, note='Downloading metadata') - config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang) - config_xml_url = config_node.attrib['ref'] - config = self._download_xml( - config_xml_url, video_id, note='Downloading configuration') - - formats = [{ - 'format_id': q.attrib['quality'], - # The playpath starts at 'mp4:', if we don't manually - # split the url, rtmpdump will incorrectly parse them - 'url': q.text.split('mp4:', 1)[0], - 'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], - 'ext': 'flv', - 'quality': 2 if q.attrib['quality'] == 'hd' else 1, - } for q in config.findall('./urls/url')] - self._sort_formats(formats) - - title = config.find('.//name').text - thumbnail = config.find('.//firstThumbnailUrl').text - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumbnail, - 'formats': formats, - } - - class ArteTVBaseIE(InfoExtractor): - @classmethod - def _extract_url_info(cls, url): - mobj = re.match(cls._VALID_URL, url) - lang = mobj.group('lang') - query = compat_parse_qs(compat_urllib_parse_urlparse(url).query) - if 'vid' in query: - video_id = query['vid'][0] - else: - # This is not a real id, it can be for example AJT for the news - # http://www.arte.tv/guide/fr/emissions/AJT/arte-journal - video_id = mobj.group('id') - return video_id, lang - def _extract_from_json_url(self, json_url, video_id, lang, title=None): info = self._download_json(json_url, video_id) player_info = info['videoJsonPlayer'] @@ -108,13 +49,15 @@ class ArteTVBaseIE(InfoExtractor): 'upload_date': unified_strdate(upload_date_str), 'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'), } - qfunc = qualities(['HQ', 'MQ', 'EQ', 'SQ']) + qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ']) LANGS = { 'fr': 'F', 'de': 'A', 'en': 'E[ANG]', 'es': 'E[ESP]', + 'it': 'E[ITA]', + 'pl': 'E[POL]', } langcode = LANGS.get(lang, lang) @@ -126,8 +69,8 @@ class ArteTVBaseIE(InfoExtractor): l = re.escape(langcode) # Language preference from most to least priority - # Reference: section 5.6.3 of - # http://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-05.pdf + # Reference: section 6.8 of + # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf PREFERENCES = ( # original version in requested language, without subtitles r'VO{0}$'.format(l), @@ -193,274 +136,59 @@ class ArteTVBaseIE(InfoExtractor): class ArteTVPlus7IE(ArteTVBaseIE): IE_NAME = 'arte.tv:+7' - _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/(?:[^/]+/)?(?Pfr|de|en|es)/(?:videos/)?(?:[^/]+/)*(?P[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?Pfr|de|en|es|it|pl)/videos/(?P\d{6}-\d{3}-[AF])' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', - 'only_matching': True, - }, { - 'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', - 'only_matching': True, - }, { - 'url': 'http://www.arte.tv/de/videos/048696-000-A/der-kluge-bauch-unser-zweites-gehirn', - 'only_matching': True, + 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/', + 'info_dict': { + 'id': '088501-000-A', + 'ext': 'mp4', + 'title': 'Mexico: Stealing Petrol to Survive', + 'upload_date': '20190628', + }, }] - @classmethod - def suitable(cls, url): - return False if ArteTVPlaylistIE.suitable(url) else super(ArteTVPlus7IE, cls).suitable(url) - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - webpage = self._download_webpage(url, video_id) - return self._extract_from_webpage(webpage, video_id, lang) - - def _extract_from_webpage(self, webpage, video_id, lang): - patterns_templates = (r'arte_vp_url=["\'](.*?%s.*?)["\']', r'data-url=["\']([^"]+%s[^"]+)["\']') - ids = (video_id, '') - # some pages contain multiple videos (like - # http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D), - # so we first try to look for json URLs that contain the video id from - # the 'vid' parameter. - patterns = [t % re.escape(_id) for _id in ids for t in patterns_templates] - json_url = self._html_search_regex( - patterns, webpage, 'json vp url', default=None) - if not json_url: - def find_iframe_url(webpage, default=NO_DEFAULT): - return self._html_search_regex( - r']+src=(["\'])(?P.+\bjson_url=.+?)\1', - webpage, 'iframe url', group='url', default=default) - - iframe_url = find_iframe_url(webpage, None) - if not iframe_url: - embed_url = self._html_search_regex( - r'arte_vp_url_oembed=\'([^\']+?)\'', webpage, 'embed url', default=None) - if embed_url: - player = self._download_json( - embed_url, video_id, 'Downloading player page') - iframe_url = find_iframe_url(player['html']) - # en and es URLs produce react-based pages with different layout (e.g. - # http://www.arte.tv/guide/en/053330-002-A/carnival-italy?zone=world) - if not iframe_url: - program = self._search_regex( - r'program\s*:\s*({.+?["\']embed_html["\'].+?}),?\s*\n', - webpage, 'program', default=None) - if program: - embed_html = self._parse_json(program, video_id) - if embed_html: - iframe_url = find_iframe_url(embed_html['embed_html']) - if iframe_url: - json_url = compat_parse_qs( - compat_urllib_parse_urlparse(iframe_url).query)['json_url'][0] - if json_url: - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', - webpage, 'title', default=None, group='title') - return self._extract_from_json_url(json_url, video_id, lang, title=title) - # Different kind of embed URL (e.g. - # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) - entries = [ - self.url_result(url) - for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] - return self.playlist_result(entries) - - -# It also uses the arte_vp_url url from the webpage to extract the information -class ArteTVCreativeIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:creative' - _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', - 'info_dict': { - 'id': '057405-001-A', - 'ext': 'mp4', - 'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', - 'upload_date': '20150716', - }, - }, { - 'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', - 'playlist_count': 11, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', - 'only_matching': True, - }] - - -class ArteTVInfoIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:info' - _VALID_URL = r'https?://info\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://info.arte.tv/fr/service-civique-un-cache-misere', - 'info_dict': { - 'id': '067528-000-A', - 'ext': 'mp4', - 'title': 'Service civique, un cache misère ?', - 'upload_date': '20160403', - }, - }] - - -class ArteTVFutureIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:future' - _VALID_URL = r'https?://future\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://future.arte.tv/fr/info-sciences/les-ecrevisses-aussi-sont-anxieuses', - 'info_dict': { - 'id': '050940-028-A', - 'ext': 'mp4', - 'title': 'Les écrevisses aussi peuvent être anxieuses', - 'upload_date': '20140902', - }, - }, { - 'url': 'http://future.arte.tv/fr/la-science-est-elle-responsable', - 'only_matching': True, - }] - - -class ArteTVDDCIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:ddc' - _VALID_URL = r'https?://ddc\.arte\.tv/(?P<lang>emission|folge)/(?P<id>[^/?#&]+)' - - _TESTS = [] - - def _real_extract(self, url): - video_id, lang = self._extract_url_info(url) - if lang == 'folge': - lang = 'de' - elif lang == 'emission': - lang = 'fr' - webpage = self._download_webpage(url, video_id) - scriptElement = get_element_by_attribute('class', 'visu_video_block', webpage) - script_url = self._html_search_regex(r'src="(.*?)"', scriptElement, 'script url') - javascriptPlayerGenerator = self._download_webpage(script_url, video_id, 'Download javascript player generator') - json_url = self._search_regex(r"json_url=(.*)&rendering_place.*", javascriptPlayerGenerator, 'json url') - return self._extract_from_json_url(json_url, video_id, lang) - - -class ArteTVConcertIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:concert' - _VALID_URL = r'https?://concert\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://concert.arte.tv/de/notwist-im-pariser-konzertclub-divan-du-monde', - 'md5': '9ea035b7bd69696b67aa2ccaaa218161', - 'info_dict': { - 'id': '186', - 'ext': 'mp4', - 'title': 'The Notwist im Pariser Konzertclub "Divan du Monde"', - 'upload_date': '20140128', - 'description': 'md5:486eb08f991552ade77439fe6d82c305', - }, - }] - - -class ArteTVCinemaIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:cinema' - _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' - - _TESTS = [{ - 'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', - 'md5': 'a5b9dd5575a11d93daf0e3f404f45438', - 'info_dict': { - 'id': '062494-000-A', - 'ext': 'mp4', - 'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', - 'upload_date': '20150807', - }, - }] - - -class ArteTVMagazineIE(ArteTVPlus7IE): - IE_NAME = 'arte.tv:magazine' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/magazine/[^/]+/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - # Embedded via <iframe src="http://www.arte.tv/arte_vp/index.php?json_url=..." - 'url': 'http://www.arte.tv/magazine/trepalium/fr/entretien-avec-le-realisateur-vincent-lannoo-trepalium', - 'md5': '2a9369bcccf847d1c741e51416299f25', - 'info_dict': { - 'id': '065965-000-A', - 'ext': 'mp4', - 'title': 'Trepalium - Extrait Ep.01', - 'upload_date': '20160121', - }, - }, { - # Embedded via <iframe src="http://www.arte.tv/guide/fr/embed/054813-004-A/medium" - 'url': 'http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium', - 'md5': 'fedc64fc7a946110fe311634e79782ca', - 'info_dict': { - 'id': '054813-004_PLUS7-F', - 'ext': 'mp4', - 'title': 'Trepalium (4/6)', - 'description': 'md5:10057003c34d54e95350be4f9b05cb40', - 'upload_date': '20160218', - }, - }, { - 'url': 'http://www.arte.tv/magazine/metropolis/de/frank-woeste-german-paris-metropolis', - 'only_matching': True, - }] + lang, video_id = re.match(self._VALID_URL, url).groups() + return self._extract_from_json_url( + 'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id), + video_id, lang) class ArteTVEmbedIE(ArteTVPlus7IE): IE_NAME = 'arte.tv:embed' _VALID_URL = r'''(?x) - http://www\.arte\.tv - /(?:playerv2/embed|arte_vp/index)\.php\?json_url= + https://www\.arte\.tv + /player/v3/index\.php\?json_url= (?P<json_url> - http://arte\.tv/papi/tvguide/videos/stream/player/ - (?P<lang>[^/]+)/(?P<id>[^/]+)[^&]* + https?://api\.arte\.tv/api/player/v1/config/ + (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF]) ) ''' _TESTS = [] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - lang = mobj.group('lang') - json_url = mobj.group('json_url') + json_url, lang, video_id = re.match(self._VALID_URL, url).groups() return self._extract_from_json_url(json_url, video_id, lang) -class TheOperaPlatformIE(ArteTVPlus7IE): - IE_NAME = 'theoperaplatform' - _VALID_URL = r'https?://(?:www\.)?theoperaplatform\.eu/(?P<lang>fr|de|en|es)/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://www.theoperaplatform.eu/de/opera/verdi-otello', - 'md5': '970655901fa2e82e04c00b955e9afe7b', - 'info_dict': { - 'id': '060338-009-A', - 'ext': 'mp4', - 'title': 'Verdi - OTELLO', - 'upload_date': '20160927', - }, - }] - - class ArteTVPlaylistIE(ArteTVBaseIE): IE_NAME = 'arte.tv:playlist' - _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/[^#]*#collection/(?P<id>PL-\d+)' + _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})' _TESTS = [{ - 'url': 'http://www.arte.tv/guide/de/plus7/?country=DE#collection/PL-013263/ARTETV', + 'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/', 'info_dict': { - 'id': 'PL-013263', - 'title': 'Areva & Uramin', - 'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', + 'id': 'RC-016954', + 'title': 'Earn a Living', + 'description': 'md5:d322c55011514b3a7241f7fb80d494c2', }, 'playlist_mincount': 6, - }, { - 'url': 'http://www.arte.tv/guide/de/playlists?country=DE#collection/PL-013190/ARTETV', - 'only_matching': True, }] def _real_extract(self, url): - playlist_id, lang = self._extract_url_info(url) + lang, playlist_id = re.match(self._VALID_URL, url).groups() collection = self._download_json( 'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos' % (lang, playlist_id), playlist_id) diff --git a/youtube_dl/extractor/beampro.py b/youtube_dl/extractor/beampro.py index 2eaec1ab4..86abdae00 100644 --- a/youtube_dl/extractor/beampro.py +++ b/youtube_dl/extractor/beampro.py @@ -99,8 +99,8 @@ class BeamProLiveIE(BeamProBaseIE): class BeamProVodIE(BeamProBaseIE): IE_NAME = 'Mixer:vod' - _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)' + _TESTS = [{ 'url': 'https://mixer.com/willow8714?vod=2259830', 'md5': 'b2431e6e8347dc92ebafb565d368b76b', 'info_dict': { @@ -119,7 +119,13 @@ class BeamProVodIE(BeamProBaseIE): 'params': { 'skip_download': True, }, - } + }, { + 'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw', + 'only_matching': True, + }, { + 'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig', + 'only_matching': True, + }] @staticmethod def _extract_format(vod, vod_type): diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py index 192f11ea6..c15a0ac8f 100644 --- a/youtube_dl/extractor/beeg.py +++ b/youtube_dl/extractor/beeg.py @@ -1,7 +1,10 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_str +from ..compat import ( + compat_str, + compat_urlparse, +) from ..utils import ( int_or_none, unified_timestamp, @@ -11,6 +14,7 @@ from ..utils import ( class BeegIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)' _TESTS = [{ + # api/v6 v1 'url': 'http://beeg.com/5416503', 'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820', 'info_dict': { @@ -24,6 +28,10 @@ class BeegIE(InfoExtractor): 'tags': list, 'age_limit': 18, } + }, { + # api/v6 v2 + 'url': 'https://beeg.com/1941093077?t=911-1391', + 'only_matching': True, }, { 'url': 'https://beeg.porn/video/5416503', 'only_matching': True, @@ -41,11 +49,22 @@ class BeegIE(InfoExtractor): r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version', default='1546225636701') + qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + t = qs.get('t', [''])[0].split('-') + if len(t) > 1: + query = { + 'v': 2, + 's': t[0], + 'e': t[1], + } + else: + query = {'v': 1} + for api_path in ('', 'api.'): video = self._download_json( 'https://%sbeeg.com/api/v6/%s/video/%s' % (api_path, beeg_version, video_id), video_id, - fatal=api_path == 'api.') + fatal=api_path == 'api.', query=query) if video: break diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py index 3707dc97f..af21e3ee5 100644 --- a/youtube_dl/extractor/biqle.py +++ b/youtube_dl/extractor/biqle.py @@ -42,7 +42,7 @@ class BIQLEIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) embed_url = self._proto_relative_url(self._search_regex( - r'<iframe.+?src="((?:https?:)?//daxab\.com/[^"]+)".*?></iframe>', + r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>', webpage, 'embed url')) if VKIE.suitable(embed_url): return self.url_result(embed_url, VKIE.ie_key(), video_id) diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py index 4f39424f5..430663fbf 100644 --- a/youtube_dl/extractor/bitchute.py +++ b/youtube_dl/extractor/bitchute.py @@ -55,6 +55,11 @@ class BitChuteIE(InfoExtractor): formats = [ {'url': format_url} for format_url in orderedSet(format_urls)] + + if not formats: + formats = self._parse_html5_media_entries( + url, webpage, video_id)[0]['formats'] + self._check_formats(formats, video_id) self._sort_formats(formats) @@ -65,8 +70,9 @@ class BitChuteIE(InfoExtractor): webpage, default=None) or self._html_search_meta( 'twitter:image:src', webpage, 'thumbnail') uploader = self._html_search_regex( - r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>', webpage, - 'uploader', fatal=False) + (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>', + r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'), + webpage, 'uploader', fatal=False) return { 'id': video_id, diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dl/extractor/blinkx.py index 3b8eabe8f..db5e12b21 100644 --- a/youtube_dl/extractor/blinkx.py +++ b/youtube_dl/extractor/blinkx.py @@ -32,8 +32,8 @@ class BlinkxIE(InfoExtractor): video_id = self._match_id(url) display_id = video_id[:8] - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + - 'video=%s' % video_id) + api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + + 'video=%s' % video_id) data_json = self._download_webpage(api_url, display_id) data = json.loads(data_json)['api']['results'][0] duration = None diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dl/extractor/bravotv.py index a25d500e4..b9715df00 100644 --- a/youtube_dl/extractor/bravotv.py +++ b/youtube_dl/extractor/bravotv.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .adobepass import AdobePassIE from ..utils import ( smuggle_url, @@ -12,16 +14,16 @@ from ..utils import ( class BravoTVIE(AdobePassIE): _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)' _TESTS = [{ - 'url': 'http://www.bravotv.com/last-chance-kitchen/season-5/videos/lck-ep-12-fishy-finale', - 'md5': '9086d0b7ef0ea2aabc4781d75f4e5863', + 'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is', + 'md5': 'e34684cfea2a96cd2ee1ef3a60909de9', 'info_dict': { - 'id': 'zHyk1_HU_mPy', + 'id': 'epL0pmK1kQlT', 'ext': 'mp4', - 'title': 'LCK Ep 12: Fishy Finale', - 'description': 'S13/E12: Two eliminated chefs have just 12 minutes to cook up a delicious fish dish.', + 'title': 'The Top Chef Season 16 Winner Is...', + 'description': 'Find out who takes the title of Top Chef!', 'uploader': 'NBCU-BRAV', - 'upload_date': '20160302', - 'timestamp': 1456945320, + 'upload_date': '20190314', + 'timestamp': 1552591860, } }, { 'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1', @@ -32,30 +34,38 @@ class BravoTVIE(AdobePassIE): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) settings = self._parse_json(self._search_regex( - r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), + r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'), display_id) info = {} query = { 'mbr': 'true', } account_pid, release_pid = [None] * 2 - tve = settings.get('sharedTVE') + tve = settings.get('ls_tve') if tve: query['manifest'] = 'm3u' - account_pid = 'HNK2IC' - release_pid = tve['release_pid'] + mobj = re.search(r'<[^>]+id="pdk-player"[^>]+data-url=["\']?(?:https?:)?//player\.theplatform\.com/p/([^/]+)/(?:[^/]+/)*select/([^?#&"\']+)', webpage) + if mobj: + account_pid, tp_path = mobj.groups() + release_pid = tp_path.strip('/').split('/')[-1] + else: + account_pid = 'HNK2IC' + tp_path = release_pid = tve['release_pid'] if tve.get('entitlement') == 'auth': - adobe_pass = settings.get('adobePass', {}) + adobe_pass = settings.get('tve_adobe_auth', {}) resource = self._get_mvpd_resource( adobe_pass.get('adobePassResourceId', 'bravo'), tve['title'], release_pid, tve.get('rating')) query['auth'] = self._extract_mvpd_auth( url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource) else: - shared_playlist = settings['shared_playlist'] + shared_playlist = settings['ls_playlist'] account_pid = shared_playlist['account_pid'] metadata = shared_playlist['video_metadata'][shared_playlist['default_clip']] - release_pid = metadata['release_pid'] + tp_path = release_pid = metadata.get('release_pid') + if not release_pid: + release_pid = metadata['guid'] + tp_path = 'media/guid/2140479951/' + release_pid info.update({ 'title': metadata['title'], 'description': metadata.get('description'), @@ -67,7 +77,7 @@ class BravoTVIE(AdobePassIE): '_type': 'url_transparent', 'id': release_pid, 'url': smuggle_url(update_url_query( - 'http://link.theplatform.com/s/%s/%s' % (account_pid, release_pid), + 'http://link.theplatform.com/s/%s/%s' % (account_pid, tp_path), query), {'force_smil_url': True}), 'ie_key': 'ThePlatform', }) diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index c0345e2c3..58ec5c979 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -483,7 +483,7 @@ class BrightcoveLegacyIE(InfoExtractor): class BrightcoveNewIE(AdobePassIE): IE_NAME = 'brightcove:new' - _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+|ref:[^&]+)' + _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)' _TESTS = [{ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', 'md5': 'c8100925723840d4b0d243f7025703be', @@ -516,6 +516,21 @@ class BrightcoveNewIE(AdobePassIE): # m3u8 download 'skip_download': True, } + }, { + # playlist stream + 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001', + 'info_dict': { + 'id': '5718313430001', + 'title': 'No Audio Playlist', + }, + 'playlist_count': 7, + 'params': { + # m3u8 download + 'skip_download': True, + } + }, { + 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001', + 'only_matching': True, }, { # ref: prefixed video id 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442', @@ -715,7 +730,7 @@ class BrightcoveNewIE(AdobePassIE): 'ip_blocks': smuggled_data.get('geo_ip_blocks'), }) - account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( 'http://players.brightcove.net/%s/%s_%s/index.min.js' @@ -736,7 +751,7 @@ class BrightcoveNewIE(AdobePassIE): r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', webpage, 'policy key', group='pk') - api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' % (account_id, video_id) + api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) headers = { 'Accept': 'application/json;pk=%s' % policy_key, } @@ -771,5 +786,12 @@ class BrightcoveNewIE(AdobePassIE): 'tveToken': tve_token, }) + if content_type == 'playlist': + return self.playlist_result( + [self._parse_brightcove_metadata(vid, vid.get('id'), headers) + for vid in json_data.get('videos', []) if vid.get('id')], + json_data.get('id'), json_data.get('name'), + json_data.get('description')) + return self._parse_brightcove_metadata( json_data, video_id, headers=headers) diff --git a/youtube_dl/extractor/byutv.py b/youtube_dl/extractor/byutv.py index 4bf4efe1f..562c83af9 100644 --- a/youtube_dl/extractor/byutv.py +++ b/youtube_dl/extractor/byutv.py @@ -3,11 +3,13 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import parse_duration class BYUtvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?byutv\.org/(?:watch|player)/(?!event/)(?P<id>[0-9a-f-]+)(?:/(?P<display_id>[^/?#&]+))?' _TESTS = [{ + # ooyalaVOD 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d/studio-c-season-5-episode-5', 'info_dict': { 'id': 'ZvanRocTpW-G5_yZFeltTAMv6jxOU9KH', @@ -22,6 +24,20 @@ class BYUtvIE(InfoExtractor): 'skip_download': True, }, 'add_ie': ['Ooyala'], + }, { + # dvr + 'url': 'https://www.byutv.org/player/8f1dab9b-b243-47c8-b525-3e2d021a3451/byu-softball-pacific-vs-byu-41219---game-2', + 'info_dict': { + 'id': '8f1dab9b-b243-47c8-b525-3e2d021a3451', + 'display_id': 'byu-softball-pacific-vs-byu-41219---game-2', + 'ext': 'mp4', + 'title': 'Pacific vs. BYU (4/12/19)', + 'description': 'md5:1ac7b57cb9a78015910a4834790ce1f3', + 'duration': 11645, + }, + 'params': { + 'skip_download': True + }, }, { 'url': 'http://www.byutv.org/watch/6587b9a3-89d2-42a6-a7f7-fd2f81840a7d', 'only_matching': True, @@ -35,24 +51,42 @@ class BYUtvIE(InfoExtractor): video_id = mobj.group('id') display_id = mobj.group('display_id') or video_id - ep = self._download_json( - 'https://api.byutv.org/api3/catalog/getvideosforcontent', video_id, - query={ + info = self._download_json( + 'https://api.byutv.org/api3/catalog/getvideosforcontent', + display_id, query={ 'contentid': video_id, 'channel': 'byutv', 'x-byutv-context': 'web$US', }, headers={ 'x-byutv-context': 'web$US', 'x-byutv-platformkey': 'xsaaw9c7y5', - })['ooyalaVOD'] + }) + ep = info.get('ooyalaVOD') + if ep: + return { + '_type': 'url_transparent', + 'ie_key': 'Ooyala', + 'url': 'ooyala:%s' % ep['providerId'], + 'id': video_id, + 'display_id': display_id, + 'title': ep.get('title'), + 'description': ep.get('description'), + 'thumbnail': ep.get('imageThumbnail'), + } + + ep = info['dvr'] + title = ep['title'] + formats = self._extract_m3u8_formats( + ep['videoUrl'], video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls') + self._sort_formats(formats) return { - '_type': 'url_transparent', - 'ie_key': 'Ooyala', - 'url': 'ooyala:%s' % ep['providerId'], 'id': video_id, 'display_id': display_id, - 'title': ep.get('title'), + 'title': title, 'description': ep.get('description'), 'thumbnail': ep.get('imageThumbnail'), + 'duration': parse_duration(ep.get('length')), + 'formats': formats, } diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index 174fd9e2b..c506bc5dd 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -17,7 +17,7 @@ from ..utils import ( class CanvasIE(InfoExtractor): - _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475', 'md5': '90139b746a0a9bd7bb631283f6e2a64e', @@ -35,6 +35,10 @@ class CanvasIE(InfoExtractor): 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e', 'only_matching': True, }] + _HLS_ENTRY_PROTOCOLS_MAP = { + 'HLS': 'm3u8_native', + 'HLS_AES': 'm3u8', + } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -52,9 +56,9 @@ class CanvasIE(InfoExtractor): format_url, format_type = target.get('url'), target.get('type') if not format_url or not format_type: continue - if format_type == 'HLS': + if format_type in self._HLS_ENTRY_PROTOCOLS_MAP: formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', + format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type], m3u8_id=format_type, fatal=False)) elif format_type == 'HDS': formats.extend(self._extract_f4m_formats( diff --git a/youtube_dl/extractor/cbs.py b/youtube_dl/extractor/cbs.py index 1799d63ea..4a19a73d2 100644 --- a/youtube_dl/extractor/cbs.py +++ b/youtube_dl/extractor/cbs.py @@ -13,13 +13,17 @@ from ..utils import ( class CBSBaseIE(ThePlatformFeedIE): def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): - closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') - return { - 'en': [{ - 'ext': 'ttml', - 'url': closed_caption_e.attrib['value'], - }] - } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] + subtitles = {} + for k, ext in [('sMPTE-TTCCURL', 'tt'), ('ClosedCaptionURL', 'ttml'), ('webVTTCaptionURL', 'vtt')]: + cc_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', k) + if cc_e is not None: + cc_url = cc_e.get('value') + if cc_url: + subtitles.setdefault(subtitles_lang, []).append({ + 'ext': ext, + 'url': cc_url, + }) + return subtitles class CBSIE(CBSBaseIE): @@ -65,7 +69,7 @@ class CBSIE(CBSBaseIE): last_e = None for item in items_data.findall('.//item'): asset_type = xpath_text(item, 'assetType') - if not asset_type or asset_type in asset_types or asset_type in ('HLS_FPS', 'DASH_CENC'): + if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type: continue asset_types.append(asset_type) query = { diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dl/extractor/cbsnews.py index 51df15fac..345debcf0 100644 --- a/youtube_dl/extractor/cbsnews.py +++ b/youtube_dl/extractor/cbsnews.py @@ -1,40 +1,62 @@ # coding: utf-8 from __future__ import unicode_literals +import re +import zlib + from .common import InfoExtractor from .cbs import CBSIE +from ..compat import ( + compat_b64decode, + compat_urllib_parse_unquote, +) from ..utils import ( parse_duration, ) +class CBSNewsEmbedIE(CBSIE): + IE_NAME = 'cbsnews:embed' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/embed/video[^#]*#(?P<id>.+)' + _TESTS = [{ + 'url': 'https://www.cbsnews.com/embed/video/?v=1.c9b5b61492913d6660db0b2f03579ef25e86307a#1Vb7b9s2EP5XBAHbT6Gt98PAMKTJ0se6LVjWYWtdGBR1stlIpEBSTtwi%2F%2FvuJNkNhmHdGxgM2NL57vjd6zt%2B8PngdN%2Fyg79qeGvhzN%2FLGrS%2F%2BuBLB531V28%2B%2BO7Qg7%2Fy97r2z3xZ42NW8yLhDbA0S0KWlHnIijwKWJBHZZnHBa8Cgbpdf%2F89NM9Hi9fXifhpr8sr%2FlP848tn%2BTdXycX25zh4cdX%2FvHl6PmmPqnWQv9w8Ed%2B9GjYRim07bFEqdG%2BZVHuwTm65A7bVRrYtR5lAyMox7pigF6W4k%2By91mjspGsJ%2BwVae4%2BsvdnaO1p73HkXs%2FVisUDTGm7R8IcdnOROeq%2B19qT1amhA1VJtPenoTUgrtfKc9m7Rq8dP7nnjwOB7wg7ADdNt7VX64DWAWlKhPtmDEq22g4GF99x6Dk9E8OSsankHXqPNKDxC%2FdK7MLKTircTDgsI3mmj4OBdSq64dy7fd1x577RU1rt4cvMtOaulFYOd%2FLewRWvDO9lIgXFpZSnkZmjbv5SxKTPoQXClFbpsf%2Fhbbpzs0IB3vb8KkyzJQ%2BywOAgCrMpgRrz%2BKk4fvb7kFbR4XJCu0gAdtNO7woCwZTu%2BBUs9bam%2Fds71drVerpeisgrubLjAB4nnOSkWQnfr5W6o1ku5Xpr1MgrCbL0M0vUyDtfLLK15WiYp47xKWSLyjFVpwVmVJSLIoCjSOFkv3W7oKsVliwZJcB9nwXpZ5GEQQwY8jNKqKCBrgjTLeFxgdCIpazojDgnRtn43J6kG7nZ6cAbxh0EeFFk4%2B1u867cY5u4344n%2FxXjCqAjucdTHgLKojNKmSfO8KRsOFY%2FzKEYCKEJBzv90QA9nfm9gL%2BHulaFqUkz9ULUYxl62B3U%2FRVNLA8IhggaPycOoBuwOCESciDQVSSUgiOMsROB%2FhKfwCKOzEk%2B4k6rWd4uuT%2FwTDz7K7t3d3WLO8ISD95jSPQbayBacthbz86XVgxHwhex5zawzgDOmtp%2F3GPcXn0VXHdSS029%2Fj99UC%2FwJUvyKQ%2FzKyixIEVlYJOn4RxxuaH43Ty9fbJ5OObykHH435XAzJTHeOF4hhEUXD8URe%2FQ%2FBT%2BMpf8d5GN02Ox%2FfiGsl7TA7POu1xZ5%2BbTzcAVKMe48mqcC21hkacVEVScM26liVVBnrKkC4CLKyzAvHu0lhEaTKMFwI3a4SN9MsrfYzdBLq2vkwRD1gVviLT8kY9h2CHH6Y%2Bix6609weFtey4ESp60WtyeWMy%2BsmBuhsoKIyuoT%2Bq2R%2FrW5qi3g%2FvzS2j40DoixDP8%2BKP0yUdpXJ4l6Vla%2Bg9vce%2BC4yM5YlUcbA%2F0jLKdpmTwvsdN5z88nAIe08%2F0HgxeG1iv%2B6Hlhjh7uiW0SDzYNI92L401uha3JKYk268UVRzdOzNQvAaJqoXzAc80dAV440NZ1WVVAAMRYQ2KrGJFmDUsq8saWSnjvIj8t78y%2FRa3JRnbHVfyFpfwoDiGpPgjzekyUiKNlU3OMlwuLMmzgvEojllYVE2Z1HhImvsnk%2BuhusTEoB21PAtSFodeFK3iYhXEH9WOG2%2FkOE833sfeG%2Ff5cfHtEFNXgYes0%2FXj7aGivUgJ9XpusCtoNcNYVVnJVrrDo0OmJAutHCpuZul4W9lLcfy7BnuLPT02%2ByXsCTk%2B9zhzswIN04YueNSK%2BPtM0jS88QdLqSLJDTLsuGZJNolm2yO0PXh3UPnz9Ix5bfIAqxPjvETQsDCEiPG4QbqNyhBZISxybLnZYCrW5H3Axp690%2F0BJdXtDZ5ITuM4xj3f4oUHGzc5JeJmZKpp%2FjwKh4wMV%2FV1yx3emLoR0MwbG4K%2F%2BZgVep3PnzXGDHZ6a3i%2Fk%2BJrONDN13%2Bnq6tBTYk4o7cLGhBtqCC4KwacGHpEVuoH5JNro%2FE6JfE6d5RydbiR76k%2BW5wioDHBIjw1euhHjUGRB0y5A97KoaPx6MlL%2BwgboUVtUFRI%2FLemgTpdtF59ii7pab08kuPcfWzs0l%2FRI5takWnFpka0zOgWRtYcuf9aIxZMxlwr6IiGpsb6j2DQUXPl%2FimXI599Ev7fWjoPD78A', + 'only_matching': True, + }] + + def _real_extract(self, url): + item = self._parse_json(zlib.decompress(compat_b64decode( + compat_urllib_parse_unquote(self._match_id(url))), + -zlib.MAX_WBITS), None)['video']['items'][0] + return self._extract_video_info(item['mpxRefId'], 'cbsnews') + + class CBSNewsIE(CBSIE): IE_NAME = 'cbsnews' IE_DESC = 'CBS News' - _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?cbsnews\.com/(?:news|video)/(?P<id>[\da-z_-]+)' _TESTS = [ { # 60 minutes 'url': 'http://www.cbsnews.com/news/artificial-intelligence-positioned-to-be-a-game-changer/', 'info_dict': { - 'id': '_B6Ga3VJrI4iQNKsir_cdFo9Re_YJHE_', - 'ext': 'mp4', - 'title': 'Artificial Intelligence', - 'description': 'md5:8818145f9974431e0fb58a1b8d69613c', + 'id': 'Y_nf_aEg6WwO9OLAq0MpKaPgfnBUxfW4', + 'ext': 'flv', + 'title': 'Artificial Intelligence, real-life applications', + 'description': 'md5:a7aaf27f1b4777244de8b0b442289304', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1606, + 'duration': 317, 'uploader': 'CBSI-NEW', - 'timestamp': 1498431900, - 'upload_date': '20170625', + 'timestamp': 1476046464, + 'upload_date': '20161009', }, 'params': { - # m3u8 download + # rtmp download 'skip_download': True, }, }, { - 'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', + 'url': 'https://www.cbsnews.com/video/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', 'info_dict': { 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', 'ext': 'mp4', @@ -60,37 +82,29 @@ class CBSNewsIE(CBSIE): # 48 hours 'url': 'http://www.cbsnews.com/news/maria-ridulph-murder-will-the-nations-oldest-cold-case-to-go-to-trial-ever-get-solved/', 'info_dict': { - 'id': 'QpM5BJjBVEAUFi7ydR9LusS69DPLqPJ1', - 'ext': 'mp4', 'title': 'Cold as Ice', - 'description': 'Can a childhood memory of a friend\'s murder solve a 1957 cold case? "48 Hours" correspondent Erin Moriarty has the latest.', - 'upload_date': '20170604', - 'timestamp': 1496538000, - 'uploader': 'CBSI-NEW', - }, - 'params': { - 'skip_download': True, + 'description': 'Can a childhood memory solve the 1957 murder of 7-year-old Maria Ridulph?', }, + 'playlist_mincount': 7, }, ] def _real_extract(self, url): - video_id = self._match_id(url) + display_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) - video_info = self._parse_json(self._html_search_regex( - r'(?:<ul class="media-list items" id="media-related-items"[^>]*><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'', - webpage, 'video JSON info', default='{}'), video_id, fatal=False) - - if video_info: - item = video_info['item'] if 'item' in video_info else video_info - else: - state = self._parse_json(self._search_regex( - r'data-cbsvideoui-options=(["\'])(?P<json>{.+?})\1', webpage, - 'playlist JSON info', group='json'), video_id)['state'] - item = state['playlist'][state['pid']] + entries = [] + for embed_url in re.findall(r'<iframe[^>]+data-src="(https?://(?:www\.)?cbsnews\.com/embed/video/[^#]*#[^"]+)"', webpage): + entries.append(self.url_result(embed_url, CBSNewsEmbedIE.ie_key())) + if entries: + return self.playlist_result( + entries, playlist_title=self._html_search_meta(['og:title', 'twitter:title'], webpage), + playlist_description=self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)) + item = self._parse_json(self._html_search_regex( + r'CBSNEWS\.defaultPayload\s*=\s*({.+})', + webpage, 'video JSON info'), display_id)['items'][0] return self._extract_video_info(item['mpxRefId'], 'cbsnews') diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py index 734702144..36e6dff72 100644 --- a/youtube_dl/extractor/ccc.py +++ b/youtube_dl/extractor/ccc.py @@ -1,9 +1,12 @@ +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( int_or_none, parse_iso8601, + try_get, + url_or_none, ) @@ -18,11 +21,13 @@ class CCCIE(InfoExtractor): 'id': '1839', 'ext': 'mp4', 'title': 'Introduction to Processor Design', + 'creator': 'byterazor', 'description': 'md5:df55f6d073d4ceae55aae6f2fd98a0ac', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20131228', 'timestamp': 1388188800, 'duration': 3710, + 'tags': list, } }, { 'url': 'https://media.ccc.de/v/32c3-7368-shopshifting#download', @@ -68,6 +73,7 @@ class CCCIE(InfoExtractor): 'id': event_id, 'display_id': display_id, 'title': event_data['title'], + 'creator': try_get(event_data, lambda x: ', '.join(x['persons'])), 'description': event_data.get('description'), 'thumbnail': event_data.get('thumb_url'), 'timestamp': parse_iso8601(event_data.get('date')), @@ -75,3 +81,31 @@ class CCCIE(InfoExtractor): 'tags': event_data.get('tags'), 'formats': formats, } + + +class CCCPlaylistIE(InfoExtractor): + IE_NAME = 'media.ccc.de:lists' + _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/c/(?P<id>[^/?#&]+)' + _TESTS = [{ + 'url': 'https://media.ccc.de/c/30c3', + 'info_dict': { + 'title': '30C3', + 'id': '30c3', + }, + 'playlist_count': 135, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url).lower() + + conf = self._download_json( + 'https://media.ccc.de/public/conferences/' + playlist_id, + playlist_id) + + entries = [] + for e in conf['events']: + event_url = url_or_none(e.get('frontend_link')) + if event_url: + entries.append(self.url_result(event_url, ie=CCCIE.ie_key())) + + return self.playlist_result(entries, playlist_id, conf.get('title')) diff --git a/youtube_dl/extractor/cinemax.py b/youtube_dl/extractor/cinemax.py new file mode 100644 index 000000000..7f89d33de --- /dev/null +++ b/youtube_dl/extractor/cinemax.py @@ -0,0 +1,29 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .hbo import HBOBaseIE + + +class CinemaxIE(HBOBaseIE): + _VALID_URL = r'https?://(?:www\.)?cinemax\.com/(?P<path>[^/]+/video/[0-9a-z-]+-(?P<id>\d+))' + _TESTS = [{ + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903', + 'md5': '82e0734bba8aa7ef526c9dd00cf35a05', + 'info_dict': { + 'id': '20126903', + 'ext': 'mp4', + 'title': 'S1 Ep 1: Recap', + }, + 'expected_warnings': ['Unknown MIME type application/mp4 in DASH manifest'], + }, { + 'url': 'https://www.cinemax.com/warrior/video/s1-ep-1-recap-20126903.embed', + 'only_matching': True, + }] + + def _real_extract(self, url): + path, video_id = re.match(self._VALID_URL, url).groups() + info = self._extract_info('https://www.cinemax.com/%s.xml' % path, video_id) + info['id'] = video_id + return info diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py index e6d92cca2..8ff2c6531 100644 --- a/youtube_dl/extractor/cloudflarestream.py +++ b/youtube_dl/extractor/cloudflarestream.py @@ -10,8 +10,8 @@ class CloudflareStreamIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:watch\.)?cloudflarestream\.com/| - embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo= + (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/| + embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo= ) (?P<id>[\da-f]+) ''' @@ -31,6 +31,9 @@ class CloudflareStreamIE(InfoExtractor): }, { 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd', 'only_matching': True, + }, { + 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e', + 'only_matching': True, }] @staticmethod @@ -38,7 +41,7 @@ class CloudflareStreamIE(InfoExtractor): return [ mobj.group('url') for mobj in re.finditer( - r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.cloudflarestream\.com/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', + r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1', webpage)] def _real_extract(self, url): diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 0889288f0..9c3e9eec6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -67,6 +67,7 @@ from ..utils import ( sanitized_Request, sanitize_filename, str_or_none, + strip_or_none, unescapeHTML, unified_strdate, unified_timestamp, @@ -117,7 +118,7 @@ class InfoExtractor(object): unfragmented media) - URL of the MPD manifest or base URL representing the media if MPD manifest - is parsed froma string (in case of + is parsed from a string (in case of fragmented media) for MSS - URL of the ISM manifest. * manifest_url @@ -542,11 +543,11 @@ class InfoExtractor(object): raise ExtractorError('An extractor error has occurred.', cause=e) def __maybe_fake_ip_and_retry(self, countries): - if (not self._downloader.params.get('geo_bypass_country', None) and - self._GEO_BYPASS and - self._downloader.params.get('geo_bypass', True) and - not self._x_forwarded_for_ip and - countries): + if (not self._downloader.params.get('geo_bypass_country', None) + and self._GEO_BYPASS + and self._downloader.params.get('geo_bypass', True) + and not self._x_forwarded_for_ip + and countries): country_code = random.choice(countries) self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code) if self._x_forwarded_for_ip: @@ -682,8 +683,8 @@ class InfoExtractor(object): def __check_blocked(self, content): first_block = content[:512] - if ('<title>Access to this site is blocked' in content and - 'Websense' in first_block): + if ('Access to this site is blocked' in content + and 'Websense' in first_block): msg = 'Access to this webpage has been blocked by Websense filtering software in your network.' blocked_iframe = self._html_search_regex( r'