From 4249f66ce6f8a11f8917606d713c54ee2ad4e49d Mon Sep 17 00:00:00 2001
From: Alexander Kirk
Date: Fri, 15 May 2015 18:16:10 +0200
Subject: [PATCH] [voxnow] branch out voxnow support from rtlnow

---
Reviewer notes (text between "---" and the diffstat is ignored by git-am
and is not part of the commit):

* This patch was restored from a copy whose line breaks had been collapsed
  and from which everything resembling an HTML tag had been stripped.
* The regex group names url, domain, video_id and seconds were put back to
  match the mobj.group(...) calls in the code. The playerdata_url group
  name is presumed from the variable it feeds -- confirm against the commit.
* The duration pattern was empty (r''), which always matches and then makes
  mobj.group('seconds') raise. The meta/itemprop pattern below is presumed
  -- confirm against the commit.
* All indentation, including diff context lines, is reconstructed. If the
  context does not match, try `git apply --ignore-whitespace`.
* The From: header carries no e-mail address (presumably stripped too);
  restore it before feeding this mail to git-am.
* For follow-up review of voxnow.py: ExtractorError, clean_html and
  int_or_none are imported but not used, video_page_url is assigned but
  never read, and every format entry gets the same format_id 'hls'.

 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/rtlnow.py   | 15 ------
 youtube_dl/extractor/voxnow.py   | 84 ++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 15 deletions(-)
 create mode 100644 youtube_dl/extractor/voxnow.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 8ec0c1032..9c0a579fa 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -643,6 +643,7 @@ from .vk import (
 )
 from .vodlocker import VodlockerIE
 from .voicerepublic import VoiceRepublicIE
+from .voxnow import VOXnowIE
 from .vporn import VpornIE
 from .vrt import VRTIE
 from .vube import VubeIE
diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py
index 785a8045e..f3859f408 100644
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -20,7 +20,6 @@ class RTLnowIE(InfoExtractor):
                             (?P<domain>
                                 rtl-now\.rtl\.de|
                                 rtl2now\.rtl2\.de|
-                                (?:www\.)?voxnow\.de|
                                 (?:www\.)?rtlnitronow\.de|
                                 (?:www\.)?superrtlnow\.de|
                                 (?:www\.)?n-tvnow\.de)
@@ -61,20 +60,6 @@ class RTLnowIE(InfoExtractor):
             },
             'skip': 'Only works from Germany',
         },
-        {
-            'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
-            'info_dict': {
-                'id': '13883',
-                'ext': 'flv',
-                'title': 'Voxtours - Südafrika-Reporter II',
-                'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
-                'upload_date': '20090627',
-                'duration': 1800,
-            },
-            'params': {
-                'skip_download': True,
-            },
-        },
         {
             'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
             'info_dict': {
diff --git a/youtube_dl/extractor/voxnow.py b/youtube_dl/extractor/voxnow.py
new file mode 100644
index 000000000..015a9c4ef
--- /dev/null
+++ b/youtube_dl/extractor/voxnow.py
@@ -0,0 +1,84 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    clean_html,
+    unified_strdate,
+    int_or_none,
+)
+
+
+class VOXnowIE(InfoExtractor):
+    """Information Extractor for VOX NOW"""
+    _VALID_URL = r'''(?x)
+                        (?:https?://)?
+                        (?P<url>
+                            (?P<domain>
+                                (?:www\.)?voxnow\.de
+                            )
+                            /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
+                            (?:container_id|film_id)=(?P<video_id>[0-9]+)&
+                            player=1(?:&season=[0-9]+)?(?:&.*)?
+                        )'''
+
+    _TEST = {
+        'url': 'http://www.voxnow.de/der-hundeprofi/bulldogge-bruno-schaeferhuendin-mona.php?container_id=136867&player=1&season=6',
+        'info_dict': {
+            'id': '136867',
+            'ext': 'mp4',
+            'title': "Der Hundeprofi - Bulldogge 'Bruno' / Schäferhündin 'Mona'",
+            'description': 'md5:eb5b500f3e97c476614a0c1989841060',
+            'upload_date': '20150509',
+            'duration': 3077,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_page_url = 'http://%s/' % mobj.group('domain')
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
+
+        title = self._og_search_title(webpage)
+        description = self._og_search_description(webpage)
+        thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+        upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
+
+        mobj = re.search(r'<meta itemprop="duration" content="PT(?P<seconds>\d+)S" />', webpage)
+        duration = int(mobj.group('seconds')) if mobj else None
+
+        playerdata_url = self._html_search_regex(
+            r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
+
+        playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
+
+        filename = playerdata.find('./playlist/videoinfo/filename').text
+        manifest = self._download_xml(filename, video_id, 'Downloading manifest')
+
+        formats = []
+        for media in manifest.findall('{http://ns.adobe.com/f4m/2.0}media'):
+            fmt = {
+                'url': media.attrib['href'].replace('hds', 'hls').replace('f4m', 'm3u8'),
+                'ext': 'mp4',
+                'format_id': 'hls',
+            }
+            formats.append(fmt)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'formats': formats,
+        }