From bdc660277db25aeb97302bce8d7c163713286039 Mon Sep 17 00:00:00 2001
From: Yurifag
Date: Sun, 21 Feb 2016 17:22:01 +0100
Subject: [PATCH] [powerwatch] Add new extractor

---
 youtube_dl/extractor/__init__.py   |  1 +
 youtube_dl/extractor/powerwatch.py | 80 ++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 youtube_dl/extractor/powerwatch.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 1ae606f1e..5b3cc84ca 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -560,6 +560,7 @@ from .pornhub import (
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
 from .pornoxo import PornoXOIE
+from .powerwatch import PowerwatchIE
 from .primesharetv import PrimeShareTVIE
 from .promptfile import PromptFileIE
 from .prosiebensat1 import ProSiebenSat1IE
diff --git a/youtube_dl/extractor/powerwatch.py b/youtube_dl/extractor/powerwatch.py
new file mode 100644
index 000000000..f1329f64a
--- /dev/null
+++ b/youtube_dl/extractor/powerwatch.py
@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse
+from ..utils import (
+    ExtractorError,
+    sanitized_Request,
+)
+
+
+class PowerwatchIE(InfoExtractor):
+    """Extractor for videos hosted on powerwatch.pw."""
+
+    # The named group "id" is what _match_id() returns as the video id.
+    _VALID_URL = r'http://powerwatch\.pw/(?P<id>\w+)'
+
+    _TEST = {
+        'url': 'http://powerwatch.pw/duecjibvicbu',
+        'md5': 'bf7965f70675be5e1a1749be3b8d20ba',
+        'info_dict': {
+            'id': 'duecjibvicbu',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny trailer',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the video at ``url``.
+
+        Downloads the landing page, re-submits its hidden form fields as a
+        POST to the same URL and scrapes title, thumbnail and media URLs
+        from the response.
+
+        Raises ExtractorError (expected) when the landing page reports
+        that the file was not found.
+        """
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        if '>File Not Found<' in webpage:
+            raise ExtractorError(
+                'Video %s was not found' % video_id, expected=True)
+
+        # NOTE(review): presumably the site enforces a countdown before the
+        # form may be submitted; confirm that 5 seconds is still required.
+        self._sleep(5, video_id)
+
+        download_form = self._hidden_inputs(webpage)
+        # urlencode() returns text, but POST bodies must be bytes on Python 3.
+        post_data = compat_urllib_parse.urlencode(
+            download_form).encode('ascii')
+        request = sanitized_Request(url, post_data)
+        request.add_header(
+            'Content-Type', 'application/x-www-form-urlencoded')
+
+        video_page = self._download_webpage(
+            request, video_id, 'Downloading video page')
+
+        self.report_extraction(video_id)
+
+        title = self._html_search_regex(
+            r'h4-fine[^>]*>([^<]+)<', video_page, 'title')
+        thumbnail = self._search_regex(
+            r'image:\s*"([^"]+)"', video_page, 'thumbnail URL', fatal=False)
+        # Every file: "..." entry on the video page becomes one mp4 format.
+        formats = [{
+            'url': video_url,
+            'ext': 'mp4',
+        } for video_url in re.findall(r'file:\s*"([^"]+)"', video_page)]
+
+        self._sort_formats(formats)
+
+        return {
+            'formats': formats,
+            'id': video_id,
+            'title': title,
+            'thumbnail': thumbnail,
+        }