From 358d69a4c43ee6f285bcd299f2ee439ca36e7182 Mon Sep 17 00:00:00 2001
From: ddmgy
Date: Fri, 16 Mar 2018 03:12:18 -0400
Subject: [PATCH] [hentaihaven] Add new extractor

---
Review note: the new-file hunk below was edited by hand during review, so the
blob id on its "index" line no longer matches the content.

 youtube_dl/extractor/extractors.py  |  1 +
 youtube_dl/extractor/hentaihaven.py | 82 +++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)
 create mode 100644 youtube_dl/extractor/hentaihaven.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 3bde40eb3..50859803c 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -430,6 +430,7 @@ from .hearthisat import HearThisAtIE
 from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
+from .hentaihaven import HentaiHavenIE
 from .hentaistigma import HentaiStigmaIE
 from .hgtv import HGTVComShowIE
 from .hidive import HiDiveIE
diff --git a/youtube_dl/extractor/hentaihaven.py b/youtube_dl/extractor/hentaihaven.py
new file mode 100644
index 000000000..7b6e98b07
--- /dev/null
+++ b/youtube_dl/extractor/hentaihaven.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class HentaiHavenIE(InfoExtractor):
+    # The first path segment is the episode slug; it doubles as the video id.
+    _VALID_URL = r'https?://(?:www\.)?hentaihaven\.org/(?P<id>[^/]+)'
+    _TEST = {
+        'url': 'http://hentaihaven.org/oideyo-shiritsu-yarimari-gakuen-episode-1-hd',
+        # NOTE(review): md5 was recorded before formats were sorted; re-verify
+        # it against the 720p file that is now selected by default.
+        'md5': 'd9e1ed0558c0c29267743c463c34e71b',
+        'info_dict': {
+            'id': 'oideyo-shiritsu-yarimari-gakuen-episode-1-hd',
+            'title': 'Oideyo! Shiritsu Yarimari Gakuen – Episode 1',
+            'ext': 'mp4',
+            'age_limit': 18,
+            # Listed worst to best, the order _sort_formats() produces.
+            'formats': [
+                {
+                    'url': 'http://hh.cx/files/1a/[HH]direct_PID48840[SD].mp4',
+                    'ext': 'mp4',
+                    'height': 360,
+                },
+                {
+                    'url': 'http://hh.cx/files/04/[HH] Oideyo! Shiritsu Yarimari Gakuen - Episode 1 [SD].mp4',
+                    'ext': 'mp4',
+                    'height': 480,
+                },
+                {
+                    'url': 'http://hh.cx/files/34/[HH] Oideyo! Shiritsu Yarimari Gakuen - Episode 1 [DVD] [3FFC5997].mp4',
+                    'ext': 'mp4',
+                    'height': 720,
+                },
+            ],
+        },
+    }
+
+    def _real_extract(self, url):
+        """Scrape an episode page and return its youtube-dl info dict."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        # NOTE(review): any element carrying the entry-title class is accepted;
+        # narrow this to the actual heading tag once confirmed on the page.
+        title = self._html_search_regex(
+            r'<\w+[^>]+class=(["\'])*entry-title\1*[^>]*>(?P<title>[^<]+)',
+            webpage, 'title', group='title')
+
+        # Download buttons and resolution labels are scraped independently
+        # and paired by their position in the page.
+        buttons = re.findall(r'(<a[^>]+class="btn"*[^>]+>)', webpage)
+        labels = re.findall(
+            r'<span[^>]+class=["\']*text-white["\']*[^>]*>([^<]+)', webpage)
+
+        formats = []
+        for button, label in zip(buttons, labels):
+            formats.append({
+                # No trailing attribute is required after href, so anchors
+                # that end right after the URL are still matched.
+                'url': self._html_search_regex(
+                    r'<a[^>]+href=["\']([^"\']+)["\']', button, 'href'),
+                'ext': 'mp4',
+                # Labels presumably look like "720p"; keep only the digits.
+                'height': int_or_none(self._search_regex(
+                    r'(\d+)', label, 'height', default=None)),
+            })
+        # youtube-dl expects formats ordered worst to best and picks the last
+        # one by default; this also raises if no formats were found.
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'age_limit': 18,
+            'formats': formats,
+        }