From 659660f4784bbe7dff394d8930519b85f70f056f Mon Sep 17 00:00:00 2001
From: Ashley Harvey
Date: Tue, 26 Nov 2019 13:54:58 -0800
Subject: [PATCH] [VertiPorn] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/vertiporn.py  | 49 ++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 youtube_dl/extractor/vertiporn.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index cf4bb8f20..c7432d6d0 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1265,6 +1265,7 @@ from .varzesh3 import Varzesh3IE
 from .vbox7 import Vbox7IE
 from .veehd import VeeHDIE
 from .veoh import VeohIE
+from .vertiporn import VertiPornIE
 from .vesti import VestiIE
 from .vevo import (
     VevoIE,
diff --git a/youtube_dl/extractor/vertiporn.py b/youtube_dl/extractor/vertiporn.py
new file mode 100644
index 000000000..cc7eabc87
--- /dev/null
+++ b/youtube_dl/extractor/vertiporn.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlparse
+
+
+class VertiPornIE(InfoExtractor):
+    """Extract the title and direct MP4 URL from a vertiporn.com video page."""
+
+    # Only the numeric id is captured; the slug that follows it is not used.
+    _VALID_URL = r'https?://(?:www\.)?vertiporn\.com/video/(?P<id>[0-9]+)/'
+    _TEST = {
+        'url': 'https://www.vertiporn.com/video/83/blowjob-teen-pov',
+        'md5': '3c154a5183f3f04b516e20600ff5337c',
+        'info_dict': {
+            'id': '83',
+            'ext': 'mp4',
+            'title': 'Blowjob Teen POV - VertiPorn.com',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, ext, url) for the page at *url*."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<title>(.+?)</title>', webpage, 'title')
+
+        # NOTE(review): the original media pattern did not survive in this
+        # patch; a <source src="..."> tag is assumed here -- confirm against
+        # the live page markup before merging.
+        video_path = self._html_search_regex(
+            r'<source[^>]+\bsrc=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            webpage, 'video', group='url')
+
+        # Strip leading/trailing dots (e.g. a '..' prefix) from the extracted
+        # path and append the remainder to the scheme and host of the page.
+        parsed_url = compat_urllib_parse_urlparse(url)
+        video_url = '%s://%s%s' % (
+            parsed_url.scheme, parsed_url.netloc, video_path.strip('.'))
+
+        return {
+            'id': video_id,
+            'title': title,
+            'ext': 'mp4',
+            'url': video_url,
+        }