From 1306767d3662cb2fc78c59ff8a4d8ee9eb130946 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 13:32:34 -0400 Subject: [PATCH 1/2] Delete vporn.py --- youtube_dl/extractor/vporn.py | 113 ---------------------------------- 1 file changed, 113 deletions(-) delete mode 100644 youtube_dl/extractor/vporn.py diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py deleted file mode 100644 index 9a11ebfc0..000000000 --- a/youtube_dl/extractor/vporn.py +++ /dev/null @@ -1,113 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - parse_duration, - parse_resolution, - str_to_int, -) - - -class VpornIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P[^/]+)/(?P\d+)' - _TESTS = [ - { - 'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/', - 'md5': 'facf37c1b86546fa0208058546842c55', - 'info_dict': { - 'id': '497944', - 'display_id': 'violet-on-her-th-birthday', - 'ext': 'mp4', - 'title': 'Violet on her 19th birthday', - 'description': 'Violet dances in front of the camera which is sure to get you horny.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'kileyGrope', - 'categories': ['Masturbation', 'Teen'], - 'duration': 393, - 'age_limit': 18, - 'view_count': int, - }, - 'skip': 'video removed', - }, - { - 'url': 'http://www.vporn.com/female/hana-shower/523564/', - 'md5': 'ced35a4656198a1664cf2cda1575a25f', - 'info_dict': { - 'id': '523564', - 'display_id': 'hana-shower', - 'ext': 'mp4', - 'title': 'Hana Shower', - 'description': 'Hana showers at the bathroom.', - 'thumbnail': r're:^https?://.*\.jpg$', - 'uploader': 'Hmmmmm', - 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female', '720p'], - 'duration': 588, - 'age_limit': 18, - 'view_count': int, - } - }, - ] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - display_id = mobj.group('display_id') - - webpage = self._download_webpage(url, display_id) - - errmsg = 'This video has been deleted due to Copyright Infringement or by the account owner!' - if errmsg in webpage: - raise ExtractorError('%s said: %s' % (self.IE_NAME, errmsg), expected=True) - - title = self._html_search_regex( - r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() - - description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]

(.*?)

', - webpage, 'description', fatal=False) - - thumbnail = self._search_regex(r']+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False) - - uploader = self._search_regex(r'class="avatarname">(.*?)', - webpage, 'uploader', fatal=False) - - categories = re.findall(r']*class="tags links"[^>]*>([^<]+)', webpage) - - duration = parse_duration(self._search_regex( - r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)', - webpage, 'duration', fatal=False)) - - view_count = str_to_int(self._search_regex( - r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<', - webpage, 'view count', fatal=False)) - - comment_count = str_to_int(self._html_search_regex( - r"'Comments \(([\d,\.]+)\)'", - webpage, 'comment count', default=None)) - - formats = [] - for mobj in re.finditer(r']+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage): - f = parse_resolution(mobj.group(2)) - f.update({ - 'url': mobj.group(1), - 'format_id': mobj.group(2), - }) - formats.append(f) - self._sort_formats(formats) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'categories': categories, - 'duration': duration, - 'view_count': view_count, - 'comment_count': comment_count, - 'age_limit': 18, - 'formats': formats, - } From ba6f2017240c977ce6dfcc5856516299cb977947 Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 13:33:13 -0400 Subject: [PATCH 2/2] fixed tag parsing --- youtube_dl/extractor/pornhub.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index be93d5d48..1804ba15d 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -307,10 +307,13 @@ class PornHubIE(PornHubBaseIE): r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P{[^}]+})', webpage, 'page parameters', group='data', default='{}'), video_id, transform_source=js_to_json, fatal=False) - tags = categories = None + tags = None if page_params: tags = page_params.get('tags', '').split(',') - categories = page_params.get('categories', '').split(',') + + categories = [] + for mobj in re.finditer(r']+Category[^>]*>([^<]+)', webpage): + categories.append(mobj.group(1)) return { 'id': video_id,