From d472ea41924544e32c45720952f98739e8a8b2bf Mon Sep 17 00:00:00 2001 From: JChris246 <43832407+JChris246@users.noreply.github.com> Date: Sat, 2 Feb 2019 13:13:48 -0400 Subject: [PATCH] Fixed extractor --- youtube_dl/extractor/vporn.py | 48 ++++++++++++++--------------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/vporn.py b/youtube_dl/extractor/vporn.py index 858ac9e71..9a11ebfc0 100644 --- a/youtube_dl/extractor/vporn.py +++ b/youtube_dl/extractor/vporn.py @@ -6,8 +6,8 @@ from .common import InfoExtractor from ..utils import ( ExtractorError, parse_duration, + parse_resolution, str_to_int, - urljoin, ) @@ -64,47 +64,37 @@ class VpornIE(InfoExtractor): title = self._html_search_regex( r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip() - description = self._html_search_regex( - r'class="(?:descr|description_txt)">(.*?)', - webpage, 'description', fatal=False) - thumbnail = urljoin('http://www.vporn.com', self._html_search_regex( - r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', - default=None)) - uploader = self._html_search_regex( - r'(?s)Uploaded by:.*?]*>(.+?)', - webpage, 'uploader', fatal=False) + description = self._search_regex(r'[^>]*class="(?:sidebar-box)"[^>]*>[\n]

(.*?)

', + webpage, 'description', fatal=False) - categories = re.findall(r']*>([^<]+)', webpage) + thumbnail = self._search_regex(r']+poster="([^"])"', webpage, 'thumbnail', default=None) or self._search_regex(r'posterurl\s=\s\'([^\']+)', webpage, 'thumbnail', fatal=False) + + uploader = self._search_regex(r'class="avatarname">(.*?)', + webpage, 'uploader', fatal=False) + + categories = re.findall(r']*class="tags links"[^>]*>([^<]+)', webpage) duration = parse_duration(self._search_regex( - r'Runtime:\s*\s*(\d+ min \d+ sec)', + r'class="durat-img"[^>]*>\s*(\d+ min \d+ sec)', webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( - r'class="views">([\d,\.]+) [Vv]iews<', + r'class="view-count">[\n]([\d,\.]+) [Vv]iews[\n]<', webpage, 'view count', fatal=False)) + comment_count = str_to_int(self._html_search_regex( r"'Comments \(([\d,\.]+)\)'", webpage, 'comment count', default=None)) formats = [] - - for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage): - video_url = video[1] - fmt = { - 'url': video_url, - 'format_id': video[0], - } - m = re.search(r'_(?P\d+)x(?P\d+)_(?P\d+)k\.mp4$', video_url) - if m: - fmt.update({ - 'width': int(m.group('width')), - 'height': int(m.group('height')), - 'vbr': int(m.group('vbr')), - }) - formats.append(fmt) - + for mobj in re.finditer(r']+src="([^"]+)"[^>]+label="([^"]+)[^>]*>', webpage): + f = parse_resolution(mobj.group(2)) + f.update({ + 'url': mobj.group(1), + 'format_id': mobj.group(2), + }) + formats.append(f) self._sort_formats(formats) return {