From 5fb6b05b3299b705ae8177c53309af0ee8cd4491 Mon Sep 17 00:00:00 2001 From: Kasper Dilday Date: Wed, 20 Mar 2019 23:17:19 +0100 Subject: [PATCH 1/2] [youporn] fixed upload_date and comment_count (as noted by tests) --- youtube_dl/extractor/youporn.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index d4eccb4b2..743fa2cda 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -145,9 +145,8 @@ class YouPornIE(InfoExtractor): r'(?s)]+class=["\']submitByLink["\'][^>]*>(.+?)', webpage, 'uploader', fatal=False) upload_date = unified_strdate(self._html_search_regex( - [r'Date\s+[Aa]dded:\s*([^<]+)', - r'(?s)]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)'], - webpage, 'upload date', fatal=False)) + r']+class=["\']video-uploaded["\'][^>]*>[^<]+([^<]+)', + webpage, 'upload date', fatal=False)) age_limit = self._rta_search(webpage) @@ -158,9 +157,7 @@ class YouPornIE(InfoExtractor): view_count = str_to_int(self._search_regex( r'(?s)]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<', webpage, 'view count', fatal=False, group='count')) - comment_count = str_to_int(self._search_regex( - r'>All [Cc]omments? 
\(([\d,.]+)\)', - webpage, 'comment count', fatal=False)) + comment_count = len(re.findall(r']+class=([\"\']).*?videoComment\b.*?\1', webpage)) def extract_tag_box(regex, title): tag_box = self._search_regex(regex, webpage, title, default=None) From 558d03f5bc8f07204a040c69cc44b4ea14ffa4e8 Mon Sep 17 00:00:00 2001 From: Kasper Dilday Date: Wed, 20 Mar 2019 20:01:03 +0100 Subject: [PATCH 2/2] [youporn] fix url metadata detection (height and bitrate) to allow best video selection --- youtube_dl/extractor/youporn.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py index 743fa2cda..3d06b4ed0 100644 --- a/youtube_dl/extractor/youporn.py +++ b/youtube_dl/extractor/youporn.py @@ -57,6 +57,20 @@ class YouPornIE(InfoExtractor): 'params': { 'skip_download': True, }, + }, { + # Different URL (videoUrl) structure, has file extension in path + 'url': 'https://www.youporn.com/watch/13922959/femdom-principal/', + 'info_dict': { + 'id': '13822959', + 'display_id': 'femdom-principal', + 'ext': 'mp4', + 'format': '720p-4000k - 720p', + 'height': 720, + 'tbr': 4000 + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): @@ -119,8 +133,9 @@ class YouPornIE(InfoExtractor): # Video URL's path looks like this: # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4 + # /videos/201703/11/109285532/720P_4000K_109285532.mp4?rate=248k&burst=1400k&validfrom=1553107800&validto=1553122200&hash=NzBS4CUWB2RpgA9thDRS0Ouw5PM%3D # We will benefit from it by extracting some metadata - mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url) + mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url) if mobj: height = int(mobj.group('height')) bitrate = int(mobj.group('bitrate'))