From 975ae5e30f1026dceafc44950fff3e2c8393118e Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Wed, 16 Nov 2016 00:40:22 +0300 Subject: [PATCH 1/6] [ruleporn] Add support for non Nuevo urls, add tests, strip title --- youtube_dl/extractor/ruleporn.py | 99 ++++++++++++++++++++++++++------ 1 file changed, 82 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py index ebf9808d5..e6db29d25 100644 --- a/youtube_dl/extractor/ruleporn.py +++ b/youtube_dl/extractor/ruleporn.py @@ -1,11 +1,12 @@ from __future__ import unicode_literals +import re + from .nuevo import NuevoBaseIE - class RulePornIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'http://ruleporn.com/brunette-nympho-chick-takes-her-boyfriend-in-every-angle/', 'md5': '86861ebc624a1097c7c10eaf06d7d505', 'info_dict': { @@ -16,29 +17,93 @@ class RulePornIE(NuevoBaseIE): 'description': 'md5:6d28be231b981fff1981deaaa03a04d5', 'age_limit': 18, 'duration': 635.1, - } - } + }, + }, { + 'url': 'http://ruleporn.com/short-sweet-amateur-milf-sex-action/', + 'md5': '9ec215fe7ecc19323eba42d0f16af054', + 'info_dict': { + 'id': '777084', + 'display_id': 'short-sweet-amateur-milf-sex-action', + 'ext': 'mp4', + 'title': 'Short but sweet amateur MILF sex action', + 'description': 'md5:a20fabf0f267839dfcde0b56a418147f', + 'age_limit': 18, + 'duration': 182, + }, + }, { + 'url': 'http://ruleporn.com/horny-bruentte-teen-getting-penetrated-in-a-doggy/', + 'md5': '6f3eebefd27d1b9d28f1366d951aec56', + 'info_dict': { + 'id': '975925', + 'display_id': 'horny-bruentte-teen-getting-penetrated-in-a-doggy', + 'ext': 'mp4', + 'title': 'Horny Brunette Teen Getting Penetrated In A Doggy', + 'description': 'md5:2c22da523c47418e254343f9ca454758', + 'age_limit': 18, + 'duration': 112, + }, + }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'lovehomeporn\.com/embed/(\d+)', webpage, 'video id') - title = self._search_regex( - r']+title=(["\'])(?P.+?)\1', + r']+title=(["\'])\s?(?P.+?)\1', webpage, 'title', group='url') description = self._html_search_meta('description', webpage) - info = self._extract_nuevo( - 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, - video_id) - info.update({ - 'display_id': display_id, - 'title': title, - 'description': description, - 'age_limit': 18 - }) + video_url = self._search_regex( + r'\s+[^\"\' ]+))', webpage, 'video url') + + mobj = re.search(r'lovehomeporn\.com/embed/(\d+)', video_url) + + if mobj: + + video_id = mobj.group(1) + info = self._extract_nuevo( + 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, + video_id) + + info.update({ + 'display_id': display_id, + 'title': title, + 'description': description, + 'age_limit': 18 + }) + + else: + + video_page = self._download_webpage(video_url, display_id) + + js_str = self._search_regex( + r'\s+var\s+item\s+=\s+({[^}]+});', video_page, 'js_str') + + js_obj = self._parse_json(js_str, display_id) + + video_id = '%s' % js_obj.get("id") + duration = js_obj.get("video_duration") + thumbnail = js_obj.get("url_thumb") + + formats = [] + for element_name, format_id in (('url_mp4_lowres', 'ld'), ('url_mp4', 'sd'), ('url_orig', 'hd')): + video_url = js_obj.get(element_name) + formats.append({ + 'url': video_url, + 'format_id': format_id + }) + self._check_formats(formats, video_id) + + info = { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'age_limit': 18, + 'formats': formats + } + return info From b1a0ada1430494e0753bd12fba14ba3c63fb195b Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Wed, 16 Nov 2016 02:22:00 +0300 Subject: [PATCH 2/6] [funnyordie] Improve regex for bitrates extraction, add check formats for bitrate URLs --- youtube_dl/extractor/funnyordie.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 8c5ffc9e8..39553e2e5 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -51,10 +51,7 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - - bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] + bitrates = [int(bitrate) for bitrate in re.findall(r'v(\d+)[,/]', m3u8_url)] bitrates.sort() for bitrate in bitrates: @@ -65,6 +62,11 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) + self._check_formats(formats, video_id) + + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + subtitles = {} for src, src_lang in re.findall(r' Date: Thu, 17 Nov 2016 03:13:20 +0300 Subject: [PATCH 3/6] [ruleporn] Add requested changes for pull request #11207 --- youtube_dl/extractor/ruleporn.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py index e6db29d25..3195644e9 100644 --- a/youtube_dl/extractor/ruleporn.py +++ b/youtube_dl/extractor/ruleporn.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals import re from .nuevo import NuevoBaseIE +from ..utils import int_or_none class RulePornIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P[^/?#&]+)' @@ -57,11 +58,11 @@ class RulePornIE(NuevoBaseIE): video_url = self._search_regex( r'\s+[^\"\' ]+))', webpage, 'video url') - mobj = re.search(r'lovehomeporn\.com/embed/(\d+)', video_url) + video_id = self._search_regex( + r'lovehomeporn\.com/embed/(\d+)', video_url, 'video_id', default=None) - if mobj: + if video_id: - video_id = mobj.group(1) info = self._extract_nuevo( 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, video_id) @@ -78,13 +79,13 @@ class RulePornIE(NuevoBaseIE): video_page = self._download_webpage(video_url, display_id) js_str = self._search_regex( - r'\s+var\s+item\s+=\s+({[^}]+});', video_page, 'js_str') + r'var\s+item\s+=\s+({.+(?=};)});', video_page, 'js_str') js_obj = self._parse_json(js_str, display_id) - video_id = '%s' % js_obj.get("id") - duration = js_obj.get("video_duration") - thumbnail = js_obj.get("url_thumb") + video_id = '%s' % js_obj.get('id') + duration = int_or_none(js_obj.get('video_duration')) + thumbnail = js_obj.get('url_thumb') formats = [] for element_name, format_id in (('url_mp4_lowres', 'ld'), ('url_mp4', 'sd'), ('url_orig', 'hd')): From a80949fbfdc5334dd313ca3331610ebb4e6976bd Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Sat, 19 Nov 2016 20:06:10 +0300 Subject: [PATCH 4/6] [ruleporn] Undo related changes (funnyordie) for PR #11207 --- youtube_dl/extractor/funnyordie.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 39553e2e5..9611f9ac9 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -51,7 +51,10 @@ class FunnyOrDieIE(InfoExtractor): formats = [] - bitrates = [int(bitrate) for bitrate in re.findall(r'v(\d+)[,/]', m3u8_url)] + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) + + bitrates = [int(bitrate) for bitrate in re.findall(r'[,/]v(\d+)[,/]', m3u8_url)] bitrates.sort() for bitrate in bitrates: @@ -62,11 +65,6 @@ class FunnyOrDieIE(InfoExtractor): 'vbr': bitrate, }) - self._check_formats(formats, video_id) - - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - subtitles = {} for src, src_lang in re.findall(r' Date: Sat, 19 Nov 2016 20:14:32 +0300 Subject: [PATCH 5/6] Add new line at end of funnyordie.py --- youtube_dl/extractor/funnyordie.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 9611f9ac9..8c5ffc9e8 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -83,4 +83,4 @@ class FunnyOrDieIE(InfoExtractor): 'thumbnail': post.get('picture'), 'formats': formats, 'subtitles': subtitles, - } \ No newline at end of file + } From 86bd65df8d62b3a90a7757b0e58ffd1f4c7d4519 Mon Sep 17 00:00:00 2001 From: Andy Savicki Date: Tue, 22 Nov 2016 00:23:00 +0300 Subject: [PATCH 6/6] [ruleporn] fix flake8 warnings (except E501 - line too long) --- youtube_dl/extractor/ruleporn.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/youtube_dl/extractor/ruleporn.py b/youtube_dl/extractor/ruleporn.py index 3195644e9..64fb2694c 100644 --- a/youtube_dl/extractor/ruleporn.py +++ b/youtube_dl/extractor/ruleporn.py @@ -1,10 +1,9 @@ from __future__ import unicode_literals -import re - from .nuevo import NuevoBaseIE from ..utils import int_or_none + class RulePornIE(NuevoBaseIE): _VALID_URL = r'https?://(?:www\.)?ruleporn\.com/(?:[^/?#&]+/)*(?P[^/?#&]+)' _TESTS = [{ @@ -18,7 +17,7 @@ class RulePornIE(NuevoBaseIE): 'description': 'md5:6d28be231b981fff1981deaaa03a04d5', 'age_limit': 18, 'duration': 635.1, - }, + }, }, { 'url': 'http://ruleporn.com/short-sweet-amateur-milf-sex-action/', 'md5': '9ec215fe7ecc19323eba42d0f16af054', @@ -62,7 +61,6 @@ class RulePornIE(NuevoBaseIE): r'lovehomeporn\.com/embed/(\d+)', video_url, 'video_id', default=None) if video_id: - info = self._extract_nuevo( 'http://lovehomeporn.com/media/nuevo/econfig.php?key=%s&rp=true' % video_id, video_id) @@ -72,22 +70,21 @@ class RulePornIE(NuevoBaseIE): 'title': title, 'description': description, 'age_limit': 18 - }) + }) else: - video_page = self._download_webpage(video_url, display_id) js_str = self._search_regex( r'var\s+item\s+=\s+({.+(?=};)});', video_page, 'js_str') js_obj = self._parse_json(js_str, display_id) - + video_id = '%s' % js_obj.get('id') duration = int_or_none(js_obj.get('video_duration')) thumbnail = js_obj.get('url_thumb') - formats = [] + formats = [] for element_name, format_id in (('url_mp4_lowres', 'ld'), ('url_mp4', 'sd'), ('url_orig', 'hd')): video_url = js_obj.get(element_name) formats.append({ @@ -95,7 +92,7 @@ class RulePornIE(NuevoBaseIE): 'format_id': format_id }) self._check_formats(formats, video_id) - + info = { 'id': video_id, 'display_id': display_id,