From 041bad8508aa06a0784dc26608afeae8c51d0646 Mon Sep 17 00:00:00 2001 From: jonathanjones6fe Date: Sun, 3 Feb 2019 13:39:44 -0400 Subject: [PATCH 1/2] [pornhd] added like count to info dict --- youtube_dl/extractor/pornhd.py | 4 ++++ youtube_dl/extractor/pornhub.py | 22 ++++++++++------------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py index b52879c7a..534f06277 100644 --- a/youtube_dl/extractor/pornhd.py +++ b/youtube_dl/extractor/pornhd.py @@ -85,6 +85,9 @@ class PornHdIE(InfoExtractor): r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, 'thumbnail', fatal=False, group='url') + like_count = int_or_none(self._search_regex( + r'class="save-count">(\d+)<', webpage, 'like_count', fatal=False)) + return { 'id': video_id, 'display_id': display_id, @@ -94,4 +97,5 @@ class PornHdIE(InfoExtractor): 'view_count': view_count, 'formats': formats, 'age_limit': 18, + 'like_count': like_count, } diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 428324ef0..27b938ed2 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -16,6 +16,7 @@ from .openload import PhantomJSwrapper from ..utils import ( ExtractorError, int_or_none, + js_to_json, orderedSet, remove_quotes, str_to_int, @@ -302,17 +303,14 @@ class PornHubIE(PornHubBaseIE): comment_count = self._extract_count( r'All Comments\s*\(([\d,.]+)\)', webpage, 'comment') - def _get_items(class_name): - div = self._search_regex( - r'
([\S\s]+?)
', - webpage, class_name, default=None) - if div: - return [a for a in re.findall(r']+>([^<]+)', div)] - else: - return None - - categories = _get_items('categoriesWrapper') - tags = _get_items('tagsWrapper') + page_params = self._parse_json(self._search_regex( + r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})', + webpage, 'page parameters', group='data', default='{}'), + video_id, transform_source=js_to_json, fatal=False) + tags = categories = None + if page_params: + tags = page_params.get('tags', '').split(',') + categories = page_params.get('categories', '').split(',') return { 'id': video_id, @@ -448,4 +446,4 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): break entries.extend(page_entries) - return self.playlist_result(entries, user_id) +return self.playlist_result(entries, user_id) From cddea1467a1b9f24e4294a4b8f1989873380ce1e Mon Sep 17 00:00:00 2001 From: jonathanjones6fe Date: Sun, 3 Feb 2019 13:42:36 -0400 Subject: [PATCH 2/2] . --- youtube_dl/extractor/pornhub.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 27b938ed2..b9cc459d4 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -446,4 +446,4 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE): break entries.extend(page_entries) -return self.playlist_result(entries, user_id) + return self.playlist_result(entries, user_id)