1
0
mirror of https://github.com/l1ving/youtube-dl synced 2020-11-18 19:53:54 -08:00

Split Thumbzilla out into its own extractor.

This simplifies the pornhub extractor and makes it easier to maintain in
the future.
This commit is contained in:
Tristan Waddington 2020-03-07 15:20:41 -08:00
parent d332ec725d
commit 87f50e3feb
No known key found for this signature in database
GPG Key ID: 19AEB0C1E96BC8EB
3 changed files with 63 additions and 7 deletions

View File

@ -1129,6 +1129,7 @@ from .thisamericanlife import ThisAmericanLifeIE
from .thisav import ThisAVIE from .thisav import ThisAVIE
from .thisoldhouse import ThisOldHouseIE from .thisoldhouse import ThisOldHouseIE
from .threeqsdn import ThreeQSDNIE from .threeqsdn import ThreeQSDNIE
from .thumbzilla import ThumbzillaIE
from .tiktok import ( from .tiktok import (
TikTokIE, TikTokIE,
TikTokUserIE, TikTokUserIE,

View File

@ -51,10 +51,7 @@ class PornHubIE(PornHubBaseIE):
IE_DESC = 'PornHub and Thumbzilla' IE_DESC = 'PornHub and Thumbzilla'
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+) (?P<id>[\da-z]+)
''' '''
_TESTS = [{ _TESTS = [{
@ -140,9 +137,6 @@ class PornHubIE(PornHubBaseIE):
# private video # private video
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7', 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
'only_matching': True,
}, { }, {
'url': 'http://www.pornhub.com/video/show?viewkey=648719015', 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
'only_matching': True, 'only_matching': True,

View File

@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urllib_request
from .openload import PhantomJSwrapper
from .pornhub import PornHubIE
from ..utils import ExtractorError
class ThumbzillaIE(InfoExtractor):
    """
    Extractor for thumbzilla.com video pages.

    Thumbzilla is a frontend for other 'Tube' sites (mostly PornHub). This
    extractor reads the video id from the URL and delegates to the
    appropriate extractor via a url_result; it does not resolve any media
    formats itself. Only ids starting with 'ph' are supported, and those are
    handed to PornHubIE.
    """
    IE_NAME = 'thumbzilla'
    IE_DESC = 'Thumbzilla'
    _VALID_URL = r'https?://(?P<host>(?:www\.)?thumbzilla\.com)/video/(?P<id>[\da-z]+)'
    _TEST = {
        'url': 'https://www.thumbzilla.com/video/ph5c8e8f15b40ff/hot-skinny-girl-gives-you',
        'info_dict': {
            'id': 'ph5c8e8f15b40ff',
            'ext': 'mp4',
            'upload_date': '20190317',
            'age_limit': 18,
            'uploader': 'lizashultz',
            'title': 'Hot skinny girl gives you.',
        }
    }

    def _download_webpage_handle(self, *args, **kwargs):
        """
        Download a page, retrying once through PhantomJS when the response
        looks like a JavaScript challenge page.

        Arguments are forwarded unchanged to the parent implementation; the
        first positional argument must be the URL or a Request object.

        Returns whatever the parent returns: normally a
        ``(webpage, url_handle)`` tuple, or a falsy value when the parent
        reports a non-fatal download failure.

        NOTE(review): _real_extract in this class does not download any page,
        so this override only matters for callers that fetch pages through
        this extractor instance.
        """
        dl = super(ThumbzillaIE, self)._download_webpage_handle

        result = dl(*args, **kwargs)
        # The parent can hand back a falsy value instead of a tuple (e.g. a
        # failed download with fatal=False); pass that through rather than
        # crashing on tuple unpacking.
        if not result:
            return result
        webpage, urlh = result

        # Markers of the interstitial page that sets a cookie via JavaScript
        # and then reloads itself.
        challenge_patterns = (
            r'<body\b[^>]+\bonload=["\']go\(\)',
            r'document\.cookie\s*=\s*["\']RNKEY=',
            r'document\.location\.reload\(true\)',
        )
        if not any(re.search(p, webpage) for p in challenge_patterns):
            return webpage, urlh

        url_or_request = args[0]
        if isinstance(url_or_request, compat_urllib_request.Request):
            url = url_or_request.get_full_url()
        else:
            url = url_or_request

        # Presumably lets PhantomJS run the challenge script so the required
        # cookie ends up in the shared cookie jar -- confirm against
        # PhantomJSwrapper.get.
        phantom = PhantomJSwrapper(self, required_version='2.0')
        phantom.get(url, html=webpage)

        # Second attempt; a falsy result is again returned as-is.
        return dl(*args, **kwargs)

    def _real_extract(self, url):
        """
        Delegate a Thumbzilla video URL to the extractor of the hosting site.

        Returns a url_result pointing at the matching PornHub page for ids
        that start with 'ph'. Raises ExtractorError for any other id.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        if not video_id.startswith('ph'):
            raise ExtractorError('Unsupported video')

        return self.url_result(
            'https://pornhub.com/view_video.php?viewkey=%s' % video_id,
            video_id=video_id, ie=PornHubIE.ie_key())