mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Split Thumbzilla out into its own extractor.
This simplifies the pornhub extractor and makes it easier to maintain in the future.
This commit is contained in:
parent
d332ec725d
commit
87f50e3feb
@ -1129,6 +1129,7 @@ from .thisamericanlife import ThisAmericanLifeIE
|
|||||||
from .thisav import ThisAVIE
|
from .thisav import ThisAVIE
|
||||||
from .thisoldhouse import ThisOldHouseIE
|
from .thisoldhouse import ThisOldHouseIE
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
|
from .thumbzilla import ThumbzillaIE
|
||||||
from .tiktok import (
|
from .tiktok import (
|
||||||
TikTokIE,
|
TikTokIE,
|
||||||
TikTokUserIE,
|
TikTokUserIE,
|
||||||
|
@ -51,10 +51,7 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
IE_DESC = 'PornHub and Thumbzilla'
|
IE_DESC = 'PornHub and Thumbzilla'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)
|
||||||
(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
|
||||||
)
|
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
@ -140,9 +137,6 @@ class PornHubIE(PornHubBaseIE):
|
|||||||
# private video
|
# private video
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
|
||||||
'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
61
youtube_dl/extractor/thumbzilla.py
Normal file
61
youtube_dl/extractor/thumbzilla.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_request
|
||||||
|
from .openload import PhantomJSwrapper
|
||||||
|
from .pornhub import PornHubIE
|
||||||
|
from ..utils import ExtractorError
|
||||||
|
|
||||||
|
|
||||||
|
class ThumbzillaIE(InfoExtractor):
    """
    ThumbzillaIE is a frontend for other 'Tube' sites (mostly PornHub).

    The extractor does not resolve any media itself: it reads the video id
    from the URL and delegates to the appropriate extractor via a url_result.
    """
    IE_NAME = 'thumbzilla'
    IE_DESC = 'Thumbzilla'
    # The 'host' group is kept in the pattern for compatibility, although
    # _real_extract only needs the 'id' group.
    _VALID_URL = r'https?://(?P<host>(?:www\.)?thumbzilla\.com)/video/(?P<id>[\da-z]+)'

    _TEST = {
        'url': 'https://www.thumbzilla.com/video/ph5c8e8f15b40ff/hot-skinny-girl-gives-you',
        'info_dict': {
            'id': 'ph5c8e8f15b40ff',
            'ext': 'mp4',
            'upload_date': '20190317',
            'age_limit': 18,
            'uploader': 'lizashultz',
            'title': 'Hot skinny girl gives you.',
        }
    }

    def _download_webpage_handle(self, *args, **kwargs):
        """
        Download a page, retrying once through PhantomJS when the response
        looks like a JavaScript challenge page.

        Accepts the same arguments as InfoExtractor._download_webpage_handle
        (the first positional argument is the URL or request object) and
        returns whatever the parent implementation returns.

        NOTE(review): _real_extract in this class does not download anything
        itself, so this override only matters when some other code path calls
        the download helpers on this instance.
        """
        def dl(*args, **kwargs):
            return super(ThumbzillaIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        # The parent can hand back a falsy value instead of a
        # (webpage, urlh) tuple (e.g. a non-fatal download failure);
        # pass it through untouched rather than failing on unpacking.
        if not ret:
            return ret

        webpage, urlh = ret

        # Markers of an interstitial page that sets an RNKEY cookie from
        # JavaScript and then reloads itself.
        challenge_patterns = (
            r'<body\b[^>]+\bonload=["\']go\(\)',
            r'document\.cookie\s*=\s*["\']RNKEY=',
            r'document\.location\.reload\(true\)',
        )
        if not any(re.search(p, webpage) for p in challenge_patterns):
            return webpage, urlh

        url_or_request = args[0]
        url = (url_or_request.get_full_url()
               if isinstance(url_or_request, compat_urllib_request.Request)
               else url_or_request)
        # Presumably runs the challenge script so the cookie ends up in the
        # extractor's cookie jar - TODO confirm against PhantomJSwrapper.
        phantom = PhantomJSwrapper(self, required_version='2.0')
        phantom.get(url, html=webpage)

        # Retry the original request now that the challenge has been handled.
        return dl(*args, **kwargs)

    def _real_extract(self, url):
        """
        Delegate a Thumbzilla video URL to the extractor that owns the video.

        Returns a url_result pointing at PornHubIE for ids starting with
        'ph'; raises ExtractorError for any other id.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Only PornHub-backed videos (ids prefixed with 'ph') are supported.
        if not video_id.startswith('ph'):
            raise ExtractorError('Unsupported video')

        return self.url_result(
            'https://pornhub.com/view_video.php?viewkey=%s' % video_id,
            video_id=video_id, ie=PornHubIE.ie_key())
|
Loading…
x
Reference in New Issue
Block a user