1
0
mirror of https://github.com/l1ving/youtube-dl synced 2020-11-18 19:53:54 -08:00

[slutload] Fix extraction for desktop and mobile site

Added a separate extractor for the mobile site,
because its page structure differs from that of the desktop site.
This commit is contained in:
Gabor Miklos 2018-07-18 12:33:06 +03:00
parent 79367a9820
commit dbd71aa288
2 changed files with 54 additions and 18 deletions

View File

@ -988,7 +988,10 @@ from .skynewsarabia import (
from .skysports import SkySportsIE from .skysports import SkySportsIE
from .slideshare import SlideshareIE from .slideshare import SlideshareIE
from .slideslive import SlidesLiveIE from .slideslive import SlidesLiveIE
from .slutload import SlutloadIE from .slutload import (
SlutloadIE,
SlutloadMobileIE
)
from .smotri import ( from .smotri import (
SmotriIE, SmotriIE,
SmotriCommunityIE, SmotriCommunityIE,

View File

@ -1,13 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class SlutloadIE(InfoExtractor): class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' _VALID_URL = r'^https?://(?:www\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
_TESTS = [{ _TEST = {
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717', 'md5': '868309628ba00fd488cf516a113fd717',
'info_dict': { 'info_dict': {
@ -17,27 +15,62 @@ class SlutloadIE(InfoExtractor):
'age_limit': 18, 'age_limit': 18,
'thumbnail': r're:https?://.*?\.jpg' 'thumbnail': r're:https?://.*?\.jpg'
} }
}, { }
# mobile site
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
'only_matching': True,
}]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url) webpage = self._download_webpage(url, video_id)
webpage = self._download_webpage(desktop_url, video_id)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
webpage, 'title').strip()
video_title = self._html_search_regex(
r'<h1><strong>([^<]+)</strong>',
webpage, 'title').strip()
video_url = self._html_search_regex( video_url = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', r'(?s)<video id=["\']desktop-player["\'].+?<source src=["\']([^"\']+)["\']',
webpage, 'video URL') webpage, 'video URL')
thumbnail = self._html_search_regex( thumbnail = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"', r'(?s)<video id=["\']desktop-player["\'].+?poster=["\']([^"\']+)["\']',
webpage, 'thumbnail', fatal=False) webpage, 'thumbnail, fatal=False'
)
return {
'id': video_id,
'url': video_url,
'title': video_title,
'thumbnail': thumbnail,
'age_limit': 18
}
class SlutloadMobileIE(InfoExtractor):
_VALID_URL = r'^https?://mobile\.slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
_TEST = {
'url': 'http://mobile.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717',
'info_dict': {
'id': 'TD73btpBqSxc',
'ext': 'mp4',
'age_limit': 18,
'title': 'virginie baisee en cam',
'thumbnail': r're:https?://.*?\.jpg'
}
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._html_search_regex(
r'<div class=["\']videoHd[^"\']+["\']>([^<]+)</div>',
webpage, 'title').strip()
video_url = self._html_search_regex(
r'(?s)<video id=["\']html5video["\'].+?src=["\']([^"\']+)["\']',
webpage, 'video URL')
thumbnail = self._html_search_regex(
r'(?s)<video id=["\']html5video["\'].+?poster=["\']([^"\']+)["\']',
webpage, 'thumbnail, fatal=False'
)
return { return {
'id': video_id, 'id': video_id,