[slutload] Fix extraction for desktop and mobile site

Added a separate extractor for the mobile site because its page structure differs from that of the desktop site.
2020-11-18 19:53:54 -08:00 · 2018-07-18 12:33:06 +03:00 · 2018-07-18 12:33:06 +03:00 · dbd71aa288
commit dbd71aa288
parent 79367a9820
2 changed files with 54 additions and 18 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -988,7 +988,10 @@ from .skynewsarabia import (
 from .skysports import SkySportsIE
 from .slideshare import SlideshareIE
 from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
+from .slutload import (
+    SlutloadIE,
+    SlutloadMobileIE
+)
 from .smotri import (
    SmotriIE,
    SmotriCommunityIE,
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,13 +1,11 @@
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor


 class SlutloadIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
-    _TESTS = [{
+    _VALID_URL = r'^https?://(?:www\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _TEST = {
        'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
        'md5': '868309628ba00fd488cf516a113fd717',
        'info_dict': {
@@ -17,27 +15,62 @@ class SlutloadIE(InfoExtractor):
            'age_limit': 18,
            'thumbnail': r're:https?://.*?\.jpg'
        }
-    }, {
-        # mobile site
-        'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
-        'only_matching': True,
-    }]
+    }

    def _real_extract(self, url):
        video_id = self._match_id(url)

-        desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url)
-        webpage = self._download_webpage(desktop_url, video_id)
-
-        video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-                                              webpage, 'title').strip()
+        webpage = self._download_webpage(url, video_id)

+        video_title = self._html_search_regex(
+            r'<h1><strong>([^<]+)</strong>',
+            webpage, 'title').strip()
        video_url = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
+            r'(?s)<video id=["\']desktop-player["\'].+?<source src=["\']([^"\']+)["\']',
            webpage, 'video URL')
        thumbnail = self._html_search_regex(
-            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            r'(?s)<video id=["\']desktop-player["\'].+?poster=["\']([^"\']+)["\']',
+            webpage, 'thumbnail, fatal=False'
+        )
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'age_limit': 18
+        }
+
+
+class SlutloadMobileIE(InfoExtractor):
+    _VALID_URL = r'^https?://mobile\.slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
+    _TEST = {
+        'url': 'http://mobile.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
+        'md5': '868309628ba00fd488cf516a113fd717',
+        'info_dict': {
+            'id': 'TD73btpBqSxc',
+            'ext': 'mp4',
+            'age_limit': 18,
+            'title': 'virginie baisee en cam',
+            'thumbnail': r're:https?://.*?\.jpg'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_title = self._html_search_regex(
+            r'<div class=["\']videoHd[^"\']+["\']>([^<]+)</div>',
+            webpage, 'title').strip()
+        video_url = self._html_search_regex(
+            r'(?s)<video id=["\']html5video["\'].+?src=["\']([^"\']+)["\']',
+            webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r'(?s)<video id=["\']html5video["\'].+?poster=["\']([^"\']+)["\']',
+            webpage, 'thumbnail', fatal=False  # optional: don't abort if missing
+        )

        return {
            'id': video_id,