From 37828c2173d9ed8f6aa5c938b0cb5830d10b1fc9 Mon Sep 17 00:00:00 2001
From: Paul Tobias
Date: Tue, 25 Aug 2015 20:19:18 +0200
Subject: [PATCH] [bdsmstreak] Add new extractor

---
 youtube_dl/extractor/__init__.py   |  1 +
 youtube_dl/extractor/bdsmstreak.py | 77 ++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)
 create mode 100644 youtube_dl/extractor/bdsmstreak.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index d59882598..e1c5be847 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -47,6 +47,7 @@ from .bbc import (
     BBCCoUkIE,
     BBCIE,
 )
+from .bdsmstreak import BdsmstreakIE
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .beatportpro import BeatportProIE
diff --git a/youtube_dl/extractor/bdsmstreak.py b/youtube_dl/extractor/bdsmstreak.py
new file mode 100644
index 000000000..12145ba2f
--- /dev/null
+++ b/youtube_dl/extractor/bdsmstreak.py
@@ -0,0 +1,77 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+    float_or_none,
+    xpath_text,
+    xpath_with_ns,
+)
+
+
+class BdsmstreakIE(InfoExtractor):
+    """Extract bdsmstreak.com videos through the site's XSPF playlist endpoint."""
+
+    IE_NAME = 'bdsmstreak'
+    # `_real_extract()` reads the video id through `self._match_id()`, so the
+    # numeric part of the URL has to be captured in a group named `id`
+    _VALID_URL = r'https?://(?:www\.)?bdsmstreak\.com/video/(?P<id>[0-9]+)/'
+    # `{0:}` is replaced with the video id by `str.format()` in `_real_extract()`
+    _API_URL = 'http://www.bdsmstreak.com/media/nuevo/playlist.php?key={0:}'
+    _TEST = {
+        'url': 'http://www.bdsmstreak.com/video/21668/ride-the-horse',
+        'md5': 'a0b91a1579ce92af6b064f312646c00b',
+        'info_dict': {
+            'id': '21668',
+            'ext': 'mp4',
+            'title': 'Ride the horse',
+            'thumbnail': 'http://www.bdsmstreak.com/media/videos/tmb/21668/20.jpg',
+            'duration': 1302.15,
+            'age_limit': 18,
+        }
+    }
+
+    # This is similar to `InfoExtractor._parse_xspf()`, but the tag names are different
+    # This method is being called from `InfoExtractor._extract_xspf_playlist()`
+    def _parse_xspf(self, playlist, playlist_id):
+        """Build one info dict per `track` element of the XSPF `playlist` tree.
+
+        `playlist_id` is used as the id of every entry and as the title
+        fallback when a track has no `title` element.
+        """
+        NS_MAP = {
+            'xspf': 'http://xspf.org/ns/0/',
+        }
+
+        entries = []
+        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+            title = xpath_text(
+                track, xpath_with_ns('./xspf:title', NS_MAP), default=playlist_id)
+            thumbnail = xpath_text(
+                track, xpath_with_ns('./xspf:thumb', NS_MAP))
+            duration = float_or_none(
+                xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP)))
+            # TODO: 2 formats, flv and mp4
+            formats = [{
+                'url': xpath_text(
+                    track, xpath_with_ns('./xspf:html5', NS_MAP)
+                )
+            }]
+            self._sort_formats(formats)
+
+            entries.append({
+                'id': playlist_id,
+                'title': title,
+                # 'description': description,  # The description is in the webpage itself, but we don't even download that
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'formats': formats,
+                'age_limit': 18,
+            })
+        return entries
+
+    def _real_extract(self, url):
+        """Return a playlist result built from the XSPF playlist of the video in `url`."""
+        playlist_id = self._match_id(url)
+        playlist_url = self._API_URL.format(playlist_id)
+        entries = self._extract_xspf_playlist(playlist_url, playlist_id)
+        return self.playlist_result(entries, playlist_id)