From 35f6a37332ce5a7485de0a19594d5a75de40b130 Mon Sep 17 00:00:00 2001
From: Monptitjojo
Date: Mon, 27 Oct 2014 19:51:23 +0100
Subject: [PATCH] [piwiplus] Add new extractor

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/piwiplus.py | 86 ++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)
 create mode 100644 youtube_dl/extractor/piwiplus.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3979b8270..be8379682 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -282,6 +282,7 @@ from .patreon import PatreonIE
 from .pbs import PBSIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .piwiplus import PiwiplusIE
 from .planetaplay import PlanetaPlayIE
 from .played import PlayedIE
 from .playfm import PlayFMIE
diff --git a/youtube_dl/extractor/piwiplus.py b/youtube_dl/extractor/piwiplus.py
new file mode 100644
index 000000000..c977f3b1d
--- /dev/null
+++ b/youtube_dl/extractor/piwiplus.py
@@ -0,0 +1,86 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    unified_strdate,
+    url_basename,
+)
+
+
+class PiwiplusIE(InfoExtractor):
+    """Extractor for www.piwiplus.fr pages and player.piwiplus.fr URLs.
+
+    Video metadata and format URLs are read from the XML document served at
+    _VIDEO_INFO_TEMPLATE for the resolved video id.
+    """
+
+    # 'path' is set for www.piwiplus.fr pages, 'id' for direct player URLs.
+    _VALID_URL = r'https?://(?:www\.piwiplus\.fr/.*?/(?P<path>.*)|player\.piwiplus\.fr/#/(?P<id>[0-9]+))'
+    _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/teletoon/%s'
+    IE_NAME = 'piwiplus.fr'
+
+    _TEST = {
+        'url': 'http://www.piwiplus.fr/videos-piwi/pid1405-le-labyrinthe-boing-super-ranger.html?vid=1108190',
+        'md5': '0f55da10e76cab297760f355401897c5',
+        'info_dict': {
+            'id': '922470',
+            'ext': 'flv',
+            'title': 'Le labyrinthe - Boing super ranger',
+            'upload_date': '20140724',
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (metadata and sorted formats) for url."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.groupdict().get('id')
+
+        # Player URLs match the 'id' alternative only, so 'path' is None for
+        # them; fall back to the numeric id rather than url_basename(None).
+        path = mobj.group('path')
+        display_id = url_basename(path) if path is not None else video_id
+
+        if video_id is None:
+            webpage = self._download_webpage(url, display_id)
+            # NOTE(review): the leading ']*?' looks like the remnant of a
+            # longer tag-matching prefix; confirm against the page markup.
+            video_id = self._search_regex(r']*?videoId="(\d+)"', webpage, 'video id')
+
+        info_url = self._VIDEO_INFO_TEMPLATE % video_id
+        doc = self._download_xml(info_url, video_id, 'Downloading video XML')
+
+        # Keep the entry of the XML document whose ID equals the video id.
+        video_info = [video for video in doc if video.find('ID').text == video_id][0]
+        media = video_info.find('MEDIA')
+        infos = video_info.find('INFOS')
+
+        # The position in this list is used as the format preference; tags
+        # that are not listed get -1.
+        preferences = ['MOBILE', 'BAS_DEBIT', 'HAUT_DEBIT', 'HD', 'HLS', 'HDS']
+
+        formats = [
+            {
+                'url': fmt.text + '?hdcore=2.11.3' if fmt.tag == 'HDS' else fmt.text,
+                'format_id': fmt.tag,
+                'ext': 'mp4' if fmt.tag == 'HLS' else 'flv',
+                'preference': preferences.index(fmt.tag) if fmt.tag in preferences else -1,
+            } for fmt in media.find('VIDEOS') if fmt.text
+        ]
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'title': '%s - %s' % (infos.find('TITRAGE/TITRE').text,
+                                  infos.find('TITRAGE/SOUS_TITRE').text),
+            'upload_date': unified_strdate(infos.find('PUBLICATION/DATE').text),
+            'thumbnail': media.find('IMAGES/GRAND').text,
+            'description': infos.find('DESCRIPTION').text,
+            'view_count': int(infos.find('NB_VUES').text),
+            'like_count': int(infos.find('NB_LIKES').text),
+            'comment_count': int(infos.find('NB_COMMENTS').text),
+            'formats': formats,
+        }