diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py index 9401bf2cf..e34cfd561 100644 --- a/youtube_dl/extractor/servus.py +++ b/youtube_dl/extractor/servus.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import ExtractorError, JSON_LD_RE class ServusIE(InfoExtractor): @@ -19,13 +20,14 @@ class ServusIE(InfoExtractor): _TESTS = [{ # new URL schema 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/', - 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4', + 'md5': '9f825d6ec14b3d8bebc5b23d094e1e51', 'info_dict': { 'id': 'AA-1T6VBU5PW1W12', 'ext': 'mp4', 'title': 'Die Grünen aus Sicht des Volkes', 'description': 'md5:1247204d85783afe3682644398ff2ec4', - 'thumbnail': r're:^https?://.*\.jpg', + 'upload_date': '20170911', + 'timestamp': 1505147648, } }, { # old URL schema @@ -46,24 +48,20 @@ class ServusIE(InfoExtractor): video_id = self._match_id(url).upper() webpage = self._download_webpage(url, video_id) - title = self._search_regex( - (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', - r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'), - webpage, 'title', default=None, - group='title') or self._og_search_title(webpage) - title = re.sub(r'\s*-\s*Servus TV\s*$', '', title) - description = self._og_search_description(webpage) - thumbnail = self._og_search_thumbnail(webpage) + if 'rbmh-video-player-trigger' not in webpage: + raise ExtractorError('Video not available (maybe not aired yet)', expected=True, video_id=video_id) - formats = self._extract_m3u8_formats( - 'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id, - video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') - self._sort_formats(formats) + info = {} + for match in re.finditer(JSON_LD_RE, webpage): + json_ld = match.group('json_ld') + info = self._json_ld(json_ld, video_id) + if info: + break + else: + raise ExtractorError('Could not extract video URL', video_id=video_id) - 
return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'formats': formats, - } + info['id'] = video_id + info['formats'] = self._extract_m3u8_formats(info['url'], video_id, 'mp4') + self._sort_formats(info['formats']) + + return info