From 4b99952e6cf9f767ec6dd5ff0ce89864a52fd703 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20H=C3=B6pfl?= Date: Wed, 13 Feb 2019 16:29:43 +0100 Subject: [PATCH] Fixes #18906: Fixes title extraction for vivo.sx. --- youtube_dl/extractor/shared.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 931a0f70e..4326fc820 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -1,6 +1,11 @@ from __future__ import unicode_literals -from .common import InfoExtractor +import re + +from .common import ( + InfoExtractor, + unescapeHTML +) from ..compat import compat_b64decode from ..utils import ( ExtractorError, @@ -22,8 +27,7 @@ class SharedBaseIE(InfoExtractor): video_url = self._extract_video_url(webpage, video_id, url) - title = compat_b64decode(self._html_search_meta( - 'full:title', webpage, 'title')).decode('utf-8') + title = self._extract_title(webpage) filesize = int_or_none(self._html_search_meta( 'full:size', webpage, 'file size', fatal=False)) @@ -35,6 +39,10 @@ class SharedBaseIE(InfoExtractor): 'title': title, } + def _extract_title(self, webpage): + return compat_b64decode(self._html_search_meta( + 'full:title', webpage, 'title')).decode('utf-8') + class SharedIE(SharedBaseIE): IE_DESC = 'shared.sx' @@ -86,6 +94,14 @@ class VivoIE(SharedBaseIE): }, } + def _extract_title(self, webpage): + data_title = self._search_regex( + r'data-name\s*=\s*(["\'])(?P<title>(?:.(?!\1))*.)\1', webpage, + 'title', default=None, group='title') + if data_title: + return unescapeHTML(re.sub(r"\.[a-z0-9]{3,4}$", "", data_title)) + return None + def _extract_video_url(self, webpage, video_id, *args): def decode_url(encoded_url): return compat_b64decode(encoded_url).decode('utf-8')