From d491aaabc122f39b4f0d8f927ca6a7ada4deb795 Mon Sep 17 00:00:00 2001 From: tetra-eder Date: Thu, 10 Aug 2017 11:19:33 +0200 Subject: [PATCH 1/2] [vzaar] update to generic extractor for embedded urls --- youtube_dl/extractor/generic.py | 18 +++++++++++++++++- youtube_dl/extractor/vzaar.py | 10 +++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 34e814988..176e35b38 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -98,7 +98,7 @@ from .wistia import WistiaIE from .mediaset import MediasetIE from .joj import JojIE from .megaphone import MegaphoneIE - +from .vzaar import VzaarIE class GenericIE(InfoExtractor): IE_DESC = 'Generic downloader that works on some sites' @@ -1840,6 +1840,16 @@ class GenericIE(InfoExtractor): 'title': 'Стас Намин: «Мы нарушили девственность Кремля»', }, }, + { + # vzaar embed + 'url': 'http://www.xruniversity.com/bdsm-lets-begin-melissa-moore/', + 'md5': 'cddc9fb8a8644a0a7742149eee95080b', + 'info_dict': { + 'id': '11002506', + 'ext': 'mp4', + 'title': 'XR-U SHOW: Ready Player Fuck - EP. 
61', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2781,6 +2791,12 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key()) + # Look for vzaar embeds + vzaar_urls = VzaarIE._extract_urls(webpage) + if vzaar_urls: + return self.playlist_from_matches( + vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key()) + # Look for Rutube embeds rutube_urls = RutubeIE._extract_urls(webpage) if rutube_urls: diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b270f08d1..b4d7903ed 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( int_or_none, @@ -9,7 +11,7 @@ from ..utils import ( class VzaarIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' + _VALID_URL = r'(https?://)?(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://vzaar.com/videos/1152805', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', @@ -28,6 +30,12 @@ class VzaarIE(InfoExtractor): }, }] + @staticmethod + def _extract_urls(webpage): + return re.findall( + r'<iframe[^>]+src=["\']//((?:view.vzaar\.com)/[0-9]+)', + webpage) + def _real_extract(self, url): video_id = self._match_id(url) video_data = self._download_json( From 8ef6979b10519d62aa5b3ab527eab26f1890e9a3 Mon Sep 17 00:00:00 2001 From: tetra-eder Date: Fri, 11 Aug 2017 12:13:14 +0200 Subject: [PATCH 2/2] add requested changes --- youtube_dl/extractor/generic.py | 1 + youtube_dl/extractor/vzaar.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 176e35b38..51acead66 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -100,6 +100,7 @@ from .joj import JojIE from .megaphone 
import MegaphoneIE from .vzaar import VzaarIE + + class GenericIE(InfoExtractor): IE_DESC = 'Generic downloader that works on some sites' _VALID_URL = r'.*' diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py index b4d7903ed..02fcd52c7 100644 --- a/youtube_dl/extractor/vzaar.py +++ b/youtube_dl/extractor/vzaar.py @@ -11,7 +11,7 @@ from ..utils import ( class VzaarIE(InfoExtractor): - _VALID_URL = r'(https?://)?(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' + _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)' _TESTS = [{ 'url': 'https://vzaar.com/videos/1152805', 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf', @@ -33,7 +33,7 @@ class VzaarIE(InfoExtractor): @staticmethod def _extract_urls(webpage): return re.findall( - r'<iframe[^>]+src=["\']//((?:view.vzaar\.com)/[0-9]+)', + r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)', webpage) def _real_extract(self, url):