From 1364b2b37d244573c332daa911e9cbdafb1c6ec6 Mon Sep 17 00:00:00 2001 From: Tim Landscheidt Date: Tue, 11 Dec 2018 17:53:48 +0000 Subject: [PATCH] [theadambuxtonpodcast] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/theadambuxtonpodcast.py | 42 ++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 youtube_dl/extractor/theadambuxtonpodcast.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6a5d12ab1..d1f0f8069 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1120,6 +1120,7 @@ from .tennistv import TennisTVIE from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE +from .theadambuxtonpodcast import TheAdamBuxtonPodcastIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformIE, diff --git a/youtube_dl/extractor/theadambuxtonpodcast.py b/youtube_dl/extractor/theadambuxtonpodcast.py new file mode 100644 index 000000000..f5c37fbd4 --- /dev/null +++ b/youtube_dl/extractor/theadambuxtonpodcast.py @@ -0,0 +1,42 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import url_or_none + + +class TheAdamBuxtonPodcastIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?adam-buxton.co.uk/podcasts/(?P.+)' + _TESTS = [{ + 'url': 'http://adam-buxton.co.uk/podcasts/ep32-tash-demetriou', + 'md5': '917af1ea59de06a4b4b138036303f57d', + 'info_dict': { + 'id': '289871919', + 'ext': 'mp3', + 'title': 'EP.32 - TASH DEMETRIOU', + 'upload_date': '20161025', + 'description': 'md5:7b752d1651e7ec5974dbefb9b6c9e018', + 'uploader': 'Adam Buxton' + } + }, { + 'url': 'http://adam-buxton.co.uk/podcasts/ep77-tim-key', + 'md5': '5e825b21eebf3b10beebf851ce429cdb', + 'info_dict': { + 'id': '45398950-21a3-4cff-8b77-833741e58686', + 'ext': 'mp3', + 'title': 'EP.77 - TIM KEY', + 'timestamp': 1528367852, + 'description': 'md5:b6b062c595de3149324bb0845783b864', + 'upload_date': '20180607' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + embedded_url = url_or_none(self._search_regex(r'(?s)]+class="view-content".*?]+src="([^"]+)"', webpage, 'iframe url', fatal=False)) + + return { + '_type': 'url', + 'url': embedded_url + }