From 6af62a164c8199dd2dbe5d3c3c05b4ae2dff36fd Mon Sep 17 00:00:00 2001 From: Julien Lhermitte Date: Sun, 7 Apr 2019 14:13:47 -0400 Subject: [PATCH 1/2] fix(aol): fix the aol downloader --- youtube_dl/extractor/aol.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index cb9279193..467efb9bb 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -9,6 +9,9 @@ from ..utils import ( int_or_none, url_or_none, ) +from urllib.parse import urlparse +from urllib.request import urlopen +import lxml.html class AolIE(InfoExtractor): @@ -62,8 +65,31 @@ class AolIE(InfoExtractor): 'only_matching': True, }] + def _download_page(self, url): + return urlopen(url).read() + + def _download_and_extract_video_id_from_page(self, url): + page_bytes = self._download_page(url) + tree = lxml.html.fromstring(page_bytes) + src_xpath = tree.xpath("//script[contains(@src, " + "'delivery.vidible.tv')]") + src_tag = src_xpath[0].attrib.get('src') + parsed_vid_url = urlparse(src_tag) + vid_url_path = parsed_vid_url.path + vid_url_params = vid_url_path.split('/') + return self._find_vid_param(vid_url_params) + + def _find_vid_param(self, vid_url_params: [str]): + for param in vid_url_params: + if param.startswith('vid='): + return param.split('=')[1] + return None + def _real_extract(self, url): - video_id = self._match_id(url) + # video_id = self._match_id(url) + print(f'getting video_id') + video_id = self._download_and_extract_video_id_from_page(url) + print(f'video_id: {video_id}') response = self._download_json( 'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id, From d4651f39265d83447181ea1d2ecb79ced0f42b73 Mon Sep 17 00:00:00 2001 From: Julien Lhermitte Date: Sun, 7 Apr 2019 14:35:57 -0400 Subject: [PATCH 2/2] use regex instead --- youtube_dl/extractor/aol.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/youtube_dl/extractor/aol.py b/youtube_dl/extractor/aol.py index 467efb9bb..4f4ae6171 100644 --- a/youtube_dl/extractor/aol.py +++ b/youtube_dl/extractor/aol.py @@ -9,9 +9,7 @@ from ..utils import ( int_or_none, url_or_none, ) -from urllib.parse import urlparse from urllib.request import urlopen -import lxml.html class AolIE(InfoExtractor): @@ -69,27 +67,15 @@ class AolIE(InfoExtractor): return urlopen(url).read() def _download_and_extract_video_id_from_page(self, url): - page_bytes = self._download_page(url) - tree = lxml.html.fromstring(page_bytes) - src_xpath = tree.xpath("//script[contains(@src, " - "'delivery.vidible.tv')]") - src_tag = src_xpath[0].attrib.get('src') - parsed_vid_url = urlparse(src_tag) - vid_url_path = parsed_vid_url.path - vid_url_params = vid_url_path.split('/') - return self._find_vid_param(vid_url_params) - - def _find_vid_param(self, vid_url_params: [str]): - for param in vid_url_params: - if param.startswith('vid='): - return param.split('=')[1] - return None + video_id_regex = r'