From 69d05fd2dea21cb4b1cd69c7ecc097e04a9e76a2 Mon Sep 17 00:00:00 2001 From: Dante Date: Thu, 2 Apr 2020 18:38:12 -0700 Subject: [PATCH 01/11] Valid_url_Troubleshoot --- youtube_dl/extractor/Sonuma.py | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 youtube_dl/extractor/Sonuma.py diff --git a/youtube_dl/extractor/Sonuma.py b/youtube_dl/extractor/Sonuma.py new file mode 100644 index 000000000..1b1f64a4c --- /dev/null +++ b/youtube_dl/extractor/Sonuma.py @@ -0,0 +1,51 @@ +from __future__ import unicode_literals + +import re + + + + + +from .common import InfoExtractor + + + +class SonumaIE(InfoExtractor): + + _VALID_URL = r'(?:https?://)?(?:www\.)?sonuma\.be/archive/(?P\w+[-0-9])*)' + + + + def _real_extract(self, url): + + mobj=re.match(self._VALID_URL,url) + + + + video_id = mobj.group('id') + + webpage_url='https://sonuma.be/archive'+video_id + + webpage=self._download_webpage(webpage_url,video_id) + + + + self.report_extraction(video_id) + + + + video_url = self._html_search_regex('https://vod.infomaniak.com/redirect/sonumasa_2_vod/web2-39166/copy-32/cb88bd20-b57b-b756-e040-010a076419f5.mp4?sKey=1bc95ea22b94002d7a208593b7620d9f') + + + + return [{ + + 'id': video_id, + + 'url': video_url, + + 'ext': 'mp4', + + 'title': self._og_search_title(webpage), + + }] \ No newline at end of file From 389657b9869db0816601ace67cfe2175e57b3478 Mon Sep 17 00:00:00 2001 From: Dante Date: Thu, 2 Apr 2020 19:12:11 -0700 Subject: [PATCH 02/11] ixigua_code_basic --- youtube_dl/extractor/ixigua.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 youtube_dl/extractor/ixigua.py diff --git a/youtube_dl/extractor/ixigua.py b/youtube_dl/extractor/ixigua.py new file mode 100644 index 000000000..ec8fec847 --- /dev/null +++ b/youtube_dl/extractor/ixigua.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + + + +class SonumaIE(InfoExtractor): + + _VALID_URL = 
r'https://www\.sonuma\.be/archive/\S+' + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id + ) + + title = self._html_search_regex(r'(.+?)', webpage, 'title') + + download_url = self._html_search_regex( + + r'(https://cdn\.acidcow\.com/pics/[0-9]+/video/\S+\.mp4)', + + webpage, "download_url" + ) + return { + 'id': video_id, + 'url': download_url, + 'title': title + } \ No newline at end of file From dc1bae68c1b8e0e2ad985b0068c76be6c7cca4d5 Mon Sep 17 00:00:00 2001 From: Dante Date: Thu, 2 Apr 2020 19:23:45 -0700 Subject: [PATCH 03/11] ixigua_code_url_add --- youtube_dl/extractor/Sonuma.py | 51 ------------------------------ youtube_dl/extractor/extractors.py | 2 ++ youtube_dl/extractor/ixigua.py | 4 +-- 3 files changed, 4 insertions(+), 53 deletions(-) delete mode 100644 youtube_dl/extractor/Sonuma.py diff --git a/youtube_dl/extractor/Sonuma.py b/youtube_dl/extractor/Sonuma.py deleted file mode 100644 index 1b1f64a4c..000000000 --- a/youtube_dl/extractor/Sonuma.py +++ /dev/null @@ -1,51 +0,0 @@ -from __future__ import unicode_literals - -import re - - - - - -from .common import InfoExtractor - - - -class SonumaIE(InfoExtractor): - - _VALID_URL = r'(?:https?://)?(?:www\.)?sonuma\.be/archive/(?P\w+[-0-9])*)' - - - - def _real_extract(self, url): - - mobj=re.match(self._VALID_URL,url) - - - - video_id = mobj.group('id') - - webpage_url='https://sonuma.be/archive'+video_id - - webpage=self._download_webpage(webpage_url,video_id) - - - - self.report_extraction(video_id) - - - - video_url = self._html_search_regex('https://vod.infomaniak.com/redirect/sonumasa_2_vod/web2-39166/copy-32/cb88bd20-b57b-b756-e040-010a076419f5.mp4?sKey=1bc95ea22b94002d7a208593b7620d9f') - - - - return [{ - - 'id': video_id, - - 'url': video_url, - - 'ext': 'mp4', - - 'title': self._og_search_title(webpage), - - }] \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py 
index d0e9a858f..e311eec08 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -489,6 +489,8 @@ from .ivi import ( ) from .ivideon import IvideonIE from .iwara import IwaraIE + +from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jamendo import ( JamendoIE, diff --git a/youtube_dl/extractor/ixigua.py b/youtube_dl/extractor/ixigua.py index ec8fec847..311448bc9 100644 --- a/youtube_dl/extractor/ixigua.py +++ b/youtube_dl/extractor/ixigua.py @@ -5,9 +5,9 @@ from .common import InfoExtractor -class SonumaIE(InfoExtractor): +class IxiguaIE(InfoExtractor): - _VALID_URL = r'https://www\.sonuma\.be/archive/\S+' + _VALID_URL = r'https://www.ixigua.com/(?P\d+)/' def _real_extract(self, url): video_id = self._match_id(url) From 5896b2afb0b06d69fa355986b566ad95f218b7e5 Mon Sep 17 00:00:00 2001 From: Dante Date: Sun, 5 Apr 2020 21:23:23 -0700 Subject: [PATCH 04/11] Fixing Title Regex --- youtube_dl/extractor/ixigua.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/ixigua.py b/youtube_dl/extractor/ixigua.py index 311448bc9..c2c34c59a 100644 --- a/youtube_dl/extractor/ixigua.py +++ b/youtube_dl/extractor/ixigua.py @@ -7,7 +7,7 @@ from .common import InfoExtractor class IxiguaIE(InfoExtractor): - _VALID_URL = r'https://www.ixigua.com/(?P\d+)/' + _VALID_URL = r'https://www.ixigua.com/i(?P\d+)/' def _real_extract(self, url): video_id = self._match_id(url) @@ -16,7 +16,7 @@ class IxiguaIE(InfoExtractor): url, video_id ) - title = self._html_search_regex(r'(.+?)', webpage, 'title') + title = self._html_search_regex(r'(\S+) - \S+', webpage, 'title') download_url = self._html_search_regex( @@ -25,6 +25,7 @@ class IxiguaIE(InfoExtractor): webpage, "download_url" ) return { + 'id': video_id, 'url': download_url, 'title': title From 908d8d2bccd0c56bef7ec7f7113de7e3a5528151 Mon Sep 17 00:00:00 2001 From: Dante <m.hussain@wsu.edu> Date: Sun, 5 Apr 2020 21:46:19 -0700 Subject: [PATCH 05/11] 
Fixing Title Regex_ver_2 --- youtube_dl/extractor/ixigua.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/ixigua.py b/youtube_dl/extractor/ixigua.py index c2c34c59a..b3aefb17b 100644 --- a/youtube_dl/extractor/ixigua.py +++ b/youtube_dl/extractor/ixigua.py @@ -16,7 +16,7 @@ class IxiguaIE(InfoExtractor): url, video_id ) - title = self._html_search_regex(r'<title>(\S+) - \S+', webpage, 'title') + title = self._html_search_regex(r'<title>(\S+ - \S+)', webpage, 'title') download_url = self._html_search_regex( From 3e863e341810a1769f5010adeae07ba0ccaa812f Mon Sep 17 00:00:00 2001 From: Dante Date: Wed, 8 Apr 2020 16:25:53 -0700 Subject: [PATCH 06/11] adding a valid_url for metrotvnews --- youtube_dl/extractor/extractors.py | 3 +- youtube_dl/extractor/ixigua.py | 32 --------------------- youtube_dl/extractor/metrotvnews.py | 43 +++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 33 deletions(-) delete mode 100644 youtube_dl/extractor/ixigua.py create mode 100644 youtube_dl/extractor/metrotvnews.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index e311eec08..cb69c9cf4 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -490,7 +490,7 @@ from .ivi import ( from .ivideon import IvideonIE from .iwara import IwaraIE -from .ixigua import IxiguaIE + from .izlesene import IzleseneIE from .jamendo import ( JamendoIE, @@ -618,6 +618,7 @@ from .melonvod import MelonVODIE from .meta import METAIE from .metacafe import MetacafeIE from .metacritic import MetacriticIE +from .metrotvnews import MetrotvnewsIE from .mgoon import MgoonIE from .mgtv import MGTVIE from .miaopai import MiaoPaiIE diff --git a/youtube_dl/extractor/ixigua.py b/youtube_dl/extractor/ixigua.py deleted file mode 100644 index b3aefb17b..000000000 --- a/youtube_dl/extractor/ixigua.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import unicode_literals - -from .common import 
InfoExtractor - - - - -class IxiguaIE(InfoExtractor): - - _VALID_URL = r'https://www.ixigua.com/i(?P\d+)/' - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - url, video_id - ) - - title = self._html_search_regex(r'(\S+ - \S+)', webpage, 'title') - - download_url = self._html_search_regex( - - r'(https://cdn\.acidcow\.com/pics/[0-9]+/video/\S+\.mp4)', - - webpage, "download_url" - ) - return { - - 'id': video_id, - 'url': download_url, - 'title': title - } \ No newline at end of file diff --git a/youtube_dl/extractor/metrotvnews.py b/youtube_dl/extractor/metrotvnews.py new file mode 100644 index 000000000..3a1bb8924 --- /dev/null +++ b/youtube_dl/extractor/metrotvnews.py @@ -0,0 +1,43 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + + + +class MetrotvnewsIE(InfoExtractor): + _VALID_URL = r'https:\/\/www.metrotvnews\.com\/play\/(?P\S+)-\S+' + + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + url, video_id + ) + + title = self._html_search_regex(r'(.+) - www.metrotvnews.com<\/title>', webpage, 'title') + ''' + download_url = self._html_search_regex( + + r'(https:\/\/celebsroulette\.com\/get_file\/1\/\S+\/[0-9]+\/[0-9]+\/[0-9]+\.mp4)', + + webpage, "download_url" + ) + + download_url = self._html_search_regex( + + r'(https:\/\/5-337-10435-2.b.cdn13.com\/contents\/videos\/3000\/3032\/3032\.mp4\?.+)', + + webpage, "download_url" + ) + ''' + download_url = self._html_search_regex( + r'(https:\/\/cdn01\.metrotvnews\.com\/videos\/\d+\/\d+\/\d+\/\S+.mp4)', + webpage, "download_url" + ) + return { + 'id': video_id, + 'url': download_url, + 'title': title + } From 35d7908d785254d4cbf97b7699d1745269c27f42 Mon Sep 17 00:00:00 2001 From: Dante <m.hussain@wsu.edu> Date: Wed, 8 Apr 2020 16:30:00 -0700 Subject: [PATCH 07/11] Regex for title & Download_url --- youtube_dl/extractor/metrotvnews.py | 14 -------------- 1 file changed, 14 
deletions(-) diff --git a/youtube_dl/extractor/metrotvnews.py b/youtube_dl/extractor/metrotvnews.py index 3a1bb8924..1899e425c 100644 --- a/youtube_dl/extractor/metrotvnews.py +++ b/youtube_dl/extractor/metrotvnews.py @@ -17,21 +17,7 @@ class MetrotvnewsIE(InfoExtractor): ) title = self._html_search_regex(r'<title>(.+) - www.metrotvnews.com<\/title>', webpage, 'title') - ''' - download_url = self._html_search_regex( - r'(https:\/\/celebsroulette\.com\/get_file\/1\/\S+\/[0-9]+\/[0-9]+\/[0-9]+\.mp4)', - - webpage, "download_url" - ) - - download_url = self._html_search_regex( - - r'(https:\/\/5-337-10435-2.b.cdn13.com\/contents\/videos\/3000\/3032\/3032\.mp4\?.+)', - - webpage, "download_url" - ) - ''' download_url = self._html_search_regex( r'(https:\/\/cdn01\.metrotvnews\.com\/videos\/\d+\/\d+\/\d+\/\S+.mp4)', webpage, "download_url" From 3e9004cd8aed6aa49bb6b41e50b8368c9ad86155 Mon Sep 17 00:00:00 2001 From: Dante <m.hussain@wsu.edu> Date: Wed, 8 Apr 2020 16:33:29 -0700 Subject: [PATCH 08/11] Formatting the code --- youtube_dl/extractor/metrotvnews.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/metrotvnews.py b/youtube_dl/extractor/metrotvnews.py index 1899e425c..2a7890225 100644 --- a/youtube_dl/extractor/metrotvnews.py +++ b/youtube_dl/extractor/metrotvnews.py @@ -6,10 +6,12 @@ from .common import InfoExtractor class MetrotvnewsIE(InfoExtractor): + _VALID_URL = r'https:\/\/www.metrotvnews\.com\/play\/(?P<id>\S+)-\S+' def _real_extract(self, url): + video_id = self._match_id(url) webpage = self._download_webpage( @@ -22,6 +24,7 @@ class MetrotvnewsIE(InfoExtractor): r'(https:\/\/cdn01\.metrotvnews\.com\/videos\/\d+\/\d+\/\d+\/\S+.mp4)', webpage, "download_url" ) + return { 'id': video_id, 'url': download_url, From ff65f32387be1775dc282fefdcfd6c17e24705d1 Mon Sep 17 00:00:00 2001 From: Dante <m.hussain@wsu.edu> Date: Wed, 8 Apr 2020 19:35:36 -0700 Subject: [PATCH 09/11] Removed unnecessary configuration from extractor file --- 
youtube_dl/extractor/extractors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index cb69c9cf4..23f723604 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -18,7 +18,7 @@ from .acast import ( ACastIE, ACastChannelIE, ) -from .acidcow import acidcowIE + from .adn import ADNIE from .adobeconnect import AdobeConnectIE from .adobetv import ( From c55f7f1f9c21ec4668b928c60a8a84c0a1b96a37 Mon Sep 17 00:00:00 2001 From: Dante <m.hussain@wsu.edu> Date: Wed, 8 Apr 2020 20:26:47 -0700 Subject: [PATCH 10/11] Removed unmerged extractor file --- youtube_dl/extractor/acidcow.py | 39 --------------------------------- 1 file changed, 39 deletions(-) delete mode 100644 youtube_dl/extractor/acidcow.py diff --git a/youtube_dl/extractor/acidcow.py b/youtube_dl/extractor/acidcow.py deleted file mode 100644 index 3f8f9bb16..000000000 --- a/youtube_dl/extractor/acidcow.py +++ /dev/null @@ -1,39 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class acidcowIE(InfoExtractor): - """ - InfoExtractor for acid.cow - This class should be used to handle videos. Another class (TODO) will be - used to implement playlists or other content. 
- """ - - _VALID_URL = r'https?://acidcow\.com/video/(?P<id>\d+)-\S+' - - _TESTS = { - # TODO: Implement - - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - url, video_id - ) - - title = self._html_search_regex(r'<title>(.+?)', webpage, 'title') - - download_url = self._html_search_regex( - - r'(https://cdn\.acidcow\.com/pics/[0-9]+/video/\S+\.mp4)', - - webpage, "download_url" - ) - return { - 'id': video_id, - 'url': download_url, - 'title': title - } From 58be8f1966a42b70f7b9fc28afe7d5fe9556da12 Mon Sep 17 00:00:00 2001 From: Dante Date: Wed, 8 Apr 2020 20:40:51 -0700 Subject: [PATCH 11/11] Formatting of code according to convention --- youtube_dl/extractor/metrotvnews.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/youtube_dl/extractor/metrotvnews.py b/youtube_dl/extractor/metrotvnews.py index 2a7890225..7abf1c876 100644 --- a/youtube_dl/extractor/metrotvnews.py +++ b/youtube_dl/extractor/metrotvnews.py @@ -3,21 +3,15 @@ from __future__ import unicode_literals from .common import InfoExtractor - - class MetrotvnewsIE(InfoExtractor): _VALID_URL = r'https:\/\/www.metrotvnews\.com\/play\/(?P\S+)-\S+' - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( url, video_id ) - title = self._html_search_regex(r'(.+) - www.metrotvnews.com<\/title>', webpage, 'title') download_url = self._html_search_regex(