From 3175e76ad0e7c601a0821d9eb9770f84d0e22934 Mon Sep 17 00:00:00 2001
From: root
Date: Fri, 3 Jul 2020 18:25:44 -0700
Subject: [PATCH 1/2] [Gab] Add new extractor

---
 youtube_dl/extractor/extractors.py |  1 +
 youtube_dl/extractor/gab.py        | 41 ++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 youtube_dl/extractor/gab.py

diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 4b3092028..7bdea4657 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -396,6 +396,7 @@ from .funk import FunkIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
 from .gaia import GaiaIE
+from .gab import GabIE
 from .gameinformer import GameInformerIE
 from .gamespot import GameSpotIE
 from .gamestar import GameStarIE
diff --git a/youtube_dl/extractor/gab.py b/youtube_dl/extractor/gab.py
new file mode 100644
index 000000000..ccc5d525c
--- /dev/null
+++ b/youtube_dl/extractor/gab.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+# Run the following to test
+# python test/test_download.py TestDownload.test_Gab
+
+
+class GabIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?gab\.com/\w+/posts/(?P<id>[0-9]+)+'
+    _TESTS = [
+        {
+            'url': 'https://gab.com/ACT1TV/posts/104450493441154721',
+            'md5': '04bbd2146e0afe033eb1cb184f3748ce',
+            'info_dict': {
+                'id': '104450493441154721',
+                'ext': 'mp4',
+                'title': 'Bill Blaze on Gab: \'He shoots, he scores and the crowd went wild.... \u2026\' - Gab Social',
+                'description': 'Bill Blaze on Gab: \'He shoots, he scores and the crowd went wild.... \n\u2026\'',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
+
+        url_result = re.search('https?://(?:www\.)?gab\.com/system/media_attachments/files/[0-9]+/[0-9]+/[0-9]+/original/\w+\.\w+', webpage)
+        video_url = url_result.group()
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': self._og_search_description(webpage),
+            'url': video_url,
+        }
From 469a31b9318e90a4357d0ed98dfb5721949e4c02 Mon Sep 17 00:00:00 2001
From: runraid
Date: Fri, 3 Jul 2020 18:54:25 -0700
Subject: [PATCH 2/2] Fix flake8

---
 youtube_dl/extractor/gab.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/gab.py b/youtube_dl/extractor/gab.py
index ccc5d525c..7593056a1 100644
--- a/youtube_dl/extractor/gab.py
+++ b/youtube_dl/extractor/gab.py
@@ -30,7 +30,7 @@ class GabIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         title = self._html_search_regex(r'<title>(.+?)</title>', webpage, 'title')
 
-        url_result = re.search('https?://(?:www\.)?gab\.com/system/media_attachments/files/[0-9]+/[0-9]+/[0-9]+/original/\w+\.\w+', webpage)
+        url_result = re.search(r'https?://(?:www\.)?gab\.com/system/media_attachments/files/[0-9]+/[0-9]+/[0-9]+/original/\w+\.\w+', webpage)
         video_url = url_result.group()
 
         return {