From 15a0eb17a27533f9036caccee1b127b0f0162d61 Mon Sep 17 00:00:00 2001 From: oleksis Date: Fri, 1 Feb 2019 19:07:44 -0500 Subject: [PATCH 01/19] [picta] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/picta.py | 61 ++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 youtube_dl/extractor/picta.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 2ffcffa9e..6fbf57fff 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -859,6 +859,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) +from .picta import PictaIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py new file mode 100644 index 000000000..674a6251c --- /dev/null +++ b/youtube_dl/extractor/picta.py @@ -0,0 +1,61 @@ +# coding: utf-8 +from __future__ import unicode_literals +from ..utils import ( + int_or_none, + unified_timestamp, + ExtractorError +) +from .common import InfoExtractor + + +class PictaIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?picta\.cu/medias/(?P[0-9]+)' + _TEST = { + 'url': 'https://www.picta.cu/medias/818', + 'file': 'Orishas - Everyday-818.webm', + 'md5': 'ebd10d5a34f23059e08419aa123aebdb', + 'info_dict': { + 'id': '818', + 'ext': 'webm', + 'title': 'Orishas - Everyday', + 'thumbnail': r're:^https?://.*imagen/img.*\.png$', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + # https://www.picta.cu/api/v1/publicacion/?id_publicacion=818&tipo=publicacion + # https://www.picta.cu/api/v1/publicacion/?format=json&id_publicacion=818&tipo=publicacion + json_url = 'https://www.picta.cu/api/v1/publicacion/?format=json&id_publicacion=' + \ + str(video_id) + '&tipo=publicacion' + # JSON MetaFields + meta = self._download_json(json_url, video_id) + # Fields + title = meta.get('results')[0].get('nombre') or self._search_regex( + r']+class="post-video-title"[^>]*>([^<]+)', webpage, 'title') + description = meta.get('results')[0].get('descripcion') + uploader = meta.get('results')[0].get('usuario') + add_date = meta.get('results')[0].get('fecha_creacion') + timestamp = int_or_none(unified_timestamp(add_date)) + thumbnail = meta.get('results')[0].get('url_imagen') + manifest_url = meta.get('results')[0].get('url_manifiesto') + # Formats + formats = [] + # MPD manifest + if manifest_url: + formats.extend(self._extract_mpd_formats(manifest_url, video_id)) + if not formats: + raise ExtractorError('Cannot find video formats') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'description': description, + 'uploader': uploader, + 'timestamp': timestamp, + 'thumbnail': thumbnail, + } From bb72d1bed22123639399c81c1e59712453216b14 Mon Sep 17 00:00:00 2001 From: oleksis Date: Sat, 2 Feb 2019 00:30:27 -0500 Subject: [PATCH 02/19] [picta] Fix coding conventions --- youtube_dl/extractor/picta.py | 67 ++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 674a6251c..9f178b98d 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -1,14 +1,39 @@ # coding: utf-8 from __future__ import unicode_literals +from ..compat import compat_str from ..utils import ( int_or_none, unified_timestamp, + try_get, ExtractorError ) from .common import InfoExtractor -class PictaIE(InfoExtractor): +class PictaBaseIE(InfoExtractor): + def _extract_video(self, video, video_id=None, require_title=True): + title = video['results'][0]['nombre'] if require_title else video.get('results')[0].get('nombre') + description = try_get(video, lambda x: x['results'][0]['descripcion'], compat_str) + uploader = try_get(video, lambda x: x['results'][0]['usuario'], compat_str) + add_date = try_get(video, lambda x: x['results'][0]['fecha_creacion']) + timestamp = int_or_none(unified_timestamp(add_date)) + thumbnail = try_get(video, lambda x: x['results'][0]['url_imagen']) + manifest_url = try_get(video, lambda x: x['results'][0]['url_manifiesto']) + category = try_get(video, lambda x: x['results'][0]['canal'], compat_str) + + return { + 'id': try_get(video, lambda x: x['results'][0]['id'], compat_str) or video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'uploader': uploader, + 'timestamp': timestamp, + 'category': [category] if category else None, + 'manifest_url': manifest_url, + } + + +class PictaIE(PictaBaseIE): _VALID_URL = r'https?://(?:www\.)?picta\.cu/medias/(?P[0-9]+)' _TEST = { 'url': 'https://www.picta.cu/medias/818', @@ -24,38 +49,22 @@ class PictaIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - # https://www.picta.cu/api/v1/publicacion/?id_publicacion=818&tipo=publicacion - # https://www.picta.cu/api/v1/publicacion/?format=json&id_publicacion=818&tipo=publicacion - json_url = 'https://www.picta.cu/api/v1/publicacion/?format=json&id_publicacion=' + \ - str(video_id) + '&tipo=publicacion' - # JSON MetaFields - meta = self._download_json(json_url, video_id) - # Fields - title = meta.get('results')[0].get('nombre') or self._search_regex( - r']+class="post-video-title"[^>]*>([^<]+)', webpage, 'title') - description = meta.get('results')[0].get('descripcion') - uploader = meta.get('results')[0].get('usuario') - add_date = meta.get('results')[0].get('fecha_creacion') - timestamp = int_or_none(unified_timestamp(add_date)) - thumbnail = meta.get('results')[0].get('url_imagen') - manifest_url = meta.get('results')[0].get('url_manifiesto') - # Formats + + api_url = 'https://www.picta.cu/api/v1/publicacion/' + json_url = api_url + '?format=json&id_publicacion=%s&tipo=publicacion' % video_id + + video = self._download_json(json_url, video_id, 'Downloading video JSON') + + info = self._extract_video(video, video_id) + formats = [] # MPD manifest - if manifest_url: - formats.extend(self._extract_mpd_formats(manifest_url, video_id)) + if info.get('manifest_url'): + formats.extend(self._extract_mpd_formats(info.get('manifest_url'), video_id)) if not formats: raise ExtractorError('Cannot find video formats') self._sort_formats(formats) - return { - 'id': video_id, - 'title': title, - 'formats': formats, - 'description': description, - 'uploader': uploader, - 'timestamp': timestamp, - 'thumbnail': thumbnail, - } + info['formats'] = formats + return info From d9cf3bfaa940c3d60c0ceead9dd776b7a78ef897 Mon Sep 17 00:00:00 2001 From: oleksis Date: Thu, 7 Feb 2019 18:38:41 -0500 Subject: [PATCH 03/19] [picta] Extract embedding videos --- youtube_dl/extractor/picta.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 9f178b98d..009e18b1e 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -34,8 +34,11 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): - _VALID_URL = r'https?://(?:www\.)?picta\.cu/medias/(?P[0-9]+)' - _TEST = { + IE_NAME = 'picta' + IE_DESC = 'Picta videos' + _VALID_URL = r'https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[0-9]+)' + + _TESTS = [{ 'url': 'https://www.picta.cu/medias/818', 'file': 'Orishas - Everyday-818.webm', 'md5': 'ebd10d5a34f23059e08419aa123aebdb', @@ -45,7 +48,10 @@ class PictaIE(PictaBaseIE): 'title': 'Orishas - Everyday', 'thumbnail': r're:^https?://.*imagen/img.*\.png$', } - } + }, { + 'url': 'https://www.picta.cu/embed/?v=818', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -68,3 +74,26 @@ class PictaIE(PictaBaseIE): info['formats'] = formats return info + + +class PictaEmbedIE(InfoExtractor): + IE_NAME = 'picta:embed' + IE_DESC = 'Picta embedded videos' + _VALID_URL = r'https?://www\.picta\.cu/embed/\?v=(?P[0-9]+)' + + _TEST = { + 'url': 'https://www.picta.cu/embed/?v=818', + 'file': 'Orishas - Everyday-818.webm', + 'md5': 'ebd10d5a34f23059e08419aa123aebdb', + 'info_dict': { + 'id': '818', + 'ext': 'webm', + 'title': 'Orishas - Everyday', + 'thumbnail': r're:^https?://.*imagen/img.*\.png$', + } + } + + def _real_extract(self, url): + embed_id = self._match_id(url) + video_url = 'https://www.picta.cu/medias/%s' % embed_id + return self.url_result(video_url, PictaIE.ie_key()) From 5c619d2d5eb6ebcd76d971c77db924ce01c23eb8 Mon Sep 17 00:00:00 2001 From: Oleksis Date: Sat, 16 Feb 2019 03:02:32 -0500 Subject: [PATCH 04/19] [picta] Add field API_BASE_URL in PictaBaseIE class Add support for slug url --- youtube_dl/extractor/picta.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 009e18b1e..65530f9a0 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -5,12 +5,15 @@ from ..utils import ( int_or_none, unified_timestamp, try_get, + base_url, ExtractorError ) from .common import InfoExtractor class PictaBaseIE(InfoExtractor): + API_BASE_URL = 'https://www.picta.cu/api/v1/' + def _extract_video(self, video, video_id=None, require_title=True): title = video['results'][0]['nombre'] if require_title else video.get('results')[0].get('nombre') description = try_get(video, lambda x: x['results'][0]['descripcion'], compat_str) @@ -36,11 +39,11 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): IE_NAME = 'picta' IE_DESC = 'Picta videos' - _VALID_URL = r'https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[0-9]+)' + _VALID_URL = r'https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)' _TESTS = [{ - 'url': 'https://www.picta.cu/medias/818', - 'file': 'Orishas - Everyday-818.webm', + 'url': 'https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003', + 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', 'md5': 'ebd10d5a34f23059e08419aa123aebdb', 'info_dict': { 'id': '818', @@ -55,9 +58,10 @@ class PictaIE(PictaBaseIE): def _real_extract(self, url): video_id = self._match_id(url) - - api_url = 'https://www.picta.cu/api/v1/publicacion/' - json_url = api_url + '?format=json&id_publicacion=%s&tipo=publicacion' % video_id + if base_url(url).find('medias') != -1: + json_url = self.API_BASE_URL + 'publicacion/?format=json&slug_url=%s&tipo=publicacion' % video_id + else: + json_url = self.API_BASE_URL + 'publicacion/?format=json&id_publicacion=%s&tipo=publicacion' % video_id video = self._download_json(json_url, video_id, 'Downloading video JSON') @@ -83,7 +87,7 @@ class PictaEmbedIE(InfoExtractor): _TEST = { 'url': 'https://www.picta.cu/embed/?v=818', - 'file': 'Orishas - Everyday-818.webm', + 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', 'md5': 'ebd10d5a34f23059e08419aa123aebdb', 'info_dict': { 'id': '818', @@ -94,6 +98,4 @@ class PictaEmbedIE(InfoExtractor): } def _real_extract(self, url): - embed_id = self._match_id(url) - video_url = 'https://www.picta.cu/medias/%s' % embed_id - return self.url_result(video_url, PictaIE.ie_key()) + return self.url_result(url, PictaIE.ie_key()) From 270fcc1d742fd36256d37b276fc66bec6045ae4d Mon Sep 17 00:00:00 2001 From: Oleksis Date: Sat, 16 Feb 2019 18:25:59 -0500 Subject: [PATCH 05/19] [picta] Add default formats --- youtube_dl/extractor/picta.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 65530f9a0..bf02628f6 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -40,6 +40,19 @@ class PictaIE(PictaBaseIE): IE_NAME = 'picta' IE_DESC = 'Picta videos' _VALID_URL = r'https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)' + _formats = { + # Dash webm + '0': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, + '1': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '2': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '3': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + '4': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + + # Dash webm audio with opus inside + '5': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 128}, + '6': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 134}, + + } _TESTS = [{ 'url': 'https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003', @@ -50,7 +63,7 @@ class PictaIE(PictaBaseIE): 'ext': 'webm', 'title': 'Orishas - Everyday', 'thumbnail': r're:^https?://.*imagen/img.*\.png$', - } + }, }, { 'url': 'https://www.picta.cu/embed/?v=818', 'only_matching': True, @@ -70,7 +83,7 @@ class PictaIE(PictaBaseIE): formats = [] # MPD manifest if info.get('manifest_url'): - formats.extend(self._extract_mpd_formats(info.get('manifest_url'), video_id)) + formats.extend(self._extract_mpd_formats(info.get('manifest_url'), video_id, formats_dict=self._formats)) if not formats: raise ExtractorError('Cannot find video formats') From 62b9e7c2c60184c4b58dbb5950b1d36964c7219e Mon Sep 17 00:00:00 2001 From: Oleksis Date: Sat, 16 Feb 2019 22:36:31 -0500 Subject: [PATCH 06/19] [picta] Fix Tests Picta --- youtube_dl/extractor/picta.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index bf02628f6..0b30f155b 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -57,12 +57,19 @@ class PictaIE(PictaBaseIE): _TESTS = [{ 'url': 'https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003', 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', - 'md5': 'ebd10d5a34f23059e08419aa123aebdb', + 'md5': '7ffdeb0043500c4bb660c04e74e90f7a', 'info_dict': { - 'id': '818', + 'id': 'orishas-everyday-2019-01-16-16-36-42-443003', 'ext': 'webm', 'title': 'Orishas - Everyday', 'thumbnail': r're:^https?://.*imagen/img.*\.png$', + 'upload_date': '20190116', + 'description': 'Orishas - Everyday (Video Oficial)', + 'uploader': 'admin', + 'timestamp': 1547656602, + }, + 'params': { + 'format': '4', }, }, { 'url': 'https://www.picta.cu/embed/?v=818', @@ -101,13 +108,20 @@ class PictaEmbedIE(InfoExtractor): _TEST = { 'url': 'https://www.picta.cu/embed/?v=818', 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', - 'md5': 'ebd10d5a34f23059e08419aa123aebdb', + 'md5': '7ffdeb0043500c4bb660c04e74e90f7a', 'info_dict': { - 'id': '818', + 'id': 'orishas-everyday-2019-01-16-16-36-42-443003', 'ext': 'webm', 'title': 'Orishas - Everyday', 'thumbnail': r're:^https?://.*imagen/img.*\.png$', - } + 'upload_date': '20190116', + 'description': 'Orishas - Everyday (Video Oficial)', + 'uploader': 'admin', + 'timestamp': 1547656602, + }, + 'params': { + 'format': '4', + }, } def _real_extract(self, url): From 3fb09eecd514263576cb81f374f554545c7f40ce Mon Sep 17 00:00:00 2001 From: Oleksis Date: Tue, 26 Feb 2019 21:33:14 -0500 Subject: [PATCH 07/19] [picta] Fix API_BASE_URL --- youtube_dl/extractor/picta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 0b30f155b..adf798748 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -12,7 +12,7 @@ from .common import InfoExtractor class PictaBaseIE(InfoExtractor): - API_BASE_URL = 'https://www.picta.cu/api/v1/' + API_BASE_URL = 'https://api.picta.cu/api/v1/' def _extract_video(self, video, video_id=None, require_title=True): title = video['results'][0]['nombre'] if require_title else video.get('results')[0].get('nombre') From b5626f19791002fa024ec7e15272d5641375f3ea Mon Sep 17 00:00:00 2001 From: Oleksis Date: Sat, 6 Apr 2019 02:29:01 -0400 Subject: [PATCH 08/19] [picta] Fix KeyError when missing attributes in MPD Get element attribute using element.get('key') instead of element.attrib['index'] manifests like: SegmentList/Initialization@sourceURL SegmentList/SegmentURL@media Example MPD manifest https://www.picta.cu/videos/3bf20311b6564d61a0fd519b3c7dcced/manifest.mpd --- youtube_dl/extractor/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c4ea2882f..cec918b77 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2054,8 +2054,8 @@ class InfoExtractor(object): def extract_Initialization(source): initialization = source.find(_add_ns('Initialization')) - if initialization is not None: - ms_info['initialization_url'] = initialization.attrib['sourceURL'] + if initialization is not None and initialization.get('sourceURL') is not None: + ms_info['initialization_url'] = initialization.get('sourceURL') segment_list = element.find(_add_ns('SegmentList')) if segment_list is not None: @@ -2063,7 +2063,9 @@ class InfoExtractor(object): extract_Initialization(segment_list) segment_urls_e = segment_list.findall(_add_ns('SegmentURL')) if segment_urls_e: - ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e] + segment_urls = [segment.get('media') for segment in segment_urls_e if segment.get('media') is not None] + if len(segment_urls) > 0: + ms_info['segment_urls'] = segment_urls else: segment_template = element.find(_add_ns('SegmentTemplate')) if segment_template is not None: From dd00a331ad5d166c3d4ef12361c6291cfa6e6e62 Mon Sep 17 00:00:00 2001 From: Oleksis Date: Sat, 6 Apr 2019 02:31:49 -0400 Subject: [PATCH 09/19] [picta] Fix some Picta DASH video vp09.00.[dd].08 for 'vcodec': 'vp9', 'acodec':'none' --- youtube_dl/extractor/picta.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index adf798748..5d7157beb 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -91,6 +91,11 @@ class PictaIE(PictaBaseIE): # MPD manifest if info.get('manifest_url'): formats.extend(self._extract_mpd_formats(info.get('manifest_url'), video_id, formats_dict=self._formats)) + # Fix some Picta DASH video vp09.00.[dd].08 for 'vcodec': 'vp9', 'acodec':'none' + for f in formats: + if f.get('acodec') is None and f.get('vcodec') == 'none': + f.update({'vcodec':'vp9'}) + f.update({'acodec':'none'}) if not formats: raise ExtractorError('Cannot find video formats') From b9722f7c645f361f2c52cc2c09c46beb75b20a54 Mon Sep 17 00:00:00 2001 From: Oleksis Date: Fri, 19 Apr 2019 00:13:38 -0400 Subject: [PATCH 10/19] [picta] Remove all unrelated changes from PictaExtractor --- youtube_dl/extractor/picta.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 5d7157beb..3f2ce66b4 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -94,8 +94,8 @@ class PictaIE(PictaBaseIE): # Fix some Picta DASH video vp09.00.[dd].08 for 'vcodec': 'vp9', 'acodec':'none' for f in formats: if f.get('acodec') is None and f.get('vcodec') == 'none': - f.update({'vcodec':'vp9'}) - f.update({'acodec':'none'}) + f.update({'vcodec': 'vp9'}) + f.update({'acodec': 'none'}) if not formats: raise ExtractorError('Cannot find video formats') From 44aa87581891492890bbec2364ec2cb9aefe1365 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Mon, 25 May 2020 02:42:40 -0400 Subject: [PATCH 11/19] [picta] Update use api v2 --- youtube_dl/extractor/picta.py | 198 ++++++++++++++++++---------------- 1 file changed, 105 insertions(+), 93 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 3f2ce66b4..f88f0d3af 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -1,132 +1,144 @@ # coding: utf-8 from __future__ import unicode_literals from ..compat import compat_str -from ..utils import ( - int_or_none, - unified_timestamp, - try_get, - base_url, - ExtractorError -) +from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from .common import InfoExtractor class PictaBaseIE(InfoExtractor): - API_BASE_URL = 'https://api.picta.cu/api/v1/' + API_BASE_URL = "https://api.picta.cu/api/v2/" - def _extract_video(self, video, video_id=None, require_title=True): - title = video['results'][0]['nombre'] if require_title else video.get('results')[0].get('nombre') - description = try_get(video, lambda x: x['results'][0]['descripcion'], compat_str) - uploader = try_get(video, lambda x: x['results'][0]['usuario'], compat_str) - add_date = try_get(video, lambda x: x['results'][0]['fecha_creacion']) + @staticmethod + def _extract_video(video, video_id=None, require_title=True): + title = ( + video["results"][0]["nombre"] + if require_title + else video.get("results")[0].get("nombre") + ) + description = try_get( + video, lambda x: x["results"][0]["descripcion"], compat_str + ) + uploader = try_get( + video, lambda x: x["results"][0]["usuario"]["username"], compat_str + ) + add_date = try_get(video, lambda x: x["results"][0]["fecha_creacion"]) timestamp = int_or_none(unified_timestamp(add_date)) - thumbnail = try_get(video, lambda x: x['results'][0]['url_imagen']) - manifest_url = try_get(video, lambda x: x['results'][0]['url_manifiesto']) - category = try_get(video, lambda x: x['results'][0]['canal'], compat_str) + thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"]) + manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"]) + category = try_get( + video, lambda x: x["results"][0]["canal"]["nombre"], compat_str + ) return { - 'id': try_get(video, lambda x: x['results'][0]['id'], compat_str) or video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'uploader': uploader, - 'timestamp': timestamp, - 'category': [category] if category else None, - 'manifest_url': manifest_url, + "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) + or video_id, + "title": title, + "description": description, + "thumbnail": thumbnail, + "uploader": uploader, + "timestamp": timestamp, + "category": [category] if category else None, + "manifest_url": manifest_url, } class PictaIE(PictaBaseIE): - IE_NAME = 'picta' - IE_DESC = 'Picta videos' - _VALID_URL = r'https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)' - _formats = { - # Dash webm - '0': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'}, - '1': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '2': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '3': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'}, - '4': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'}, + IE_NAME = "picta" + IE_DESC = "Picta videos" + _VALID_URL = ( + r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)" + ) - # Dash webm audio with opus inside - '5': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 128}, - '6': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 134}, - - } - - _TESTS = [{ - 'url': 'https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003', - 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', - 'md5': '7ffdeb0043500c4bb660c04e74e90f7a', - 'info_dict': { - 'id': 'orishas-everyday-2019-01-16-16-36-42-443003', - 'ext': 'webm', - 'title': 'Orishas - Everyday', - 'thumbnail': r're:^https?://.*imagen/img.*\.png$', - 'upload_date': '20190116', - 'description': 'Orishas - Everyday (Video Oficial)', - 'uploader': 'admin', - 'timestamp': 1547656602, + _TESTS = [ + { + "url": "https://www.picta.cu/medias/orishas-everyday-2019-01-16-16-36-42-443003", + "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", + "md5": "7ffdeb0043500c4bb660c04e74e90f7a", + "info_dict": { + "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "ext": "webm", + "title": "Orishas - Everyday", + "thumbnail": r"re:^https?://.*imagen/img.*\.png$", + "upload_date": "20190116", + "description": "Orishas - Everyday (Video Oficial)", + "uploader": "admin", + "timestamp": 1547656602, + }, + "params": {"format": "4",}, }, - 'params': { - 'format': '4', + { + "url": "https://www.picta.cu/medias/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", + "info_dict": { + "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "ext": "mp4", + "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + "upload_date": "20200521", + "description": "En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica", + "uploader": "ernestoguerra21", + "timestamp": 1590077731, + }, }, - }, { - 'url': 'https://www.picta.cu/embed/?v=818', - 'only_matching': True, - }] + {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True,}, + ] def _real_extract(self, url): video_id = self._match_id(url) - if base_url(url).find('medias') != -1: - json_url = self.API_BASE_URL + 'publicacion/?format=json&slug_url=%s&tipo=publicacion' % video_id - else: - json_url = self.API_BASE_URL + 'publicacion/?format=json&id_publicacion=%s&tipo=publicacion' % video_id - - video = self._download_json(json_url, video_id, 'Downloading video JSON') - + json_url = ( + self.API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id + ) + video = self._download_json(json_url, video_id, "Downloading video JSON") info = self._extract_video(video, video_id) formats = [] # MPD manifest - if info.get('manifest_url'): - formats.extend(self._extract_mpd_formats(info.get('manifest_url'), video_id, formats_dict=self._formats)) - # Fix some Picta DASH video vp09.00.[dd].08 for 'vcodec': 'vp9', 'acodec':'none' - for f in formats: - if f.get('acodec') is None and f.get('vcodec') == 'none': - f.update({'vcodec': 'vp9'}) - f.update({'acodec': 'none'}) + if info.get("manifest_url"): + formats.extend( + self._extract_mpd_formats(info.get("manifest_url"), video_id) + ) + if not formats: - raise ExtractorError('Cannot find video formats') + raise ExtractorError("Cannot find video formats") self._sort_formats(formats) - info['formats'] = formats + info["formats"] = formats return info class PictaEmbedIE(InfoExtractor): - IE_NAME = 'picta:embed' - IE_DESC = 'Picta embedded videos' - _VALID_URL = r'https?://www\.picta\.cu/embed/\?v=(?P[0-9]+)' + IE_NAME = "picta:embed" + IE_DESC = "Picta embedded videos" + _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[0-9]+)" _TEST = { - 'url': 'https://www.picta.cu/embed/?v=818', - 'file': 'Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm', - 'md5': '7ffdeb0043500c4bb660c04e74e90f7a', - 'info_dict': { - 'id': 'orishas-everyday-2019-01-16-16-36-42-443003', - 'ext': 'webm', - 'title': 'Orishas - Everyday', - 'thumbnail': r're:^https?://.*imagen/img.*\.png$', - 'upload_date': '20190116', - 'description': 'Orishas - Everyday (Video Oficial)', - 'uploader': 'admin', - 'timestamp': 1547656602, - }, - 'params': { - 'format': '4', + "url": "https://www.picta.cu/embed/?v=818", + "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", + "md5": "7ffdeb0043500c4bb660c04e74e90f7a", + "info_dict": { + "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "ext": "webm", + "title": "Orishas - Everyday", + "thumbnail": r"re:^https?://.*imagen/img.*\.png$", + "upload_date": "20190116", + "description": "Orishas - Everyday (Video Oficial)", + "uploader": "admin", + "timestamp": 1547656602, }, + "params": {"format": "4",}, } def _real_extract(self, url): From 6f1fa26d0bb16b41c08a7bae05cb57b098c743a6 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Mon, 25 May 2020 03:10:59 -0400 Subject: [PATCH 12/19] [picta] Update use api v2 + flake8 --- youtube_dl/extractor/picta.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index f88f0d3af..efb81e7e7 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -64,7 +64,7 @@ class PictaIE(PictaBaseIE): "uploader": "admin", "timestamp": 1547656602, }, - "params": {"format": "4",}, + "params": {"format": "4"}, }, { "url": "https://www.picta.cu/medias/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", @@ -92,7 +92,7 @@ class PictaIE(PictaBaseIE): "timestamp": 1590077731, }, }, - {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True,}, + {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, ] def _real_extract(self, url): @@ -138,7 +138,7 @@ class PictaEmbedIE(InfoExtractor): "uploader": "admin", "timestamp": 1547656602, }, - "params": {"format": "4",}, + "params": {"format": "4"}, } def _real_extract(self, url): From ba4c5a3277e248a3875d09bfd8a35110a5407440 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Thu, 28 May 2020 09:20:51 -0400 Subject: [PATCH 13/19] Update extractor/picta.py --- youtube_dl/extractor/picta.py | 62 ++++++++++++++++++++++++++--------- 1 file changed, 47 insertions(+), 15 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index efb81e7e7..fe8f0189c 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -93,6 +93,10 @@ class PictaIE(PictaBaseIE): }, }, {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, + { + "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "only_matching": True, + }, ] def _real_extract(self, url): @@ -124,22 +128,50 @@ class PictaEmbedIE(InfoExtractor): IE_DESC = "Picta embedded videos" _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[0-9]+)" - _TEST = { - "url": "https://www.picta.cu/embed/?v=818", - "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", - "md5": "7ffdeb0043500c4bb660c04e74e90f7a", - "info_dict": { - "id": "orishas-everyday-2019-01-16-16-36-42-443003", - "ext": "webm", - "title": "Orishas - Everyday", - "thumbnail": r"re:^https?://.*imagen/img.*\.png$", - "upload_date": "20190116", - "description": "Orishas - Everyday (Video Oficial)", - "uploader": "admin", - "timestamp": 1547656602, + _TESTS = [ + { + "url": "https://www.picta.cu/embed/?v=818", + "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", + "md5": "7ffdeb0043500c4bb660c04e74e90f7a", + "info_dict": { + "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "ext": "webm", + "title": "Orishas - Everyday", + "thumbnail": r"re:^https?://.*imagen/img.*\.png$", + "upload_date": "20190116", + "description": "Orishas - Everyday (Video Oficial)", + "uploader": "admin", + "timestamp": 1547656602, + }, + "params": {"format": "4"}, }, - "params": {"format": "4"}, - } + { + "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", + "info_dict": { + "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "ext": "mp4", + "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + "upload_date": "20200521", + "description": "En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica", + "uploader": "ernestoguerra21", + "timestamp": 1590077731, + }, + }, + ] def _real_extract(self, url): return self.url_result(url, PictaIE.ie_key()) From 51c0a46e9631662a55625fd88284d7b16ec57a79 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Sat, 30 May 2020 23:57:21 -0400 Subject: [PATCH 14/19] Update youtube_dl/extractor/picta.py --- youtube_dl/extractor/picta.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index fe8f0189c..960aadad5 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -10,6 +10,9 @@ class PictaBaseIE(InfoExtractor): @staticmethod def _extract_video(video, video_id=None, require_title=True): + if len(video["results"]) == 0: + raise ExtractorError("Cannot find video!") + title = ( video["results"][0]["nombre"] if require_title @@ -156,17 +159,17 @@ class PictaEmbedIE(InfoExtractor): "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "upload_date": "20200521", "description": "En esta emisión:\r\n" - "Iniciará en La Habana nuevo método para medir el consumo " - "eléctrico | https://bit.ly/jtlecturacee\r\n" - "GICAcovid: nueva aplicación web para los centros de " - "aislamiento | https://bit.ly/jtgicacovid\r\n" - "Obtuvo Palmiche tercer lugar en la primera competencia " - "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" - "\r\n" - "Síguenos en:\r\n" - "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" - "Twitter e Instagram: @juventudtecnica\r\n" - "Telegram: http://t.me/juventudtecnica", + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica", "uploader": "ernestoguerra21", "timestamp": 1590077731, }, From 1f034effd8c0afdf860c92469c6b095c358bcdb9 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Sat, 11 Jul 2020 16:55:56 -0400 Subject: [PATCH 15/19] Add PictaPlaylist extractor --- .gitignore | 21 ++-- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/picta.py | 157 ++++++++++++++++++++++------- 3 files changed, 134 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index c4870a6ba..246f74d90 100644 --- a/.gitignore +++ b/.gitignore @@ -10,13 +10,13 @@ build/ dist/ MANIFEST README.txt -youtube-dl.1 -youtube-dl.bash-completion -youtube-dl.fish -youtube_dl/extractor/lazy_extractors.py -youtube-dl -youtube-dl.exe -youtube-dl.tar.gz +picta-dl.1 +picta-dl.bash-completion +picta-dl.fish +picta_dl/extractor/lazy_extractors.py +picta-dl +picta-dl.exe +picta-dl.tar.gz .coverage cover/ updates_key.pem @@ -40,7 +40,7 @@ updates_key.pem *.swp test/local_parameters.json .tox -youtube-dl.zsh +picta-dl.zsh # IntelliJ related files .idea @@ -48,6 +48,9 @@ youtube-dl.zsh tmp/ venv/ +.env + +*.spec # VS Code related files -.vscode +.vscode \ No newline at end of file diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 7371fdf0f..8b03aef4b 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -840,7 +840,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) -from .picta import PictaIE +from .picta import PictaIE, PictaPlaylistIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 960aadad5..7dc4657f5 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -4,9 +4,12 @@ from ..compat import compat_str from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from .common import InfoExtractor +ROOT_BASE_URL = "https://www.picta.cu/" +API_BASE_URL = "https://api.picta.cu/api/v2/" + +# noinspection PyAbstractClass class PictaBaseIE(InfoExtractor): - API_BASE_URL = "https://api.picta.cu/api/v2/" @staticmethod def _extract_video(video, video_id=None, require_title=True): @@ -29,12 +32,16 @@ class PictaBaseIE(InfoExtractor): thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"]) manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"]) category = try_get( - video, lambda x: x["results"][0]["canal"]["nombre"], compat_str + video, lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"], compat_str + ) + playlist_channel = ( + video["results"][0]["lista_reproduccion_canal"][0] + if len(video["results"][0]["lista_reproduccion_canal"]) > 0 + else None ) return { - "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) - or video_id, + "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, "title": title, "description": description, "thumbnail": thumbnail, @@ -42,9 +49,11 @@ class PictaBaseIE(InfoExtractor): "timestamp": timestamp, "category": [category] if category else None, "manifest_url": manifest_url, + "playlist_channel": playlist_channel } +# noinspection PyAbstractClass class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" @@ -70,8 +79,10 @@ class PictaIE(PictaBaseIE): "params": {"format": "4"}, }, { - "url": "https://www.picta.cu/medias/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", - "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), + "file": ("Palmiche Galeno tercer lugar en torneo virtual de " + "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", @@ -79,36 +90,52 @@ class PictaIE(PictaBaseIE): "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "upload_date": "20200521", - "description": "En esta emisión:\r\n" - "Iniciará en La Habana nuevo método para medir el consumo " - "eléctrico | https://bit.ly/jtlecturacee\r\n" - "GICAcovid: nueva aplicación web para los centros de " - "aislamiento | https://bit.ly/jtgicacovid\r\n" - "Obtuvo Palmiche tercer lugar en la primera competencia " - "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" - "\r\n" - "Síguenos en:\r\n" - "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" - "Twitter e Instagram: @juventudtecnica\r\n" - "Telegram: http://t.me/juventudtecnica", + "description": ("En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica"), "uploader": "ernestoguerra21", "timestamp": 1590077731, }, }, {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, { - "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), "only_matching": True, }, ] + def _real_initialize(self): + self.playlist_id = None + def _real_extract(self, url): + playlist_id = None video_id = self._match_id(url) - json_url = ( - self.API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id - ) + json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id video = self._download_json(json_url, video_id, "Downloading video JSON") info = self._extract_video(video, video_id) + if info["playlist_channel"] and self.playlist_id is None: + playlist_id = info["playlist_channel"].get("id") + self.playlist_id = playlist_id + # Download Playlist (--yes-playlist) in first place + if playlist_id and not self._downloader.params.get('noplaylist'): + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + ROOT_BASE_URL + "playlist/" + str(playlist_id), + PictaPlaylistIE.ie_key(), + playlist_id + ) + elif self._downloader.params.get('noplaylist'): + self.to_screen('Downloading just video %s because of --no-playlist' % video_id) formats = [] # MPD manifest @@ -126,6 +153,7 @@ class PictaIE(PictaBaseIE): return info +# noinspection PyAbstractClass class PictaEmbedIE(InfoExtractor): IE_NAME = "picta:embed" IE_DESC = "Picta embedded videos" @@ -149,8 +177,10 @@ class PictaEmbedIE(InfoExtractor): "params": {"format": "4"}, }, { - "url": "https://www.picta.cu/embed/palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", - "file": "Palmiche Galeno tercer lugar en torneo virtual de robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4", + "url": ("https://www.picta.cu/embed/" + "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895"), + "file": ("Palmiche Galeno tercer lugar en torneo virtual de " + "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", @@ -158,18 +188,18 @@ class PictaEmbedIE(InfoExtractor): "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", "upload_date": "20200521", - "description": "En esta emisión:\r\n" - "Iniciará en La Habana nuevo método para medir el consumo " - "eléctrico | https://bit.ly/jtlecturacee\r\n" - "GICAcovid: nueva aplicación web para los centros de " - "aislamiento | https://bit.ly/jtgicacovid\r\n" - "Obtuvo Palmiche tercer lugar en la primera competencia " - "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" - "\r\n" - "Síguenos en:\r\n" - "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" - "Twitter e Instagram: @juventudtecnica\r\n" - "Telegram: http://t.me/juventudtecnica", + "description": ("En esta emisión:\r\n" + "Iniciará en La Habana nuevo método para medir el consumo " + "eléctrico | https://bit.ly/jtlecturacee\r\n" + "GICAcovid: nueva aplicación web para los centros de " + "aislamiento | https://bit.ly/jtgicacovid\r\n" + "Obtuvo Palmiche tercer lugar en la primera competencia " + "virtual de robótica | https://bit.ly/jtpalmichegaleno\r\n" + "\r\n" + "Síguenos en:\r\n" + "Facebook: http://www.facebook.com/JuventudTecnicaCuba\r\n" + "Twitter e Instagram: @juventudtecnica\r\n" + "Telegram: http://t.me/juventudtecnica"), "uploader": "ernestoguerra21", "timestamp": 1590077731, }, @@ -178,3 +208,58 @@ class PictaEmbedIE(InfoExtractor): def _real_extract(self, url): return self.url_result(url, PictaIE.ie_key()) + + +# noinspection PyAbstractClass +class PictaPlaylistIE(InfoExtractor): + API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" + IE_NAME = "picta:playlist" + IE_DESC = "Picta playlist videos" + _VALID_URL = r"https?://www\.picta\.cu/playlist/(?P[0-9]+)" + + _TESTS = [ + { + "url": "https://www.picta.cu/playlist/4441", + "info_dict": { + "id": 4441, + "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + }, + }, + ] + + @staticmethod + def _extract_playlist(playlist, playlist_id=None, require_title=True): + if len(playlist["results"]) == 0: + raise ExtractorError("Cannot find playlist!") + + title = ( + playlist["results"][0]["nombre"] + if require_title + else playlist.get("results")[0].get("nombre") + ) + thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"]) + entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"]) + + return { + "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id, + "title": title, + "thumbnail": thumbnail, + "entries": entries, + } + + def _entries(self, playlist_id): + json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id + playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON") + info_playlist = self._extract_playlist(playlist, playlist_id) + playlist_entries = info_playlist.get("entries") + + for video in playlist_entries: + video_id = video.get("id") + video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + yield self.url_result(video_url, PictaIE.ie_key(), video_id) + + def _real_extract(self, url): + playlist_id = self._match_id(url) + entries = self._entries(playlist_id) + return self.playlist_result(entries, playlist_id) From 6d467183786e8b9bdb9d6afdcab96da74a74787e Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Wed, 12 Aug 2020 19:52:13 -0400 Subject: [PATCH 16/19] Add PictaChannelPlaylistIE, PictaUserPlaylistIE --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/picta.py | 177 ++++++++++++++++++++++++----- 2 files changed, 149 insertions(+), 30 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 8b03aef4b..0948f1c21 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -840,7 +840,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) -from .picta import PictaIE, PictaPlaylistIE +from .picta import PictaIE, PictaChannelPlaylistIE, PictaUserPlaylistIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 7dc4657f5..009197d28 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -1,6 +1,10 @@ # coding: utf-8 from __future__ import unicode_literals -from ..compat import compat_str + +from base64 import b64encode +import re + +from ..compat import compat_str, compat_HTTPError from ..utils import int_or_none, unified_timestamp, try_get, ExtractorError from .common import InfoExtractor @@ -24,6 +28,9 @@ class PictaBaseIE(InfoExtractor): description = try_get( video, lambda x: x["results"][0]["descripcion"], compat_str ) + slug_url = try_get( + video, lambda x: x["results"][0]["slug_url"], compat_str + ) uploader = try_get( video, lambda x: x["results"][0]["usuario"]["username"], compat_str ) @@ -43,6 +50,7 @@ class PictaBaseIE(InfoExtractor): return { "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, "title": title, + "slug_url": slug_url, "description": description, "thumbnail": thumbnail, "uploader": uploader, @@ -57,9 +65,7 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" - _VALID_URL = ( - r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)" - ) + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" _TESTS = [ { @@ -67,7 +73,8 @@ class PictaIE(PictaBaseIE): "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "md5": "7ffdeb0043500c4bb660c04e74e90f7a", "info_dict": { - "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "id": "818", + "slug_url": "orishas-everyday-2019-01-16-16-36-42-443003", "ext": "webm", "title": "Orishas - Everyday", "thumbnail": r"re:^https?://.*imagen/img.*\.png$", @@ -85,7 +92,8 @@ class PictaIE(PictaBaseIE): "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { - "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "id": "3500", + "slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "ext": "mp4", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", @@ -117,6 +125,14 @@ class PictaIE(PictaBaseIE): def _real_initialize(self): self.playlist_id = None + @classmethod + def _match_playlist_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('playlist_id')) + def _real_extract(self, url): playlist_id = None video_id = self._match_id(url) @@ -130,8 +146,21 @@ class PictaIE(PictaBaseIE): if playlist_id and not self._downloader.params.get('noplaylist'): self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) return self.url_result( - ROOT_BASE_URL + "playlist/" + str(playlist_id), - PictaPlaylistIE.ie_key(), + ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + compat_str(playlist_id), + PictaChannelPlaylistIE.ie_key(), + playlist_id + ) + elif ( + self.playlist_id is None + and self._match_playlist_id(url) + and not self._downloader.params.get('noplaylist') + ): + playlist_id = compat_str(self._match_playlist_id(url)) + self.playlist_id = playlist_id + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id, + PictaUserPlaylistIE.ie_key(), playlist_id ) elif self._downloader.params.get('noplaylist'): @@ -157,7 +186,7 @@ class PictaIE(PictaBaseIE): class PictaEmbedIE(InfoExtractor): IE_NAME = "picta:embed" IE_DESC = "Picta embedded videos" - _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[0-9]+)" + _VALID_URL = r"https?://www\.picta\.cu/embed/(?:\?v=)?(?P[\d]+)" _TESTS = [ { @@ -165,7 +194,8 @@ class PictaEmbedIE(InfoExtractor): "file": "Orishas - Everyday-orishas-everyday-2019-01-16-16-36-42-443003.webm", "md5": "7ffdeb0043500c4bb660c04e74e90f7a", "info_dict": { - "id": "orishas-everyday-2019-01-16-16-36-42-443003", + "id": "818", + "slug_url": "orishas-everyday-2019-01-16-16-36-42-443003", "ext": "webm", "title": "Orishas - Everyday", "thumbnail": r"re:^https?://.*imagen/img.*\.png$", @@ -183,7 +213,8 @@ class PictaEmbedIE(InfoExtractor): "robótica-palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895.mp4"), "md5": "6031b7a3add2eade9c5bef7ecf5d4b02", "info_dict": { - "id": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", + "id": "3500", + "slug_url": "palmiche-galeno-tercer-lugar-torneo-virtual-robotica-2020-05-21-16-15-31-431895", "ext": "mp4", "title": "Palmiche Galeno tercer lugar en torneo virtual de robótica", "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", @@ -214,22 +245,37 @@ class PictaEmbedIE(InfoExtractor): class PictaPlaylistIE(InfoExtractor): API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" IE_NAME = "picta:playlist" - IE_DESC = "Picta playlist videos" - _VALID_URL = r"https?://www\.picta\.cu/playlist/(?P[0-9]+)" + IE_DESC = "Picta playlist" + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" - _TESTS = [ - { - "url": "https://www.picta.cu/playlist/4441", - "info_dict": { - "id": 4441, - "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", - "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", - }, - }, - ] + _NETRC_MACHINE = "picta" - @staticmethod - def _extract_playlist(playlist, playlist_id=None, require_title=True): + @classmethod + def _match_playlist_id(cls, url): + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + m = cls._VALID_URL_RE.match(url) + assert m + return compat_str(m.group('playlist_id')) + + def _set_auth_basic(self): + header = {} + username, password = self._get_login_info() + if username is None: + return header + + if isinstance(username, str): + username = username.encode('latin1') + + if isinstance(password, str): + password = password.encode('latin1') + + authstr = "Basic " + compat_str(b64encode(b":".join((username, password))).decode("utf-8")) + + header["Authorization"] = authstr + return header + + def _extract_playlist(self, playlist, playlist_id=None, require_title=True): if len(playlist["results"]) == 0: raise ExtractorError("Cannot find playlist!") @@ -238,7 +284,7 @@ class PictaPlaylistIE(InfoExtractor): if require_title else playlist.get("results")[0].get("nombre") ) - thumbnail = try_get(playlist, lambda x: x["results"][0]["url_imagen"]) + thumbnail = try_get(playlist, lambda x: x["results"][0].get("url_imagen")) entries = try_get(playlist, lambda x: x["results"][0]["publicaciones"]) return { @@ -250,16 +296,89 @@ class PictaPlaylistIE(InfoExtractor): def _entries(self, playlist_id): json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id - playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON") + headers = self._set_auth_basic() + try: + playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON", headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,): + raise self.raise_login_required( + msg='This playlist is only available for registered users. Check your username and password' + ) + + raise ExtractorError("Unable to login...") + info_playlist = self._extract_playlist(playlist, playlist_id) playlist_entries = info_playlist.get("entries") for video in playlist_entries: video_id = video.get("id") - video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + compat_str(playlist_id) yield self.url_result(video_url, PictaIE.ie_key(), video_id) def _real_extract(self, url): - playlist_id = self._match_id(url) + playlist_id = self._match_playlist_id(url) entries = self._entries(playlist_id) return self.playlist_result(entries, playlist_id) + + +# noinspection PyAbstractClass +class PictaChannelPlaylistIE(PictaPlaylistIE): + IE_NAME = "picta:channel:playlist" + IE_DESC = "Picta channel playlist" + + _TESTS = [ + { + "url": ("https://www.picta.cu/medias/" + "201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736?playlist=4441"), + "info_dict": { + "id": 4441, + "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", + }, + }, + ] + + +# noinspection PyAbstractClass +class PictaUserPlaylistIE(PictaPlaylistIE, PictaBaseIE): + API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion/" + IE_NAME = "picta:user:playlist" + IE_DESC = "Picta user playlist" + + _TESTS = [ + { + "url": "https://www.picta.cu/medias/die-hart-1x01-2020-08-06-18-12-50-310131?playlist=96", + "info_dict": { + "id": 96, + "title": "Die hart", + "thumbnail": None, + }, + }, + ] + + def _extract_playlist(self, playlist, playlist_id=None, require_title=True): + if len(playlist["results"]) == 0: + raise ExtractorError("Cannot find playlist!") + + title = ( + playlist["results"][0]["nombre"] + if require_title + else playlist.get("results")[0].get("nombre") + ) + thumbnail = None + entries = try_get(playlist, lambda x: x["results"][0]["publicacion"]) + + # Playlist User need update slug_url video + for entry in entries: + video_id = entry.get("id") + json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id + video = self._download_json(json_url, video_id, "Downloading video JSON") + info = self._extract_video(video, video_id) + entry["slug_url"] = info.get("slug_url") + + return { + "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str) or playlist_id, + "title": title, + "thumbnail": thumbnail, + "entries": entries, + } From c672f91e050fe8c1b0373c131cccb9f7104f19b1 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Thu, 13 Aug 2020 23:52:30 -0400 Subject: [PATCH 17/19] Improved PictaPlaylist Extractors --- youtube_dl/extractor/picta.py | 75 +++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index 009197d28..d5be2a6ca 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -65,7 +65,8 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" - _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)" \ + r"(?:\?playlist=(?P[\da-z-]+))?" _TESTS = [ { @@ -131,7 +132,7 @@ class PictaIE(PictaBaseIE): cls._VALID_URL_RE = re.compile(cls._VALID_URL) m = cls._VALID_URL_RE.match(url) assert m - return compat_str(m.group('playlist_id')) + return m.group('playlist_id') def _real_extract(self, url): playlist_id = None @@ -139,18 +140,15 @@ class PictaIE(PictaBaseIE): json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id video = self._download_json(json_url, video_id, "Downloading video JSON") info = self._extract_video(video, video_id) - if info["playlist_channel"] and self.playlist_id is None: + if ( + info["playlist_channel"] + and self.playlist_id is None + and self._match_playlist_id(url) is None + ): playlist_id = info["playlist_channel"].get("id") self.playlist_id = playlist_id # Download Playlist (--yes-playlist) in first place - if playlist_id and not self._downloader.params.get('noplaylist'): - self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) - return self.url_result( - ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + compat_str(playlist_id), - PictaChannelPlaylistIE.ie_key(), - playlist_id - ) - elif ( + if ( self.playlist_id is None and self._match_playlist_id(url) and not self._downloader.params.get('noplaylist') @@ -163,6 +161,14 @@ class PictaIE(PictaBaseIE): PictaUserPlaylistIE.ie_key(), playlist_id ) + elif playlist_id and not self._downloader.params.get('noplaylist'): + playlist_id = compat_str(playlist_id) + self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id) + return self.url_result( + ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" + playlist_id, + PictaChannelPlaylistIE.ie_key(), + playlist_id + ) elif self._downloader.params.get('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) @@ -246,7 +252,8 @@ class PictaPlaylistIE(InfoExtractor): API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" IE_NAME = "picta:playlist" IE_DESC = "Picta playlist" - _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)(?:\?playlist=(?P[\da-z-]+))?" + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?P[\da-z-]+)" \ + r"\?playlist=(?P[\da-z-]+)$" _NETRC_MACHINE = "picta" @@ -256,7 +263,7 @@ class PictaPlaylistIE(InfoExtractor): cls._VALID_URL_RE = re.compile(cls._VALID_URL) m = cls._VALID_URL_RE.match(url) assert m - return compat_str(m.group('playlist_id')) + return m.group('playlist_id') def _set_auth_basic(self): header = {} @@ -297,22 +304,24 @@ class PictaPlaylistIE(InfoExtractor): def _entries(self, playlist_id): json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id headers = self._set_auth_basic() + playlist = {} try: playlist = self._download_json(json_url, playlist_id, "Downloading playlist JSON", headers=headers) + assert playlist.get("count", 0) >= 1 except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403,): raise self.raise_login_required( msg='This playlist is only available for registered users. Check your username and password' ) - - raise ExtractorError("Unable to login...") + except AssertionError: + raise ExtractorError("Playlist no exists!") info_playlist = self._extract_playlist(playlist, playlist_id) playlist_entries = info_playlist.get("entries") for video in playlist_entries: video_id = video.get("id") - video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + compat_str(playlist_id) + video_url = ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" + "playlist=" + playlist_id yield self.url_result(video_url, PictaIE.ie_key(), video_id) def _real_extract(self, url): @@ -326,17 +335,15 @@ class PictaChannelPlaylistIE(PictaPlaylistIE): IE_NAME = "picta:channel:playlist" IE_DESC = "Picta channel playlist" - _TESTS = [ - { - "url": ("https://www.picta.cu/medias/" - "201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736?playlist=4441"), - "info_dict": { - "id": 4441, - "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", - "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", - }, + _TEST_CHANNEL = { + "url": ("https://www.picta.cu/medias/" + "201-paradigma-devops-implementacion-tecnomatica-2020-07-05-22-44-41-299736"), + "info_dict": { + "id": 4441, + "title": "D\u00eda 2: Telecomunicaciones, Redes y Ciberseguridad", + "thumbnail": r"re:^https?://.*imagen/img.*\.jpeg$", }, - ] + } # noinspection PyAbstractClass @@ -345,16 +352,14 @@ class PictaUserPlaylistIE(PictaPlaylistIE, PictaBaseIE): IE_NAME = "picta:user:playlist" IE_DESC = "Picta user playlist" - _TESTS = [ - { - "url": "https://www.picta.cu/medias/die-hart-1x01-2020-08-06-18-12-50-310131?playlist=96", - "info_dict": { - "id": 96, - "title": "Die hart", - "thumbnail": None, - }, + _TEST_USER = { + "url": "https://www.picta.cu/medias/fundamento-big-data-2020-08-09-19-47-15-230297?playlist=129", + "info_dict": { + "id": 129, + "title": "picta-dl", + "thumbnail": None, }, - ] + } def _extract_playlist(self, playlist, playlist_id=None, require_title=True): if len(playlist["results"]) == 0: From d7c566568a277c525f4bfeb1ea12b6c57cff226a Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Mon, 17 Aug 2020 01:19:59 -0400 Subject: [PATCH 18/19] Extract subtitles if available --- youtube_dl/extractor/picta.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index d5be2a6ca..e26d873d9 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -46,6 +46,7 @@ class PictaBaseIE(InfoExtractor): if len(video["results"][0]["lista_reproduccion_canal"]) > 0 else None ) + subtitle_url = try_get(video, lambda x: x["results"][0]["url_subtitulo"]) return { "id": try_get(video, lambda x: x["results"][0]["id"], compat_str) or video_id, @@ -57,7 +58,8 @@ class PictaBaseIE(InfoExtractor): "timestamp": timestamp, "category": [category] if category else None, "manifest_url": manifest_url, - "playlist_channel": playlist_channel + "playlist_channel": playlist_channel, + "subtitle_url": subtitle_url, } @@ -123,6 +125,11 @@ class PictaIE(PictaBaseIE): }, ] + _LANGUAGES_CODES = ['es'] + _LANG_ES = _LANGUAGES_CODES[0] + + _SUBTITLE_FORMATS = ('srt', ) + def _real_initialize(self): self.playlist_id = None @@ -134,6 +141,25 @@ class PictaIE(PictaBaseIE): assert m return m.group('playlist_id') + def _get_subtitles(self, video): + sub_lang_list = {} + lang = self._LANG_ES + + sub_url = video.get('subtitle_url', '') + + if sub_url: + sub_formats = [] + for ext in self._SUBTITLE_FORMATS: + sub_formats.append({ + 'url': sub_url, + 'ext': ext, + }) + sub_lang_list[lang] = sub_formats + if not sub_lang_list: + self._downloader.report_warning('video doesn\'t have subtitles') + return {} + return sub_lang_list + def _real_extract(self, url): playlist_id = None video_id = self._match_id(url) @@ -183,8 +209,11 @@ class PictaIE(PictaBaseIE): raise ExtractorError("Cannot find video formats") self._sort_formats(formats) - info["formats"] = formats + + # subtitles + video_subtitles = self.extract_subtitles(info) + info["subtitles"] = video_subtitles return info From 7da6facfe2bfcdf952d922c6201ec00ab5632234 Mon Sep 17 00:00:00 2001 From: oleksis <“oleksis.fraga@gmail.com”> Date: Mon, 31 Aug 2020 01:18:02 -0400 Subject: [PATCH 19/19] Add movie URL path --- youtube_dl/extractor/picta.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/picta.py b/youtube_dl/extractor/picta.py index e26d873d9..cd6c82654 100644 --- a/youtube_dl/extractor/picta.py +++ b/youtube_dl/extractor/picta.py @@ -67,7 +67,7 @@ class PictaBaseIE(InfoExtractor): class PictaIE(PictaBaseIE): IE_NAME = "picta" IE_DESC = "Picta videos" - _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?:\?v=)?(?P[\da-z-]+)" \ + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|movie|embed)/(?:\?v=)?(?P[\da-z-]+)" \ r"(?:\?playlist=(?P[\da-z-]+))?" _TESTS = [ @@ -117,6 +117,10 @@ class PictaIE(PictaBaseIE): "timestamp": 1590077731, }, }, + { + "url": "https://www.picta.cu/movie/phineas-ferb-pelicula-candace-universo-2020-08-28-21-00-32-857026", + "only_matching": True, + }, {"url": "https://www.picta.cu/embed/?v=818", "only_matching": True}, { "url": ("https://www.picta.cu/embed/" @@ -281,7 +285,7 @@ class PictaPlaylistIE(InfoExtractor): API_PLAYLIST_ENDPOINT = API_BASE_URL + "lista_reproduccion_canal/" IE_NAME = "picta:playlist" IE_DESC = "Picta playlist" - _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|embed)/(?P[\da-z-]+)" \ + _VALID_URL = r"https?://(?:www\.)?picta\.cu/(?:medias|movie|embed)/(?P[\da-z-]+)" \ r"\?playlist=(?P[\da-z-]+)$" _NETRC_MACHINE = "picta"