From 74ee98e39d840b7323e906c0052a5a164df09a8a Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sat, 16 Sep 2017 13:50:49 +0200 Subject: [PATCH 1/8] Added extractor for pietsmiet.de --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/pietsmiet.py | 79 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 youtube_dl/extractor/pietsmiet.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 6fb65e4fe..cd2c77e12 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -818,6 +818,7 @@ from .picarto import ( PicartoIE, PicartoVodIE, ) +from .pietsmiet import PietsmietIE from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pladform import PladformIE diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py new file mode 100644 index 000000000..f39bef0d5 --- /dev/null +++ b/youtube_dl/extractor/pietsmiet.py @@ -0,0 +1,79 @@ +# coding: utf-8 + +from __future__ import unicode_literals + +from .once import OnceIE +from ..compat import ( + compat_urllib_parse_unquote, +) +from ..utils import ( + unescapeHTML, + js_to_json, + int_or_none, +) + + +class PietsmietIE(OnceIE): + _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/categories/[\w-]+/(?P<id>\d+)-.*/?' + _TEST = { + 'url': 'http://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', + 'info_dict': { + 'id': '29844', + 'ext': 'mp4', + 'title': 'Was würdet ihr die Maus fragen?
🎮 Frag PietSmiet #912', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + }, + } + + def _real_extract(self, url): + page_id = self._match_id(url) + webpage = self._download_webpage(url, page_id) + data_video_config = self._search_regex( + r'var config=(.*?);var', webpage, 'video config') + data_video = self._parse_json(js_to_json(unescapeHTML(data_video_config)), page_id) + + formats = [] + + m3u8_manifest_url = data_video['sources'][0]['file'] + m3u8_formats = self._extract_m3u8_formats( + m3u8_manifest_url, page_id, 'mp4', 'm3u8_native', + m3u8_id='hls') + + # Give reproducible names for HLS formats instead of hls- + for f in m3u8_formats: + f['format_id'] = 'hls-{}p'.format(f['height']) + + formats.extend(m3u8_formats) + + if len(data_video['sources']) > 1: + http_video = data_video['sources'][1] + + # Calculate resolution for HTTP format but should always be 1280x720 + format_height_raw = self._search_regex( + '([0-9]+)p', http_video['label'], 'http video height', + default=720, fatal=False) + format_height = int_or_none(format_height_raw) + + if format_height: + format_width = float(format_height) * (16 / 9) + + formats.append({ + 'url': "https:{}".format(http_video['file']), + 'ext': http_video['type'], + 'format_id': 'http-{}'.format(http_video['label']), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) + + self._sort_formats(formats) + + return { + 'id': page_id, + 'display_id': page_id, + 'title': compat_urllib_parse_unquote(data_video['abouttext']), + 'formats': formats, + 'thumbnail': 'http://www.pietsmiet.de/{}'.format(data_video.get('image')), + } From 0e68d908a9ae29f02517910ba66293d7db5bc251 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sun, 17 Sep 2017 11:56:46 +0200 Subject: [PATCH 2/8] Some fixes --- youtube_dl/extractor/pietsmiet.py | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py 
b/youtube_dl/extractor/pietsmiet.py index f39bef0d5..99a3c32fc 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -41,30 +41,35 @@ class PietsmietIE(OnceIE): m3u8_manifest_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls') - # Give reproducible names for HLS formats instead of hls- - for f in m3u8_formats: - f['format_id'] = 'hls-{}p'.format(f['height']) - formats.extend(m3u8_formats) if len(data_video['sources']) > 1: http_video = data_video['sources'][1] - # Calculate resolution for HTTP format but should always be 1280x720 - format_height_raw = self._search_regex( - '([0-9]+)p', http_video['label'], 'http video height', - default=720, fatal=False) - format_height = int_or_none(format_height_raw) + label = http_video.get('label') - if format_height: - format_width = float(format_height) * (16 / 9) + if label: + # Calculate resolution for HTTP format but should always be 1280x720 + format_height_raw = self._search_regex( + '([0-9]+)p', label, 'http video height', + default=720, fatal=False) + format_height = int_or_none(format_height_raw) + if format_height: + format_width = float(format_height) * (16 / 9) + + formats.append({ + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), + 'format_id': 'http-{0}'.format(label), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) + else: formats.append({ - 'url': "https:{}".format(http_video['file']), - 'ext': http_video['type'], - 'format_id': 'http-{}'.format(http_video['label']), - 'width': int_or_none(format_width), - 'height': format_height, + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), 'fps': 30.0, }) @@ -75,5 +80,5 @@ class PietsmietIE(OnceIE): 'display_id': page_id, 'title': compat_urllib_parse_unquote(data_video['abouttext']), 'formats': formats, - 'thumbnail': 'http://www.pietsmiet.de/{}'.format(data_video.get('image')), + 'thumbnail': 
'http://www.pietsmiet.de/{0}'.format(data_video.get('image')), } From 3a28d8dcda5bb6a7669275b68d3b90a8c41b5fdd Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Fri, 13 Oct 2017 19:44:27 +0200 Subject: [PATCH 3/8] Allow formats with labels that don't match regex ([0-9]+)p --- youtube_dl/extractor/pietsmiet.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index 99a3c32fc..3e0b1663c 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -47,6 +47,7 @@ class PietsmietIE(OnceIE): http_video = data_video['sources'][1] label = http_video.get('label') + format_height = 0 if label: # Calculate resolution for HTTP format but should always be 1280x720 @@ -55,17 +56,17 @@ class PietsmietIE(OnceIE): default=720, fatal=False) format_height = int_or_none(format_height_raw) - if format_height: - format_width = float(format_height) * (16 / 9) + if format_height > 0: + format_width = float(format_height) * (16 / 9) - formats.append({ - 'url': "https:{0}".format(http_video['file']), - 'ext': http_video.get('type'), - 'format_id': 'http-{0}'.format(label), - 'width': int_or_none(format_width), - 'height': format_height, - 'fps': 30.0, - }) + formats.append({ + 'url': "https:{0}".format(http_video['file']), + 'ext': http_video.get('type'), + 'format_id': 'http-{0}'.format(label), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) else: formats.append({ 'url': "https:{0}".format(http_video['file']), From 99a61908b81b44804e8ef1b3a1aa8769957dceb5 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Tue, 31 Oct 2017 16:36:47 +0100 Subject: [PATCH 4/8] Fixes to match pattern to changed website --- youtube_dl/extractor/pietsmiet.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index 3e0b1663c..edccb473f 100644 --- 
a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals +import re + from .once import OnceIE from ..compat import ( compat_urllib_parse_unquote, @@ -31,7 +33,8 @@ class PietsmietIE(OnceIE): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) data_video_config = self._search_regex( - r'var config=(.*?);var', webpage, 'video config') + r'var config =(.*?)\};\n', webpage, 'video config', flags=re.DOTALL) + data_video_config = data_video_config.replace(']', '],', 1) + '}' data_video = self._parse_json(js_to_json(unescapeHTML(data_video_config)), page_id) formats = [] From 4c685cbdde9c173e58767c866a7190e8917499c2 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sat, 10 Feb 2018 12:11:47 +0100 Subject: [PATCH 5/8] Minor refactoring --- youtube_dl/extractor/pietsmiet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index edccb473f..9d9918492 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -63,9 +63,9 @@ class PietsmietIE(OnceIE): format_width = float(format_height) * (16 / 9) formats.append({ + 'format_id': 'http-{0}'.format(label), 'url': "https:{0}".format(http_video['file']), 'ext': http_video.get('type'), - 'format_id': 'http-{0}'.format(label), 'width': int_or_none(format_width), 'height': format_height, 'fps': 30.0, From 29b076ad6c1cf7fc6ded3e8d1f2d7e3b8cd6d8c5 Mon Sep 17 00:00:00 2001 From: Michael Date: Thu, 26 Apr 2018 16:14:03 +0200 Subject: [PATCH 6/8] Add support for videos with playlist in their url --- youtube_dl/extractor/pietsmiet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index 9d9918492..6f8d19fb8 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -16,7 +16,7 @@ from ..utils import ( 
class PietsmietIE(OnceIE): - _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/categories/[\w-]+/(?P<id>\d+)-.*/?' + _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/(categories|playlists)/[\w-]+/(?P<id>\d+)-.*/?' _TEST = { 'url': 'http://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', 'info_dict': { From f2c7a7f1b101fcfe2e9fca0382eee28a53216597 Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Fri, 8 Jun 2018 12:08:45 +0200 Subject: [PATCH 7/8] Added rules for legacy videos that don't use m3u8 files --- youtube_dl/extractor/pietsmiet.py | 63 ++++++++++++++++++------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index 6f8d19fb8..a6586a6bc 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals import re -from .once import OnceIE +from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote, ) @@ -15,19 +15,31 @@ from ..utils import ( ) -class PietsmietIE(OnceIE): +class PietsmietIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/(categories|playlists)/[\w-]+/(?P<id>\d+)-.*/?' - _TEST = { - 'url': 'http://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', - 'info_dict': { - 'id': '29844', - 'ext': 'mp4', - 'title': 'Was würdet ihr die Maus fragen? 🎮 Frag PietSmiet #912', - }, - 'params': { - 'skip_download': True, # m3u8 downloads - }, - } + _TESTS = [ + { + 'url': 'https://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', + 'info_dict': { + 'id': '29844', + 'ext': 'mp4', + 'title': 'Was würdet ihr die Maus fragen?
🎮 Frag PietSmiet #912', + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } + }, { + 'url': 'https://www.pietsmiet.de/gallery/playlists/646-metal-gear-solid-1/19804-metal-gear-solid-1-sniper-wolf-rematch', + 'info_dict': { + 'id': '19804', + 'ext': 'mp4', + 'title': 'SNIPER WOLF REMATCH 🎮 Metal Gear Solid #9' + }, + 'params': { + 'skip_download': True, # m3u8 downloads + } + } + ] def _real_extract(self, url): page_id = self._match_id(url) @@ -39,17 +51,16 @@ class PietsmietIE(OnceIE): formats = [] - m3u8_manifest_url = data_video['sources'][0]['file'] - m3u8_formats = self._extract_m3u8_formats( - m3u8_manifest_url, page_id, 'mp4', 'm3u8_native', - m3u8_id='hls') + m3u8_manifest_urls = filter(lambda x: x['file'].endswith('m3u8'), data_video['sources']) + for f in m3u8_manifest_urls: + m3u8_formats = self._extract_m3u8_formats( + f['file'], page_id, 'mp4', 'm3u8_native', m3u8_id='hls') - formats.extend(m3u8_formats) + formats.extend(m3u8_formats) - if len(data_video['sources']) > 1: - http_video = data_video['sources'][1] - - label = http_video.get('label') + mp4_urls = filter(lambda x: not x['file'].endswith('m3u8'), data_video['sources']) + for m in mp4_urls: + label = m.get('label') format_height = 0 if label: @@ -64,16 +75,16 @@ class PietsmietIE(OnceIE): formats.append({ 'format_id': 'http-{0}'.format(label), - 'url': "https:{0}".format(http_video['file']), - 'ext': http_video.get('type'), + 'url': "https:{0}".format(m['file']), + 'ext': m.get('type'), 'width': int_or_none(format_width), 'height': format_height, 'fps': 30.0, }) else: formats.append({ - 'url': "https:{0}".format(http_video['file']), - 'ext': http_video.get('type'), + 'url': "https:{0}".format(m['file']), + 'ext': m.get('type'), 'fps': 30.0, }) From 97f828ce1e7209273a78390ae4f159dc9283fefe Mon Sep 17 00:00:00 2001 From: Martin Hartkorn Date: Sun, 8 Jul 2018 15:30:46 +0200 Subject: [PATCH 8/8] More refactoring for PietsmietIE --- youtube_dl/extractor/pietsmiet.py | 76 
++++++++++++++++--------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/youtube_dl/extractor/pietsmiet.py b/youtube_dl/extractor/pietsmiet.py index a6586a6bc..c1805d5fb 100644 --- a/youtube_dl/extractor/pietsmiet.py +++ b/youtube_dl/extractor/pietsmiet.py @@ -16,7 +16,7 @@ from ..utils import ( class PietsmietIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/(categories|playlists)/[\w-]+/(?P<id>\d+)-.*/?' + _VALID_URL = r'https?://(?:www\.)?pietsmiet\.de/gallery/(categories|playlists)/[\w-]+/(?P<id>\d+)-' _TESTS = [ { 'url': 'https://www.pietsmiet.de/gallery/categories/8-frag-pietsmiet/29844-fps-912', @@ -49,51 +49,53 @@ class PietsmietIE(InfoExtractor): data_video_config = data_video_config.replace(']', '],', 1) + '}' data_video = self._parse_json(js_to_json(unescapeHTML(data_video_config)), page_id) + title = compat_urllib_parse_unquote(data_video['abouttext']) + formats = [] - m3u8_manifest_urls = filter(lambda x: x['file'].endswith('m3u8'), data_video['sources']) - for f in m3u8_manifest_urls: - m3u8_formats = self._extract_m3u8_formats( - f['file'], page_id, 'mp4', 'm3u8_native', m3u8_id='hls') - - formats.extend(m3u8_formats) - - mp4_urls = filter(lambda x: not x['file'].endswith('m3u8'), data_video['sources']) - for m in mp4_urls: - label = m.get('label') - format_height = 0 - - if label: - # Calculate resolution for HTTP format but should always be 1280x720 - format_height_raw = self._search_regex( - '([0-9]+)p', label, 'http video height', - default=720, fatal=False) - format_height = int_or_none(format_height_raw) - - if format_height > 0: - format_width = float(format_height) * (16 / 9) - - formats.append({ - 'format_id': 'http-{0}'.format(label), - 'url': "https:{0}".format(m['file']), - 'ext': m.get('type'), - 'width': int_or_none(format_width), - 'height': format_height, - 'fps': 30.0, - }) + for src in data_video['sources']: + if src['file'].endswith('m3u8'): + # HLS format + m3u8_formats =
self._extract_m3u8_formats( + src['file'], page_id, 'mp4', 'm3u8_native', m3u8_id='hls') + formats.extend(m3u8_formats) else: - formats.append({ - 'url': "https:{0}".format(m['file']), - 'ext': m.get('type'), - 'fps': 30.0, - }) + # Standard mp4 + label = src.get('label') + format_height = 0 + + if label: + # Calculate resolution for HTTP format. Should always be 1280x720 + # for newer videos but older videos don't have HLS for all resolutions + format_height_raw = self._search_regex( + '([0-9]+)p', label, 'http video height', + default=720) + format_height = int_or_none(format_height_raw) + + if format_height > 0: + format_width = float(format_height) * (16 / 9) + + formats.append({ + 'format_id': 'http-{0}'.format(label), + 'url': "https:{0}".format(src['file']), + 'ext': src.get('type'), + 'width': int_or_none(format_width), + 'height': format_height, + 'fps': 30.0, + }) + else: + formats.append({ + 'url': 'https:{0}'.format(src['file']), + 'ext': src.get('type'), + 'fps': 30.0, + }) self._sort_formats(formats) return { 'id': page_id, 'display_id': page_id, - 'title': compat_urllib_parse_unquote(data_video['abouttext']), + 'title': title, 'formats': formats, 'thumbnail': 'http://www.pietsmiet.de/{0}'.format(data_video.get('image')), }