From 23a2e126fe68f6ad98e15040078bdefdba953b33 Mon Sep 17 00:00:00 2001 From: thiemo Date: Sun, 25 Mar 2018 16:36:17 +0200 Subject: [PATCH 1/3] [guitartricks] Add new extractor --- youtube_dl/extractor/extractors.py | 1 + youtube_dl/extractor/guitartricks.py | 136 +++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 youtube_dl/extractor/guitartricks.py diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index de48a37ad..a9a674eaf 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -421,6 +421,7 @@ from .googlesearch import GoogleSearchIE from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE +from .guitartricks import GuitarTricksIE from .hark import HarkIE from .hbo import ( HBOIE, diff --git a/youtube_dl/extractor/guitartricks.py b/youtube_dl/extractor/guitartricks.py new file mode 100644 index 000000000..4cf191fd5 --- /dev/null +++ b/youtube_dl/extractor/guitartricks.py @@ -0,0 +1,136 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from .wistia import WistiaIE +from ..compat import ( + compat_str, + compat_kwargs, +) +from ..utils import ( + clean_html, + ExtractorError, + get_element_by_class, + urlencode_postdata, + urljoin, +) + +class GuitarTricksIE(InfoExtractor): + IE_NAME = 'guitartricks' + _LOGIN_URL = 'https://www.guitartricks.com/login.php' + _ORIGIN_URL = 'https://www.guitartricks.com' + _NETRC_MACHINE = 'guitartricks' + _VALID_URL = r'https?://(?:www\.)?guitartricks\.com/(lesson|course|tutorial).php\?input=(?P[A-Za-z0-9]+.*)' + + _TESTS = [{ + 'url': 'https://www.guitartricks.com/lesson.php?input=21986', + 'md5': '09e89ad6c6a9b85b0471b80230995fcc', + 'info_dict': { + 'id': '5706o2esuo', + 'ext': 'mp4', + 'title': 'Guitar Lessons: Common Models of Guitars', + 'description': 'Let\'s take a look at the most common models of guitars, and learn a bit about what they have in common, as well as what sets them apart from each other. If you have a guitar already, this will help you understand your instrument a little better, and make sure it is the best model for you. If you are still looking for the right model of guitar for you, this should help you make that important decision.\r\n', + 'upload_date': '20160718', + 'timestamp': 1468852802, + }, + }, { + 'url': 'https://www.guitartricks.com/lesson.php?input=21987', + 'md5': '41f156090b82630c17b866f9ea9df854', + 'info_dict': { + 'id': 'nmxkzhog3w', + 'ext': 'mp4', + 'title': 'Guitar Lessons: How to Hold the Acoustic Guitar', + 'description': 'Holding the acoustic guitar properly is super-important to establishing proper angles for your body, arms, and hands. Every little thing can make a difference, from the height and design of your chair, to the length of your guitar strap. Make sure you are holding your acoustic guitar in such a way as to maximize your ability to learn really good playing habits and technique.', + 'upload_date': '20160718', + 'timestamp': 1468852808, + } + }, { + 'url': 'https://www.guitartricks.com/lesson.php?input=24742', + 'md5': '09e89ad6c6a9b85b0471b80230995fcc', + 'info_dict': { + 'id': '5706o2esuo', + 'ext': 'mp4', + }, + }] + + + def _download_webpage(self, *args, **kwargs): + kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + return super(GuitarTricksIE, self)._download_webpage( + *args, **compat_kwargs(kwargs)) + + def _real_initialize(self): + self._login() + + def _login(self): + (username, password) = self._get_login_info() + return + + login_popup = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login popup') + + def is_logged(webpage): + return any(re.search(p, webpage) for p in ( + r']+\bhref=["\']/logout.php', + r'>Logout<')) + + # already logged in + if is_logged(login_popup): + return + + + login_form = self._form_hidden_inputs(login_popup) + + login_form.update({ + 'vb_login_username': username, + 'vb_login_password': password, + }) + + + response = self._download_webpage( + self._LOGIN_URL, None, 'Logging in', + data=urlencode_postdata(login_form), + headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + 'Referer': self._ORIGIN_URL, + 'Origin': self._ORIGIN_URL, + }) + + + if not is_logged(response): + error = self._html_search_regex( + r'(?s)]+class="form-errors[^"]*">(.+?)', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) + raise ExtractorError('Unable to log in') + + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + + if re.search(r'Full Access Members Only', webpage): + self.raise_login_required('Full Access Members Only') + + wistia_url = WistiaIE._extract_url(webpage) + if not wistia_url: + if any(re.search(p, webpage) for p in ( + r'Full Access Members Only')): + self.raise_login_required('Lecture contents locked') + + title = self._og_search_title(webpage, default=None) + + return { + 'id': video_id, + '_type': 'url_transparent', + 'url': wistia_url, + 'ie_key': WistiaIE.ie_key(), + 'title': title, + 'description': self._og_search_description(webpage), + # TODO more properties (see youtube_dl/extractor/common.py) + } \ No newline at end of file From dd1b8f399150bf25547bda224512b1a8e0206bfd Mon Sep 17 00:00:00 2001 From: Thiemo Gamma Date: Sun, 25 Mar 2018 16:40:56 +0200 Subject: [PATCH 2/3] Update extractors.py --- youtube_dl/extractor/extractors.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index a9a674eaf..6eeaab7f8 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -422,6 +422,7 @@ from .goshgay import GoshgayIE from .gputechconf import GPUTechConfIE from .groupon import GrouponIE from .guitartricks import GuitarTricksIE + from .hark import HarkIE from .hbo import ( HBOIE, From 9b6f3a25d6dc65557f00451b3dadc9384c029495 Mon Sep 17 00:00:00 2001 From: thiemo Date: Thu, 29 Mar 2018 00:59:59 +0200 Subject: [PATCH 3/3] =?UTF-8?q?[guitartricks]=20login=20process=20fixed=20?= =?UTF-8?q?=F0=9F=92=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- youtube_dl/extractor/guitartricks.py | 104 ++++++++++++++------------- 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/youtube_dl/extractor/guitartricks.py b/youtube_dl/extractor/guitartricks.py index 4cf191fd5..97c436102 100644 --- a/youtube_dl/extractor/guitartricks.py +++ b/youtube_dl/extractor/guitartricks.py @@ -7,7 +7,7 @@ from .common import InfoExtractor from .wistia import WistiaIE from ..compat import ( compat_str, - compat_kwargs, + compat_HTTPError, ) from ..utils import ( clean_html, @@ -23,6 +23,7 @@ class GuitarTricksIE(InfoExtractor): _ORIGIN_URL = 'https://www.guitartricks.com' _NETRC_MACHINE = 'guitartricks' _VALID_URL = r'https?://(?:www\.)?guitartricks\.com/(lesson|course|tutorial).php\?input=(?P[A-Za-z0-9]+.*)' + _LOGIN_REQUIRED = False _TESTS = [{ 'url': 'https://www.guitartricks.com/lesson.php?input=21986', @@ -48,80 +49,86 @@ class GuitarTricksIE(InfoExtractor): } }, { 'url': 'https://www.guitartricks.com/lesson.php?input=24742', - 'md5': '09e89ad6c6a9b85b0471b80230995fcc', + 'md5': '98074943181f87361f16085b0370717d', 'info_dict': { - 'id': '5706o2esuo', + 'id': '4h7kvsarcg', 'ext': 'mp4', + 'title': 'Guitar Lessons: The \'Magic L\', in Reverse!', + 'description': "Let's flip the Magic L on it's head and open up a whole new set of options for playing in a variety of keys.\r\n\r\nThe Reverse Magic L is like the \u2018Magic L\u2019 you already learned, only it is turned upside down! \r\n\r\nThe 1 chord is played as a 5th-string power chord, and the IV and the V chords will fall into place as 6th string power chords, in an upside-down L shape. \r\n\r\nListen to how these three chords sound intuitively right together. \r\n", + 'upload_date': '20160718', + 'timestamp': 1468854273, }, }] - - def _download_webpage(self, *args, **kwargs): - kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' - return super(GuitarTricksIE, self)._download_webpage( - *args, **compat_kwargs(kwargs)) - def _real_initialize(self): self._login() def _login(self): (username, password) = self._get_login_info() - return - - login_popup = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login popup') - - def is_logged(webpage): - return any(re.search(p, webpage) for p in ( - r']+\bhref=["\']/logout.php', - r'>Logout<')) - - # already logged in - if is_logged(login_popup): + if username is None: + if self._LOGIN_REQUIRED: + raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) return + def fail(message): + raise ExtractorError( + 'Unable to login. GuitarTricks said: %s' % message, expected=True) - login_form = self._form_hidden_inputs(login_popup) + def login_step(page, urlh, note, data): + form = self._hidden_inputs(page) + form.update(data) - login_form.update({ - 'vb_login_username': username, - 'vb_login_password': password, - }) + page_url = urlh.geturl() + post_url = '/process/loginAjax' + post_url = urljoin(page_url, post_url) + headers = {'Referer': page_url} - response = self._download_webpage( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata(login_form), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded', - 'Referer': self._ORIGIN_URL, - 'Origin': self._ORIGIN_URL, + try: + response = self._download_json( + post_url, None, note, + data=urlencode_postdata(form), + headers=headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + response = self._parse_json( + e.cause.read().decode('utf-8'), None) + fail(response.get('message') or response['errors'][0]) + raise + + if response.get('status'): + return None, None + else: + fail(response.get('error')) + + redirect_url = 'https://www.guitartricks.com/main.php' + return self._download_webpage_handle( + redirect_url, None, 'Downloading login redirect page', + headers=headers) + + login_page, handle= self._download_webpage_handle( + self._LOGIN_URL, None, 'Downloading login page') + + redirect_page, handle = login_step( + login_page, handle, 'Logging in', { + 'login': username, + 'password': password, + 'action': 'verify_login', }) - - if not is_logged(response): - error = self._html_search_regex( - r'(?s)]+class="form-errors[^"]*">(.+?)', - response, 'error message', default=None) - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') + # Successful login + if not redirect_page: + return - def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - if re.search(r'Full Access Members Only', webpage): - self.raise_login_required('Full Access Members Only') + raise ExtractorError('Video only available with premium plan', expected=True) wistia_url = WistiaIE._extract_url(webpage) - if not wistia_url: - if any(re.search(p, webpage) for p in ( - r'Full Access Members Only')): - self.raise_login_required('Lecture contents locked') title = self._og_search_title(webpage, default=None) @@ -132,5 +139,4 @@ class GuitarTricksIE(InfoExtractor): 'ie_key': WistiaIE.ie_key(), 'title': title, 'description': self._og_search_description(webpage), - # TODO more properties (see youtube_dl/extractor/common.py) } \ No newline at end of file