From 38592b0123754212305c67433ad28f8df64942aa Mon Sep 17 00:00:00 2001 From: bato3 Date: Tue, 31 Jul 2018 03:30:11 +0200 Subject: [PATCH 1/3] Logowanie przez clouda --- youtube_dl/extractor/crunchyroll.py | 40 +++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 463f995c7..fc1210079 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -52,28 +52,51 @@ class CrunchyrollBaseIE(InfoExtractor): username, password = self._get_login_info() if username is None: return + ''' + import cfscrape - self._download_webpage( + proxies = {"http": self._downloader.params.get('proxy'), "https": self._downloader.params.get('proxy')} + tokens, user_agent = cfscrape.get_tokens(self._LOGIN_URL, proxies=proxies, user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0") + + self._set_cookie( '.crunchyroll.com', 'cf_clearance',tokens['cf_clearance']) + self._set_cookie( '.crunchyroll.com', '__cfduid',tokens['__cfduid']) + ''' + login_page = self._download_webpage( 'https://www.crunchyroll.com/?a=formhandler', None, 'Logging in', 'Wrong login info', data=urlencode_postdata({ 'formname': 'RpcApiUser_Login', 'next_url': 'https://www.crunchyroll.com/acct/membership', + 'fail_url': self._LOGIN_URL, 'name': username, 'password': password, - })) - - ''' - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') + }), expected_status=503) def is_logged(webpage): - return 'Redirecting' in webpage + return '<title>Redirecting' in webpage or '/logout' in webpage # Already logged in if is_logged(login_page): return + + ''' + print [tokens, user_agent] + + + form_data = self._form_hidden_inputs('challenge-form', login_page) + form_data['jschl_answer'] = self.solve_challenge(login_page, 'www.crunchyroll.com') + print form_data + self._sleep(6, None, 'Solving CloudFlare Challenge') 
+ login_page = self._download_webpage('https://www.crunchyroll.com/cdn-cgi/l/chk_jschl', None, 'Login Form', data=urlencode_postdata(form_data), headers={ + 'Referer': self._LOGIN_URL, + }, expected_status= 503) + + import codecs + with codecs.open("yop", "w", encoding="utf-8") as f: + f.write(login_page) + ''' + login_form_str = self._search_regex( r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, login_page, 'login form', group='form') @@ -107,7 +130,7 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - ''' + def _real_initialize(self): self._login() @@ -123,6 +146,7 @@ class CrunchyrollBaseIE(InfoExtractor): # Crunchyroll to not work in georestriction cases in some browsers that don't place # the locale lang first in header. However allowing any language seems to workaround the issue. request.add_header('Accept-Language', '*') + request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0') return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) @staticmethod From 1e77c43688fa2d4c3a8c1889c9ab22a3eb1c96fb Mon Sep 17 00:00:00 2001 From: bato3 <bato3@bandyci.org> Date: Tue, 31 Jul 2018 12:53:39 +0200 Subject: [PATCH 2/3] Login when is present cloudflare challenge --- youtube_dl/extractor/common.py | 58 +++++++++++++++++++++++++++++ youtube_dl/extractor/crunchyroll.py | 41 ++++++-------------- youtube_dl/extractor/openload.py | 4 +- 3 files changed, 71 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index b8bbaf81a..61ca7275c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2818,6 +2818,64 @@ class InfoExtractor(object): def _generic_title(self, url): return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]) + def _cf_solve_challenge(self, body, domain): + ''' + 
Solve Cloudflare challenge. + @param <String> domain result of `compat_urlparse.urlparse().netloc` + Original code from: https://github.com/Anorov/cloudflare-scrape/blob/master/cfscrape/__init__.py#L112-L149 + ''' + try: + js = re.search(r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1) + except Exception: + raise ValueError("Unable to identify Cloudflare IUAM Javascript on website.") + + js = re.sub(r"a\.value = (.+ \+ t\.length).+", r"\1", js) + js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain))) + + # Strip characters that could be used to exit the string context + # These characters are not currently used in Cloudflare's arithmetic snippet + js = re.sub(r"[\n\\']", "", js) + + if "toFixed" not in js: + raise ValueError("Error parsing Cloudflare IUAM Javascript challenge.") + + # Use vm.runInNewContext to safely evaluate code + # The sandboxed code cannot use the Node.js standard library + js = "console.log(require('vm').runInNewContext('%s', Object.create(null), {timeout: 5000}));" % js + + import subprocess + try: + result = subprocess.check_output(["node", "-e", js]).strip() + except OSError as e: + if e.errno == 2: + raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. 
(Please read the cfscrape README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.") + raise + except Exception: + self.to_screen("Error executing Cloudflare IUAM Javascript.") + raise + + try: + float(result) + except Exception: + raise ValueError("Cloudflare IUAM challenge returned unexpected answer.") + + return result + + def cf_solve_and_download_webpage(self, html, download_url): + if '/cdn-cgi/l/chk_jschl' not in html: + return False + parsed_url = compat_urlparse.urlparse(download_url) + domain = parsed_url.netloc + submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain) + form_data = self._form_hidden_inputs('challenge-form', html) + form_data['jschl_answer'] = self._cf_solve_challenge(html, domain) + + self._sleep(5, None, 'Solving Cloudflare challenge (5s)') + return self._download_webpage( + submit_url, + None, 'Sending Cloudflare challenge', 'Wrong Cloudflare challenge', query=form_data + ) + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index fc1210079..51339abb7 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -36,6 +36,7 @@ from ..aes import ( class CrunchyrollBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.crunchyroll.com/login' _LOGIN_FORM = 'login_form' + _PROFILE_URL = 'https://www.crunchyroll.com/acct/membership' _NETRC_MACHINE = 'crunchyroll' def _call_rpc_api(self, method, video_id, note=None, data=None): @@ -52,25 +53,17 @@ class CrunchyrollBaseIE(InfoExtractor): username, password = self._get_login_info() if username is None: return - ''' - import cfscrape - proxies = {"http": self._downloader.params.get('proxy'), "https": self._downloader.params.get('proxy')} - tokens, user_agent = cfscrape.get_tokens(self._LOGIN_URL, proxies=proxies, user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0") - - self._set_cookie( 
'.crunchyroll.com', 'cf_clearance',tokens['cf_clearance']) - self._set_cookie( '.crunchyroll.com', '__cfduid',tokens['__cfduid']) - ''' login_page = self._download_webpage( 'https://www.crunchyroll.com/?a=formhandler', None, 'Logging in', 'Wrong login info', data=urlencode_postdata({ 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'fail_url': self._LOGIN_URL, + 'next_url': self._PROFILE_URL, + 'fail_url': self._PROFILE_URL, # On login fail redirect to login page 'name': username, 'password': password, - }), expected_status=503) + }), expected_status=503) # 503 for CloudFlare def is_logged(webpage): return '<title>Redirecting' in webpage or '/logout' in webpage @@ -79,23 +72,13 @@ class CrunchyrollBaseIE(InfoExtractor): if is_logged(login_page): return - - ''' - print [tokens, user_agent] - - - form_data = self._form_hidden_inputs('challenge-form', login_page) - form_data['jschl_answer'] = self.solve_challenge(login_page, 'www.crunchyroll.com') - print form_data - self._sleep(6, None, 'Solving CloudFlare Challenge') - login_page = self._download_webpage('https://www.crunchyroll.com/cdn-cgi/l/chk_jschl', None, 'Login Form', data=urlencode_postdata(form_data), headers={ - 'Referer': self._LOGIN_URL, - }, expected_status= 503) - - import codecs - with codecs.open("yop", "w", encoding="utf-8") as f: - f.write(login_page) - ''' + cf_page = self.cf_solve_and_download_webpage(login_page, self._LOGIN_URL) + if cf_page: + login_page = cf_page + if is_logged(cf_page): + login_page = self._download_webpage(self._PROFILE_URL, None, 'Get new CSRF Token') + if is_logged(login_page): + return login_form_str = self._search_regex( r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM, @@ -130,7 +113,6 @@ class CrunchyrollBaseIE(InfoExtractor): raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - def _real_initialize(self): self._login() @@ -146,7 +128,6 @@ class 
CrunchyrollBaseIE(InfoExtractor): # Crunchyroll to not work in georestriction cases in some browsers that don't place # the locale lang first in header. However allowing any language seems to workaround the issue. request.add_header('Accept-Language', '*') - request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0') return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) @staticmethod diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py index d264fe206..58360f3da 100644 --- a/youtube_dl/extractor/openload.py +++ b/youtube_dl/extractor/openload.py @@ -164,7 +164,7 @@ class PhantomJSwrapper(object): cookie['expire_time'] = cookie['expiry'] self.extractor._set_cookie(**compat_kwargs(cookie)) - def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): + def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();', expected_status=None): """ Downloads webpage (if needed) and executes JS @@ -203,7 +203,7 @@ class PhantomJSwrapper(object): if 'saveAndExit();' not in jscode: raise ExtractorError('`saveAndExit();` not found in `jscode`') if not html: - html = self.extractor._download_webpage(url, video_id, note=note, headers=headers) + html = self.extractor._download_webpage(url, video_id, note=note, headers=headers, expected_status=expected_status) with open(self._TMP_FILES['html'].name, 'wb') as f: f.write(html.encode('utf-8')) From 165961649234f9c6125beb071ec905e7ccba43eb Mon Sep 17 00:00:00 2001 From: bato3 <bato3@bandyci.org> Date: Tue, 31 Jul 2018 15:19:32 +0200 Subject: [PATCH 3/3] + Check is sove proces was OK * Change some errors to ExtractorError --- youtube_dl/extractor/common.py | 13 ++++++++----- youtube_dl/extractor/crunchyroll.py | 4 +++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git 
a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 61ca7275c..17958308d 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -2827,7 +2827,7 @@ class InfoExtractor(object): try: js = re.search(r"setTimeout\(function\(\){\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1) except Exception: - raise ValueError("Unable to identify Cloudflare IUAM Javascript on website.") + raise ExtractorError("Unable to identify Cloudflare IUAM Javascript on website.") js = re.sub(r"a\.value = (.+ \+ t\.length).+", r"\1", js) js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js).replace("t.length", str(len(domain))) @@ -2837,7 +2837,7 @@ class InfoExtractor(object): js = re.sub(r"[\n\\']", "", js) if "toFixed" not in js: - raise ValueError("Error parsing Cloudflare IUAM Javascript challenge.") + raise ExtractorError("Error parsing Cloudflare IUAM Javascript challenge.") # Use vm.runInNewContext to safely evaluate code # The sandboxed code cannot use the Node.js standard library @@ -2848,7 +2848,7 @@ class InfoExtractor(object): result = subprocess.check_output(["node", "-e", js]).strip() except OSError as e: if e.errno == 2: - raise EnvironmentError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. (Please read the cfscrape README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.") + raise ExtractorError("Missing Node.js runtime. Node is required and must be in the PATH (check with `node -v`). Your Node binary may be called `nodejs` rather than `node`, in which case you may need to run `apt-get install nodejs-legacy` on some Debian-based systems. 
(Please read the cfscrape README's Dependencies section: https://github.com/Anorov/cloudflare-scrape#dependencies.") raise except Exception: self.to_screen("Error executing Cloudflare IUAM Javascript.") @@ -2857,12 +2857,15 @@ class InfoExtractor(object): try: float(result) except Exception: - raise ValueError("Cloudflare IUAM challenge returned unexpected answer.") + raise ExtractorError("Cloudflare IUAM challenge returned unexpected answer.") return result + def has_cf_challenge(self, html): + return True if '/cdn-cgi/l/chk_jschl' in html else False + def cf_solve_and_download_webpage(self, html, download_url): - if '/cdn-cgi/l/chk_jschl' not in html: + if not self.has_cf_challenge(html): return False parsed_url = compat_urlparse.urlparse(download_url) domain = parsed_url.netloc diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 51339abb7..d0080939a 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -76,7 +76,9 @@ class CrunchyrollBaseIE(InfoExtractor): if cf_page: login_page = cf_page if is_logged(cf_page): - login_page = self._download_webpage(self._PROFILE_URL, None, 'Get new CSRF Token') + login_page = self._download_webpage(self._PROFILE_URL, None, 'Get new CSRF Token', expected_status=503) + if self.has_cf_challenge(login_page): + raise ExtractorError('Cloudflare challenge still is present, try run again', expected=True) if is_logged(login_page): return