1
0
mirror of https://github.com/l1ving/youtube-dl synced 2020-11-18 19:53:54 -08:00

Update facebook.py

This commit is contained in:
ajj8 2020-01-14 21:20:36 +00:00 committed by GitHub
parent 5e83f922ac
commit a944eb8884
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -534,62 +534,71 @@ class FacebookUserIE(InfoExtractor):
url, user_id, 'Downloading user webpage') url, user_id, 'Downloading user webpage')
fb_url = self._html_search_meta( fb_url = self._html_search_meta(
'al:android:url', page, default=None) 'al:android:url', page, default=None)
fb_url_re = re.match(r'fb://(?P<type>page|profile)/(?P<id>\d+)', fb_url) fb_url_mobj = re.match(r'fb://(?P<type>page|profile)/(?P<id>\d+)', fb_url)
if not fb_url_re: if not fb_url_mobj:
raise ExtractorError('Could not extract page ID', expected=False) raise ExtractorError('Could not extract page ID', expected=False)
page_id = fb_url_re.group('id') page_id = fb_url_mobj.group('id')
fb_dtsg_ag_re = re.search(r'"async_get_token":"([\w\-:]+)"', page) fb_dtsg_ag = self._search_regex(
pagelet_token_re = re.search(r'pagelet_token:"([\w\-]+)"', page) r'"async_get_token":"([\w\-:]+)"',
collection_token_re = re.search(r'pagelet_timeline_app_collection_([\d:]+)', page) page, 'fb_dtsg_ag', default=None)
pagelet_token = self._search_regex(
r'pagelet_token:"([\w\-]+)"',
page, 'pagelet_token', default=None)
collection_token = self._search_regex(
r'pagelet_timeline_app_collection_([\d:]+)',
page, 'collection_token', default=None)
cursor = None cursor = None
entries = [] entries = []
if fb_url_re.group('type') == 'page': if fb_url_mobj.group('type') == 'page':
endpoint = 'PagesVideoHubVideoContainerPagelet' endpoint = 'PagesVideoHubVideoContainerPagelet'
a_class = '_5asm' a_class = '_5asm'
data = { data = {
'page': page_id 'page': page_id
} }
elif fb_url_re.group('type') == 'profile': elif fb_url_mobj.group('type') == 'profile':
if not (fb_dtsg_ag_re and pagelet_token_re and collection_token_re): if not (fb_dtsg_ag and pagelet_token and collection_token):
raise ExtractorError('You must be logged in to extract profile videos', expected=True) raise ExtractorError('You must be logged in to extract profile videos', expected=True)
endpoint = 'VideosByUserAppCollectionPagelet' endpoint = 'VideosByUserAppCollectionPagelet'
a_class = '_400z' a_class = '_400z'
data = { data = {
'collection_token': collection_token_re[1], 'collection_token': collection_token,
'disablepager': False, 'disablepager': False,
'overview': False, 'overview': False,
'profile_id': page_id, 'profile_id': page_id,
'pagelet_token': pagelet_token_re[1], 'pagelet_token': pagelet_token,
'order': None, 'order': None,
'sk': 'videos' 'sk': 'videos'
} }
for page_num in itertools.count(1): for page_num in itertools.count(1):
js_data_page = self._download_webpage( js_data_page = self._download_webpage(
'https://www.facebook.com/ajax/pagelet/generic.php/%s' % endpoint, 'https://www.facebook.com/ajax/pagelet/generic.php/%s' % endpoint,
user_id, 'Downloading page %d' % page_num, user_id, 'Downloading page %d' % page_num,
query={ query={
'fb_dtsg_ag': fb_dtsg_ag_re[1] if fb_dtsg_ag_re else None, 'fb_dtsg_ag': fb_dtsg_ag if fb_dtsg_ag else None,
'data': json.dumps( 'data': json.dumps(
{**data, 'cursor': cursor}, {**data, 'cursor': cursor},
separators=(',', ':')), separators=(',', ':')),
'__a': 1 '__a': 1
}) })
js_data = self._parse_json(self._search_regex( js_data = self._parse_json(
r'({.+})', js_data_page, self._search_regex(
'js data', default='{}'), user_id, fatal=True) r'({.+})', js_data_page,
'js data', default='{}'),
user_id, fatal=True)
for video in re.findall( for video in re.findall(
r'href="(?P<url>[^"]+)"[^>]+%s' % a_class, r'href="(?P<url>[^"]+)"[^>]+%s' % a_class,
js_data['payload']): js_data['payload']):
entries.append( entries.append(
self.url_result('https://www.facebook.com%s' % video, FacebookIE.ie_key()) self.url_result(
) 'https://www.facebook.com%s' % video,
FacebookIE.ie_key()))
cursor = None cursor = None
if fb_url_re.group('type') == 'page': if fb_url_mobj.group('type') == 'page':
if not 'instances' in js_data['jsmods']: if not 'instances' in js_data['jsmods']:
break break
for parent in js_data['jsmods']['instances']: for parent in js_data['jsmods']['instances']:
@ -600,7 +609,7 @@ class FacebookUserIE(InfoExtractor):
if type(subchild) is dict and 'cursor' in subchild: if type(subchild) is dict and 'cursor' in subchild:
cursor = subchild['cursor'] cursor = subchild['cursor']
break break
elif fb_url_re.group('type') == 'profile': elif fb_url_mobj.group('type') == 'profile':
for parent in js_data['jsmods']['require']: for parent in js_data['jsmods']['require']:
if type(parent) is list: if type(parent) is list:
for child in parent: for child in parent: