mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
Update facebook.py
This commit is contained in:
parent
5e83f922ac
commit
a944eb8884
@ -534,62 +534,71 @@ class FacebookUserIE(InfoExtractor):
|
|||||||
url, user_id, 'Downloading user webpage')
|
url, user_id, 'Downloading user webpage')
|
||||||
fb_url = self._html_search_meta(
|
fb_url = self._html_search_meta(
|
||||||
'al:android:url', page, default=None)
|
'al:android:url', page, default=None)
|
||||||
fb_url_re = re.match(r'fb://(?P<type>page|profile)/(?P<id>\d+)', fb_url)
|
fb_url_mobj = re.match(r'fb://(?P<type>page|profile)/(?P<id>\d+)', fb_url)
|
||||||
if not fb_url_re:
|
if not fb_url_mobj:
|
||||||
raise ExtractorError('Could not extract page ID', expected=False)
|
raise ExtractorError('Could not extract page ID', expected=False)
|
||||||
page_id = fb_url_re.group('id')
|
page_id = fb_url_mobj.group('id')
|
||||||
fb_dtsg_ag_re = re.search(r'"async_get_token":"([\w\-:]+)"', page)
|
fb_dtsg_ag = self._search_regex(
|
||||||
pagelet_token_re = re.search(r'pagelet_token:"([\w\-]+)"', page)
|
r'"async_get_token":"([\w\-:]+)"',
|
||||||
collection_token_re = re.search(r'pagelet_timeline_app_collection_([\d:]+)', page)
|
page, 'fb_dtsg_ag', default=None)
|
||||||
|
pagelet_token = self._search_regex(
|
||||||
|
r'pagelet_token:"([\w\-]+)"',
|
||||||
|
page, 'pagelet_token', default=None)
|
||||||
|
collection_token = self._search_regex(
|
||||||
|
r'pagelet_timeline_app_collection_([\d:]+)',
|
||||||
|
page, 'collection_token', default=None)
|
||||||
cursor = None
|
cursor = None
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
if fb_url_re.group('type') == 'page':
|
if fb_url_mobj.group('type') == 'page':
|
||||||
endpoint = 'PagesVideoHubVideoContainerPagelet'
|
endpoint = 'PagesVideoHubVideoContainerPagelet'
|
||||||
a_class = '_5asm'
|
a_class = '_5asm'
|
||||||
data = {
|
data = {
|
||||||
'page': page_id
|
'page': page_id
|
||||||
}
|
}
|
||||||
elif fb_url_re.group('type') == 'profile':
|
elif fb_url_mobj.group('type') == 'profile':
|
||||||
if not (fb_dtsg_ag_re and pagelet_token_re and collection_token_re):
|
if not (fb_dtsg_ag and pagelet_token and collection_token):
|
||||||
raise ExtractorError('You must be logged in to extract profile videos', expected=True)
|
raise ExtractorError('You must be logged in to extract profile videos', expected=True)
|
||||||
endpoint = 'VideosByUserAppCollectionPagelet'
|
endpoint = 'VideosByUserAppCollectionPagelet'
|
||||||
a_class = '_400z'
|
a_class = '_400z'
|
||||||
data = {
|
data = {
|
||||||
'collection_token': collection_token_re[1],
|
'collection_token': collection_token,
|
||||||
'disablepager': False,
|
'disablepager': False,
|
||||||
'overview': False,
|
'overview': False,
|
||||||
'profile_id': page_id,
|
'profile_id': page_id,
|
||||||
'pagelet_token': pagelet_token_re[1],
|
'pagelet_token': pagelet_token,
|
||||||
'order': None,
|
'order': None,
|
||||||
'sk': 'videos'
|
'sk': 'videos'
|
||||||
}
|
}
|
||||||
|
|
||||||
for page_num in itertools.count(1):
|
for page_num in itertools.count(1):
|
||||||
js_data_page = self._download_webpage(
|
js_data_page = self._download_webpage(
|
||||||
'https://www.facebook.com/ajax/pagelet/generic.php/%s' % endpoint,
|
'https://www.facebook.com/ajax/pagelet/generic.php/%s' % endpoint,
|
||||||
user_id, 'Downloading page %d' % page_num,
|
user_id, 'Downloading page %d' % page_num,
|
||||||
query={
|
query={
|
||||||
'fb_dtsg_ag': fb_dtsg_ag_re[1] if fb_dtsg_ag_re else None,
|
'fb_dtsg_ag': fb_dtsg_ag if fb_dtsg_ag else None,
|
||||||
'data': json.dumps(
|
'data': json.dumps(
|
||||||
{**data, 'cursor': cursor},
|
{**data, 'cursor': cursor},
|
||||||
separators=(',', ':')),
|
separators=(',', ':')),
|
||||||
'__a': 1
|
'__a': 1
|
||||||
})
|
})
|
||||||
|
|
||||||
js_data = self._parse_json(self._search_regex(
|
js_data = self._parse_json(
|
||||||
r'({.+})', js_data_page,
|
self._search_regex(
|
||||||
'js data', default='{}'), user_id, fatal=True)
|
r'({.+})', js_data_page,
|
||||||
|
'js data', default='{}'),
|
||||||
|
user_id, fatal=True)
|
||||||
|
|
||||||
for video in re.findall(
|
for video in re.findall(
|
||||||
r'href="(?P<url>[^"]+)"[^>]+%s' % a_class,
|
r'href="(?P<url>[^"]+)"[^>]+%s' % a_class,
|
||||||
js_data['payload']):
|
js_data['payload']):
|
||||||
entries.append(
|
entries.append(
|
||||||
self.url_result('https://www.facebook.com%s' % video, FacebookIE.ie_key())
|
self.url_result(
|
||||||
)
|
'https://www.facebook.com%s' % video,
|
||||||
|
FacebookIE.ie_key()))
|
||||||
|
|
||||||
cursor = None
|
cursor = None
|
||||||
if fb_url_re.group('type') == 'page':
|
if fb_url_mobj.group('type') == 'page':
|
||||||
if not 'instances' in js_data['jsmods']:
|
if not 'instances' in js_data['jsmods']:
|
||||||
break
|
break
|
||||||
for parent in js_data['jsmods']['instances']:
|
for parent in js_data['jsmods']['instances']:
|
||||||
@ -600,7 +609,7 @@ class FacebookUserIE(InfoExtractor):
|
|||||||
if type(subchild) is dict and 'cursor' in subchild:
|
if type(subchild) is dict and 'cursor' in subchild:
|
||||||
cursor = subchild['cursor']
|
cursor = subchild['cursor']
|
||||||
break
|
break
|
||||||
elif fb_url_re.group('type') == 'profile':
|
elif fb_url_mobj.group('type') == 'profile':
|
||||||
for parent in js_data['jsmods']['require']:
|
for parent in js_data['jsmods']['require']:
|
||||||
if type(parent) is list:
|
if type(parent) is list:
|
||||||
for child in parent:
|
for child in parent:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user