mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
[generic] Create new _search_regex_all method that returns all matches
This commit is contained in:
parent
8813775c98
commit
a01e16e5fd
@ -965,41 +965,27 @@ class InfoExtractor(object):
|
|||||||
video_info['description'] = playlist_description
|
video_info['description'] = playlist_description
|
||||||
return video_info
|
return video_info
|
||||||
|
|
||||||
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None, return_all=False):
|
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Perform a regex search on the given string, using a single or a list of
|
Perform a regex search on the given string, using a single or a list of
|
||||||
patterns returning the first matching group.
|
patterns returning the first matching group.
|
||||||
In case of failure return a default value or raise a WARNING or a
|
In case of failure return a default value or raise a WARNING or a
|
||||||
RegexNotFoundError, depending on fatal, specifying the field name.
|
RegexNotFoundError, depending on fatal, specifying the field name.
|
||||||
"""
|
"""
|
||||||
matches = []
|
|
||||||
|
|
||||||
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
||||||
if return_all:
|
mobj = re.search(pattern, string, flags)
|
||||||
matches = list(re.finditer(pattern, string, flags))
|
|
||||||
else:
|
|
||||||
mobj = re.search(pattern, string, flags)
|
|
||||||
else:
|
else:
|
||||||
for p in pattern:
|
for p in pattern:
|
||||||
if return_all:
|
mobj = re.search(p, string, flags)
|
||||||
new_matches = list(re.finditer(p, string, flags))
|
if mobj:
|
||||||
matches.extend(new_matches)
|
break
|
||||||
else:
|
|
||||||
mobj = re.search(p, string, flags)
|
|
||||||
if mobj:
|
|
||||||
break
|
|
||||||
|
|
||||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||||
_name = '\033[0;34m%s\033[0m' % name
|
_name = '\033[0;34m%s\033[0m' % name
|
||||||
else:
|
else:
|
||||||
_name = name
|
_name = name
|
||||||
|
|
||||||
if return_all and len(matches) > 0:
|
if mobj:
|
||||||
if group is None:
|
|
||||||
return list(map(lambda m: next(g for g in m.groups() if g is not None), matches))
|
|
||||||
else:
|
|
||||||
return list(map(lambda m: m.group(group), matches))
|
|
||||||
elif mobj:
|
|
||||||
if group is None:
|
if group is None:
|
||||||
# return the first matching group
|
# return the first matching group
|
||||||
return next(g for g in mobj.groups() if g is not None)
|
return next(g for g in mobj.groups() if g is not None)
|
||||||
@ -1013,6 +999,40 @@ class InfoExtractor(object):
|
|||||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _search_regex_all(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Perform a regex search on the given string, using a single or a list of
    patterns, and return a list with one entry per match found.

    Unlike _search_regex, every pattern is tried and all non-overlapping
    matches of each pattern are collected, in pattern order and then in
    order of appearance in the string.

    For each match the returned entry is:
      * match.group(group) when `group` is given;
      * otherwise the first capture group that took part in the match,
        falling back to the whole matched text when the pattern has no
        capture group or none of them took part.

    In case no pattern matches return `default` if one was passed,
    otherwise raise a RegexNotFoundError when `fatal` is true, or report
    a WARNING specifying the field name and return None.
    """
    # Normalise to a list so single patterns and pattern lists share one
    # code path.
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        patterns = [pattern]
    else:
        patterns = pattern

    matches = []
    for p in patterns:
        matches.extend(re.finditer(p, string, flags))

    if matches:
        if group is not None:
            return [m.group(group) for m in matches]
        # NOTE: next() gets an explicit fallback on purpose. A bare
        # next() raising StopIteration inside list(map(...)) is taken as
        # the end of the iterator and silently truncates the result.
        return [
            next((g for g in m.groups() if g is not None), m.group(0))
            for m in matches]

    if default is not NO_DEFAULT:
        return default

    # The (optionally colourised) field name is only needed for the
    # failure messages below.
    if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
        _name = '\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if fatal:
        raise RegexNotFoundError('Unable to extract %s' % _name)

    self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
    return None
|
||||||
|
|
||||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||||
"""
|
"""
|
||||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||||
@ -1188,10 +1208,10 @@ class InfoExtractor(object):
|
|||||||
'twitter card player')
|
'twitter card player')
|
||||||
|
|
||||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||||
json_lds = self._search_regex(
|
json_lds = self._search_regex_all(
|
||||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', return_all=True, **kwargs)
|
JSON_LD_RE, html, 'JSON-LD', group='json_ld', return_all=True, **kwargs)
|
||||||
default = kwargs.get('default', NO_DEFAULT)
|
default = kwargs.get('default', NO_DEFAULT)
|
||||||
if not json_lds or len(json_lds) == 0:
|
if not json_lds:
|
||||||
return default if default is not NO_DEFAULT else {}
|
return default if default is not NO_DEFAULT else {}
|
||||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||||
# At the same time `default` may be passed that assumes `fatal=False`
|
# At the same time `default` may be passed that assumes `fatal=False`
|
||||||
|
Loading…
x
Reference in New Issue
Block a user