mirror of
https://github.com/l1ving/youtube-dl
synced 2020-11-18 19:53:54 -08:00
[generic] Create new _search_regex_all method that returns all matches
This commit is contained in:
parent
8813775c98
commit
a01e16e5fd
@ -965,41 +965,27 @@ class InfoExtractor(object):
|
||||
video_info['description'] = playlist_description
|
||||
return video_info
|
||||
|
||||
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None, return_all=False):
|
||||
def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Perform a regex search on the given string, using a single or a list of
|
||||
patterns returning the first matching group.
|
||||
In case of failure return a default value or raise a WARNING or a
|
||||
RegexNotFoundError, depending on fatal, specifying the field name.
|
||||
"""
|
||||
matches = []
|
||||
|
||||
if isinstance(pattern, (str, compat_str, compiled_regex_type)):
|
||||
if return_all:
|
||||
matches = list(re.finditer(pattern, string, flags))
|
||||
else:
|
||||
mobj = re.search(pattern, string, flags)
|
||||
mobj = re.search(pattern, string, flags)
|
||||
else:
|
||||
for p in pattern:
|
||||
if return_all:
|
||||
new_matches = list(re.finditer(p, string, flags))
|
||||
matches.extend(new_matches)
|
||||
else:
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj:
|
||||
break
|
||||
mobj = re.search(p, string, flags)
|
||||
if mobj:
|
||||
break
|
||||
|
||||
if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
|
||||
_name = '\033[0;34m%s\033[0m' % name
|
||||
else:
|
||||
_name = name
|
||||
|
||||
if return_all and len(matches) > 0:
|
||||
if group is None:
|
||||
return list(map(lambda m: next(g for g in m.groups() if g is not None), matches))
|
||||
else:
|
||||
return list(map(lambda m: m.group(group), matches))
|
||||
elif mobj:
|
||||
if mobj:
|
||||
if group is None:
|
||||
# return the first matching group
|
||||
return next(g for g in mobj.groups() if g is not None)
|
||||
@ -1013,6 +999,40 @@ class InfoExtractor(object):
|
||||
self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
|
||||
return None
|
||||
|
||||
def _search_regex_all(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
    """
    Collect every match of one pattern, or of each pattern in a list,
    in the given string.

    pattern may be a single pattern (string or compiled regex) or an
    iterable of patterns. The matches of each pattern are gathered with
    re.finditer; for an iterable of patterns the results are ordered
    pattern by pattern, not by position in the string.

    Returns a list with one entry per match: the value of `group` when it
    is given, otherwise the first group of that match which is not None.
    In the latter mode, matches without any non-None group are skipped.

    In case nothing matches at all, return the default value if one was
    passed, raise a RegexNotFoundError if fatal is true, or otherwise
    report a WARNING and return None; the message specifies the field
    name.
    """
    # Normalise to a sequence of patterns so both call styles share one loop.
    if isinstance(pattern, (str, compat_str, compiled_regex_type)):
        patterns = [pattern]
    else:
        patterns = pattern

    matches = []
    for p in patterns:
        matches.extend(re.finditer(p, string, flags))

    # Highlight the field name in messages when stderr is a colour-capable tty.
    if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
        _name = '\033[0;34m%s\033[0m' % name
    else:
        _name = name

    if matches:
        if group is not None:
            return [m.group(group) for m in matches]
        # A bare next() would raise StopIteration for a match that captured
        # nothing; inside list(map(...)) that exception is taken as the end
        # of iteration and silently truncates the result. Use a default and
        # skip such matches instead.
        results = []
        for m in matches:
            first_group = next((g for g in m.groups() if g is not None), None)
            if first_group is not None:
                results.append(first_group)
        return results
    elif default is not NO_DEFAULT:
        return default
    elif fatal:
        raise RegexNotFoundError('Unable to extract %s' % _name)
    else:
        self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
        return None
|
||||
|
||||
def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
|
||||
"""
|
||||
Like _search_regex, but strips HTML tags and unescapes entities.
|
||||
@ -1188,10 +1208,10 @@ class InfoExtractor(object):
|
||||
'twitter card player')
|
||||
|
||||
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
|
||||
json_lds = self._search_regex(
|
||||
json_lds = self._search_regex_all(
|
||||
JSON_LD_RE, html, 'JSON-LD', group='json_ld', return_all=True, **kwargs)
|
||||
default = kwargs.get('default', NO_DEFAULT)
|
||||
if not json_lds or len(json_lds) == 0:
|
||||
if not json_lds:
|
||||
return default if default is not NO_DEFAULT else {}
|
||||
# JSON-LD may be malformed and thus `fatal` should be respected.
|
||||
# At the same time `default` may be passed that assumes `fatal=False`
|
||||
|
Loading…
x
Reference in New Issue
Block a user