[utils] Improve _hidden_inputs

This commit is contained in:
Sergey M․ 2016-09-15 21:54:48 +07:00
parent eb5b1fc021
commit c849836854
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
1 changed file with 8 additions and 8 deletions

View File

@@ -888,16 +888,16 @@ class InfoExtractor(object):
def _hidden_inputs(html): def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html) html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {} hidden_inputs = {}
for input in re.findall(r'(?i)<input([^>]+)>', html): for input in re.findall(r'(?i)(<input[^>]+>)', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): attrs = extract_attributes(input)
if not input:
continue continue
name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input) if attrs.get('type') not in ('hidden', 'submit'):
if not name:
continue continue
value = re.search(r'value=(["\'])(?P<value>.*?)\1', input) name = attrs.get('name') or attrs.get('id')
if not value: value = attrs.get('value')
continue if name and value is not None:
hidden_inputs[name.group('value')] = value.group('value') hidden_inputs[name] = value
return hidden_inputs return hidden_inputs
def _form_hidden_inputs(self, form_id, html): def _form_hidden_inputs(self, form_id, html):