utils backported from youtube-dl
parent
0646c6f6ac
commit
9c7163416e
|
@ -23,6 +23,9 @@ class FxckerError(Exception):
|
|||
pass
|
||||
|
||||
|
||||
TEST_URL = "http://detectportal.firefox.com/success.txt"
|
||||
|
||||
|
||||
def get_user_agent():
|
||||
if bool(random.getrandbits(1)):
|
||||
# Google Chrome
|
||||
|
@ -415,3 +418,63 @@ def absolute_url(new_url, old_url):
|
|||
|
||||
# like "hostname/path?query", if other checks fail
|
||||
return scheme + "://" + new_url
|
||||
|
||||
|
||||
# from youtube-dl
|
||||
def js_to_json(code):
|
||||
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
|
||||
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
|
||||
INTEGER_TABLE = (
|
||||
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
|
||||
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
|
||||
)
|
||||
|
||||
def fix_kv(m):
|
||||
v = m.group(0)
|
||||
if v in ('true', 'false', 'null'):
|
||||
return v
|
||||
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
|
||||
return ""
|
||||
|
||||
if v[0] in ("'", '"'):
|
||||
v = re.sub(r'(?s)\\.|"', lambda m: {
|
||||
'"': '\\"',
|
||||
"\\'": "'",
|
||||
'\\\n': '',
|
||||
'\\x': '\\u00',
|
||||
}.get(m.group(0), m.group(0)), v[1:-1])
|
||||
else:
|
||||
for regex, base in INTEGER_TABLE:
|
||||
im = re.match(regex, v)
|
||||
if im:
|
||||
i = int(im.group(1), base)
|
||||
return '"%d":' % i if v.endswith(':') else '%d' % i
|
||||
|
||||
return '"%s"' % v
|
||||
|
||||
return re.sub(r'''(?sx)
|
||||
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
|
||||
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
|
||||
{comment}|,(?={skip}[\]}}])|
|
||||
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
|
||||
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
|
||||
[0-9]+(?={skip}:)|
|
||||
!+
|
||||
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
|
||||
|
||||
|
||||
# from youtube-dl
|
||||
def hidden_inputs(html):
|
||||
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
|
||||
hidden_inputs = {}
|
||||
for input in re.findall(r'(?i)(<input[^>]+>)', html):
|
||||
attrs = extract_attributes(input)
|
||||
if not input:
|
||||
continue
|
||||
if attrs.get('type') not in ('hidden', 'submit'):
|
||||
continue
|
||||
name = attrs.get('name') or attrs.get('id')
|
||||
value = attrs.get('value')
|
||||
if name and value is not None:
|
||||
hidden_inputs[name] = value
|
||||
return hidden_inputs
|
||||
|
|
Loading…
Reference in New Issue