utils backported from youtube-dl

master
Laura Liberda 2021-05-25 13:17:09 +02:00
parent 0646c6f6ac
commit 9c7163416e
1 changed files with 63 additions and 0 deletions

View File

@ -23,6 +23,9 @@ class FxckerError(Exception):
pass
TEST_URL = "http://detectportal.firefox.com/success.txt"
def get_user_agent():
if bool(random.getrandbits(1)):
# Google Chrome
@ -415,3 +418,63 @@ def absolute_url(new_url, old_url):
# like "hostname/path?query", if other checks fail
return scheme + "://" + new_url
# from youtube-dl
def js_to_json(code):
COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
INTEGER_TABLE = (
(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
)
def fix_kv(m):
v = m.group(0)
if v in ('true', 'false', 'null'):
return v
elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
return ""
if v[0] in ("'", '"'):
v = re.sub(r'(?s)\\.|"', lambda m: {
'"': '\\"',
"\\'": "'",
'\\\n': '',
'\\x': '\\u00',
}.get(m.group(0), m.group(0)), v[1:-1])
else:
for regex, base in INTEGER_TABLE:
im = re.match(regex, v)
if im:
i = int(im.group(1), base)
return '"%d":' % i if v.endswith(':') else '%d' % i
return '"%s"' % v
return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
{comment}|,(?={skip}[\]}}])|
(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
\b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
[0-9]+(?={skip}:)|
!+
'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
# from youtube-dl
def hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {}
for input in re.findall(r'(?i)(<input[^>]+>)', html):
attrs = extract_attributes(input)
if not input:
continue
if attrs.get('type') not in ('hidden', 'submit'):
continue
name = attrs.get('name') or attrs.get('id')
value = attrs.get('value')
if name and value is not None:
hidden_inputs[name] = value
return hidden_inputs