Merge branch 'youtube-sig-crypto-proposal' into 'master'
[youtube] dynamic sig crypto fallback

See merge request laudom/haruhi-dl!1
This commit is contained in:
commit
f4131bcac4
|
@ -19,11 +19,11 @@ IFS=';'
|
||||||
for i in $code; do
|
for i in $code; do
|
||||||
num="$(echo "$i" | grep -Poh ',[0-9]+' | grep -Poh '[0-9]+')"
|
num="$(echo "$i" | grep -Poh ',[0-9]+' | grep -Poh '[0-9]+')"
|
||||||
if [[ "$i" == *"$splice"* ]]; then
|
if [[ "$i" == *"$splice"* ]]; then
|
||||||
echo "a=a[$num:]"
|
echo "a = a[$num:]"
|
||||||
elif [[ "$i" == *"$rev"* ]]; then
|
elif [[ "$i" == *"$rev"* ]]; then
|
||||||
echo "a.reverse()"
|
echo "a.reverse()"
|
||||||
elif [[ "$i" == *"$mess"* ]]; then
|
elif [[ "$i" == *"$mess"* ]]; then
|
||||||
echo "a=self.mess(a,$num)"
|
echo "a = self.mess(a, $num)"
|
||||||
else
|
else
|
||||||
echo "UNKNOWN????"
|
echo "UNKNOWN????"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -7,7 +7,6 @@ import os.path
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import traceback
|
|
||||||
|
|
||||||
from .common import InfoExtractor, SearchInfoExtractor
|
from .common import InfoExtractor, SearchInfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -50,6 +49,7 @@ from ..utils import (
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
"""Provide base functions for Youtube extractors"""
|
"""Provide base functions for Youtube extractors"""
|
||||||
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
_LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
|
||||||
|
@ -1149,6 +1149,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
_VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$'
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super(YoutubeIE, self).__init__(*args, **kwargs)
|
super(YoutubeIE, self).__init__(*args, **kwargs)
|
||||||
self._player_cache = {}
|
self._player_cache = {}
|
||||||
|
@ -1181,35 +1183,91 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
return id_m.group('ext'), id_m.group('id')
|
return id_m.group('id')
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||||
player_type, player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
func_id = '%s_%s_%s' % (
|
func_id = '%s_%s' % (
|
||||||
player_type, player_id, self._signature_cache_id(example_sig))
|
player_id, self._signature_cache_id(example_sig))
|
||||||
assert os.path.basename(func_id) == func_id
|
assert os.path.basename(func_id) == func_id
|
||||||
|
|
||||||
|
"""
|
||||||
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
|
||||||
if cache_spec is not None:
|
if cache_spec is not None:
|
||||||
return lambda s: ''.join(s[i] for i in cache_spec)
|
return lambda s: ''.join(s[i] for i in cache_spec)
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not player_url.startswith('http'):
|
||||||
|
player_url = 'https://www.youtube.com' + player_url
|
||||||
download_note = (
|
download_note = (
|
||||||
'Downloading player %s' % player_url
|
'Downloading player %s' % player_url
|
||||||
if self._downloader.params.get('verbose') else
|
if self._downloader.params.get('verbose') else
|
||||||
'Downloading %s player %s' % (player_type, player_id)
|
'Downloading js player %s' % player_id
|
||||||
)
|
)
|
||||||
|
code = self._download_webpage(
|
||||||
|
player_url, video_id,
|
||||||
|
note=download_note,
|
||||||
|
errnote='Download of js player %s failed' % player_url)
|
||||||
|
res = self._parse_sig_js(code)
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
test_string = ''.join(map(compat_chr, range(len(example_sig))))
|
||||||
cache_res = res(test_string)
|
cache_res = self._do_decrypt_signature(test_string, res)
|
||||||
cache_spec = [ord(c) for c in cache_res]
|
cache_spec = [ord(c) for c in cache_res]
|
||||||
|
|
||||||
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
|
||||||
|
"""
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def _parse_sig_js(self, js_player):
|
||||||
|
shit_parser = re.search(r'[a-z]\=a\.split\((?:""|\'\')\);(([a-zA-Z_][a-zA-Z\d_]+).*);return a\.join', js_player)
|
||||||
|
if not shit_parser:
|
||||||
|
raise ExtractorError('Signature decryption code not found')
|
||||||
|
func, obfuscated_name = shit_parser.group(1, 2)
|
||||||
|
obfuscated_func = re.search(r'%s\s*=\s*{([\s\w(){}[\].,:;=%s]*?})};' % (re.escape(obfuscated_name), '%'),
|
||||||
|
js_player)
|
||||||
|
if not obfuscated_func:
|
||||||
|
raise ExtractorError('Signature decrypting deobfuscated functions not found')
|
||||||
|
obfuscated_stack = obfuscated_func.group(1)
|
||||||
|
obf_map = {}
|
||||||
|
for obffun in re.finditer(r'([a-zA-Z_][a-zA-Z\d_]+):function\(a(?:,b)?\){(.*?)}', obfuscated_stack):
|
||||||
|
obfname, obfval = obffun.group(1, 2)
|
||||||
|
if 'splice' in obfval:
|
||||||
|
obf_map[obfname] = 'splice'
|
||||||
|
elif 'reverse' in obfval:
|
||||||
|
obf_map[obfname] = 'reverse'
|
||||||
|
elif 'var' in obfval and 'length' in obfval:
|
||||||
|
obf_map[obfname] = 'mess'
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unknown obfuscation function type: %s.%s' % (obfuscated_name, obfname))
|
||||||
|
decryptor_stack = []
|
||||||
|
for instruction in re.finditer(r'%s\.([a-zA-Z_][a-zA-Z\d_]+)\(a,(\d+)\);?' % re.escape(obfuscated_name),
|
||||||
|
func):
|
||||||
|
obf_name, obf_arg = instruction.group(1, 2)
|
||||||
|
inst = obf_map.get(obf_name)
|
||||||
|
if self._downloader.params.get('verbose', True):
|
||||||
|
self.to_screen('sig %s %s %s' % (obf_name, inst, obf_arg))
|
||||||
|
if inst:
|
||||||
|
decryptor_stack.append((inst, int(obf_arg) if inst != 'reverse' else None))
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unknown obfuscation function: %s.%s' % (obfuscated_name, obf_name))
|
||||||
|
return decryptor_stack
|
||||||
|
|
||||||
|
def _do_decrypt_signature(self, sig, stack):
|
||||||
|
a = list(sig)
|
||||||
|
for fun in stack:
|
||||||
|
if fun[0] == 'splice':
|
||||||
|
a = a[fun[1]:]
|
||||||
|
elif fun[0] == 'reverse':
|
||||||
|
a.reverse()
|
||||||
|
elif fun[0] == 'mess':
|
||||||
|
a = self.mess(a, fun[1])
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unknown stack action: %s' % (fun[0]))
|
||||||
|
return ''.join(a)
|
||||||
|
|
||||||
def _print_sig_code(self, func, example_sig):
|
def _print_sig_code(self, func, example_sig):
|
||||||
def gen_sig_code(idxs):
|
def gen_sig_code(idxs):
|
||||||
def _genslice(start, end, step):
|
def _genslice(start, end, step):
|
||||||
|
@ -1249,39 +1307,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen('Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
def mess(self,a,b):
|
def mess(self, a, b):
|
||||||
c=a[0]
|
c = a[0]
|
||||||
a[0]=a[b%len(a)]
|
a[0] = a[b % len(a)]
|
||||||
a[b%len(a)]=c
|
a[b % len(a)] = c
|
||||||
return a
|
return a
|
||||||
|
|
||||||
def _decrypt_signature(self, s):
|
def _decrypt_signature_protected(self, s):
|
||||||
"""Turn the encrypted s field into a working signature
|
a = list(s)
|
||||||
YouTube ignores this? It only matters on protected videos..."""
|
a = self.mess(a, 69)
|
||||||
a=[char for char in s]
|
|
||||||
a=self.mess(a,67)
|
|
||||||
a=a[1:]
|
|
||||||
a=self.mess(a,49)
|
|
||||||
a=a[3:]
|
|
||||||
a=self.mess(a,52)
|
|
||||||
a.reverse()
|
a.reverse()
|
||||||
a=a[1:]
|
a = a[2:]
|
||||||
a=self.mess(a,43)
|
a = self.mess(a, 56)
|
||||||
|
a = a[1:]
|
||||||
|
a.reverse()
|
||||||
|
a = a[3:]
|
||||||
a.reverse()
|
a.reverse()
|
||||||
return "".join(a)
|
return "".join(a)
|
||||||
|
|
||||||
def _decrypt_signature_protected(self, s):
|
|
||||||
a=[char for char in s]
|
|
||||||
a=self.mess(a,69)
|
|
||||||
a.reverse()
|
|
||||||
a=a[2:]
|
|
||||||
a=self.mess(a,56)
|
|
||||||
a=a[1:]
|
|
||||||
a.reverse()
|
|
||||||
a=a[3:]
|
|
||||||
a.reverse()
|
|
||||||
return "".join(a)
|
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
subs_doc = self._download_xml(
|
subs_doc = self._download_xml(
|
||||||
|
@ -1682,8 +1725,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
data = compat_urllib_parse_urlencode({
|
data = compat_urllib_parse_urlencode({
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||||
# 'sts': self._search_regex(
|
# 'sts': self._search_regex(
|
||||||
# r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
# r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
|
||||||
})
|
})
|
||||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||||
try:
|
try:
|
||||||
|
@ -1889,6 +1932,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
'width': int_or_none(fmt.get('width')),
|
'width': int_or_none(fmt.get('width')),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sig_decrypt_stack = None
|
||||||
for fmt in streaming_formats:
|
for fmt in streaming_formats:
|
||||||
if fmt.get('drmFamilies') or fmt.get('drm_families'):
|
if fmt.get('drmFamilies') or fmt.get('drm_families'):
|
||||||
continue
|
continue
|
||||||
|
@ -1919,11 +1963,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if cipher:
|
if cipher:
|
||||||
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
|
if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||||
ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)'
|
ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)'
|
||||||
|
|
||||||
player_url = self._search_regex(
|
player_url = self._search_regex(
|
||||||
ASSETS_RE,
|
ASSETS_RE,
|
||||||
embed_webpage if age_gate else video_webpage, '', default=None)
|
embed_webpage if age_gate else video_webpage, '', default=None)
|
||||||
|
|
||||||
if not player_url and not age_gate:
|
if not player_url and not age_gate:
|
||||||
# We need the embed website after all
|
# We need the embed website after all
|
||||||
if embed_webpage is None:
|
if embed_webpage is None:
|
||||||
|
@ -1933,7 +1977,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
player_url = self._search_regex(
|
player_url = self._search_regex(
|
||||||
ASSETS_RE, embed_webpage, 'JS player URL')
|
ASSETS_RE, embed_webpage, 'JS player URL')
|
||||||
|
|
||||||
#if player_url is None:
|
# if player_url is None:
|
||||||
# player_url_json = self._search_regex(
|
# player_url_json = self._search_regex(
|
||||||
# r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
# r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||||
# video_webpage, 'age gate player URL')
|
# video_webpage, 'age gate player URL')
|
||||||
|
@ -1948,14 +1992,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if player_url is None:
|
if player_url is None:
|
||||||
player_desc = 'unknown'
|
player_desc = 'unknown'
|
||||||
else:
|
else:
|
||||||
player_type, player_version = self._extract_player_info(player_url)
|
player_version = self._extract_player_info(player_url)
|
||||||
player_desc = 'html5 player %s' % player_version
|
player_desc = 'html5 player %s' % player_version
|
||||||
parts_sizes = self._signature_cache_id(encrypted_sig)
|
parts_sizes = self._signature_cache_id(encrypted_sig)
|
||||||
self.to_screen('{%s} signature length %s, %s' %
|
self.to_screen('{%s} signature length %s, %s' %
|
||||||
(format_id, parts_sizes, player_desc))
|
(format_id, parts_sizes, player_desc))
|
||||||
|
|
||||||
signature = self._decrypt_signature_protected(encrypted_sig)
|
signature = self._decrypt_signature_protected(encrypted_sig)
|
||||||
|
if not re.match(self._VALID_SIG_VALUE_RE, signature):
|
||||||
|
if not sig_decrypt_stack:
|
||||||
|
if self._downloader.params.get('verbose'):
|
||||||
|
self.to_screen("Built-in signature decryption failed, trying dynamic")
|
||||||
|
sig_decrypt_stack = self._extract_signature_function(video_id, player_url, encrypted_sig)
|
||||||
|
signature = self._do_decrypt_signature(encrypted_sig, sig_decrypt_stack)
|
||||||
|
|
||||||
sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
|
sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
|
||||||
url += '&%s=%s' % (sp, signature)
|
url += '&%s=%s' % (sp, signature)
|
||||||
if 'ratebypass' not in url:
|
if 'ratebypass' not in url:
|
||||||
|
|
Loading…
Reference in a new issue