From e31c0d2576b5768afc7d7d438b63b1209cb26293 Mon Sep 17 00:00:00 2001 From: selfisekai Date: Mon, 9 Nov 2020 16:02:49 +0100 Subject: [PATCH 1/3] [youtube] dynamic sig crypto fallback --- devscripts/ExtractCryptoYT.sh | 4 +- haruhi_dl/extractor/youtube.py | 125 ++++++++++++++++++++++----------- 2 files changed, 85 insertions(+), 44 deletions(-) diff --git a/devscripts/ExtractCryptoYT.sh b/devscripts/ExtractCryptoYT.sh index 6772de75e..ff4a2aabf 100755 --- a/devscripts/ExtractCryptoYT.sh +++ b/devscripts/ExtractCryptoYT.sh @@ -19,11 +19,11 @@ IFS=';' for i in $code; do num="$(echo "$i" | grep -Poh ',[0-9]+' | grep -Poh '[0-9]+')" if [[ "$i" == *"$splice"* ]]; then - echo "a=a[$num:]" + echo "a = a[$num:]" elif [[ "$i" == *"$rev"* ]]; then echo "a.reverse()" elif [[ "$i" == *"$mess"* ]]; then - echo "a=self.mess(a,$num)" + echo "a = self.mess(a, $num)" else echo "UNKNOWN????" fi diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index f4c68c26f..73809b5c4 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -7,7 +7,6 @@ import os.path import random import re import time -import traceback from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -50,6 +49,7 @@ from ..utils import ( urlencode_postdata, ) + class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -1149,6 +1149,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, ] + _VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$' + def __init__(self, *args, **kwargs): super(YoutubeIE, self).__init__(*args, **kwargs) self._player_cache = {} @@ -1181,35 +1183,82 @@ class YoutubeIE(YoutubeBaseInfoExtractor): break else: raise ExtractorError('Cannot identify player %r' % player_url) - return id_m.group('ext'), id_m.group('id') + return id_m.group('id') def _extract_signature_function(self, video_id, player_url, example_sig): - player_type, player_id = self._extract_player_info(player_url) + player_id = self._extract_player_info(player_url) # Read from filesystem cache - func_id = '%s_%s_%s' % ( - player_type, player_id, self._signature_cache_id(example_sig)) + func_id = '%s_%s' % ( + player_id, self._signature_cache_id(example_sig)) assert os.path.basename(func_id) == func_id + """ cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) + """ + if not player_url.startswith('http'): + player_url = 'https://www.youtube.com' + player_url download_note = ( 'Downloading player %s' % player_url if self._downloader.params.get('verbose') else - 'Downloading %s player %s' % (player_type, player_id) + 'Downloading js player %s' % player_id ) + code = self._download_webpage( + player_url, video_id, + note=download_note, + errnote='Download of js player %s failed' % player_url) + res = self._parse_sig_js(code) - - + """ test_string = ''.join(map(compat_chr, range(len(example_sig)))) - cache_res = res(test_string) + cache_res = self._do_decrypt_signature(test_string, res) cache_spec = [ord(c) for c in cache_res] self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) + """ return res + def _parse_sig_js(self, js_player): + shit_parser = re.search(r'[a-z]\=a\.split\((?:""|\'\')\);(([a-zA-Z]+).*);return a\.join', js_player) + if not shit_parser: + raise ExtractorError('Signature decryption code not found') + func, obfuscated_name = shit_parser.group(1, 2) + obfuscated_func = re.search(r'%s\s*=\s*{([\s\w(){}[\].,:;=%s]*?})};' % (re.escape(obfuscated_name), '%'), + js_player) + if not obfuscated_func: + raise ExtractorError('Signature decrypting deobfuscated functions not found') + obfuscated_stack = obfuscated_func.group(1) + obf_map = {} + for obffun in re.finditer(r'([a-zA-Z]{2}):function\(a(?:,b)?\){(.*?)}', obfuscated_stack): + obfname, obfval = obffun.group(1, 2) + if obfval == 'a.splice(0,b)': + obf_map[obfname] = 'splice' + elif obfval == 'a.reverse()': + obf_map[obfname] = 'reverse' + elif obfval == 'var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c': + obf_map[obfname] = 'mess' + decryptor_stack = [] + for instruction in re.finditer(r'%s\.([a-zA-Z]{2})\(a,(\d+)\);' % re.escape(obfuscated_name), + func): + obf_name, obf_arg = instruction.group(1, 2) + inst = obf_map.get(obf_name) + if inst == 'splice': + decryptor_stack.append(lambda a: a[:int(obf_arg)]) + elif inst == 'reverse': + decryptor_stack.append(lambda a: reversed(a)) + elif inst == 'mess': + decryptor_stack.append(lambda a: self.mess(a, int(obf_arg))) + return decryptor_stack + + def _do_decrypt_signature(self, sig, stack): + a = list(sig) + for fun in stack: + a = fun(a) + return ''.join(a) + def _print_sig_code(self, func, example_sig): def gen_sig_code(idxs): def _genslice(start, end, step): @@ -1249,37 +1298,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ' return %s\n') % (signature_id_tuple, expr_code) self.to_screen('Extracted signature function:\n' + code) - def mess(self,a,b): - c=a[0] - a[0]=a[b%len(a)] - a[b%len(a)]=c + def mess(self, a, b): + c = a[0] + a[0] = a[b % len(a)] + a[b % len(a)] = c return a - def _decrypt_signature(self, s): - """Turn the encrypted s field into a working signature - YouTube ignores this? It only matters on protected videos...""" - a=[char for char in s] - a=self.mess(a,67) - a=a[1:] - a=self.mess(a,49) - a=a[3:] - a=self.mess(a,52) - a.reverse() - a=a[1:] - a=self.mess(a,43) + def _decrypt_signature_protected(self, s): + a = list(s) + a = self.mess(a, 64) + a = self.mess(a, 1) + a = self.mess(a, 25) + a = self.mess(a, 70) a.reverse() + a = a[2:] return "".join(a) - def _decrypt_signature_protected(self, s): - a=[char for char in s] - a=self.mess(a,64) - a=self.mess(a,1) - a=self.mess(a,25) - a=self.mess(a,70) - a.reverse() - a=a[2:] - return "".join(a) - def _get_subtitles(self, video_id, webpage): try: subs_doc = self._download_xml( @@ -1680,8 +1714,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, -# 'sts': self._search_regex( - # r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), + # 'sts': self._search_regex( + # r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data try: @@ -1887,6 +1921,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'width': int_or_none(fmt.get('width')), } + sig_decrypt_stack = None for fmt in streaming_formats: if fmt.get('drmFamilies') or fmt.get('drm_families'): continue @@ -1917,11 +1952,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cipher: if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True): ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)' - + player_url = self._search_regex( ASSETS_RE, embed_webpage if age_gate else video_webpage, '', default=None) - + if not player_url and not age_gate: # We need the embed website after all if embed_webpage is None: @@ -1931,7 +1966,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_url = self._search_regex( ASSETS_RE, embed_webpage, 'JS player URL') - #if player_url is None: + # if player_url is None: # player_url_json = self._search_regex( # r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', # video_webpage, 'age gate player URL') @@ -1946,14 +1981,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if player_url is None: player_desc = 'unknown' else: - player_type, player_version = self._extract_player_info(player_url) + player_version = self._extract_player_info(player_url) player_desc = 'html5 player %s' % player_version parts_sizes = self._signature_cache_id(encrypted_sig) self.to_screen('{%s} signature length %s, %s' % (format_id, parts_sizes, player_desc)) signature = self._decrypt_signature_protected(encrypted_sig) - + if not re.match(self._VALID_SIG_VALUE_RE, signature): + if self._downloader.params.get('verbose'): + self.to_screen("Built-in signature decryption failed") + if not sig_decrypt_stack: + sig_decrypt_stack = self._extract_signature_function(video_id, player_url, encrypted_sig) + signature = self._do_decrypt_signature(encrypted_sig, sig_decrypt_stack) + sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature' url += '&%s=%s' % (sp, signature) if 'ratebypass' not in url: From 8c0ff392eac6cd291989af114c8571e862d96811 Mon Sep 17 00:00:00 2001 From: selfisekai Date: Thu, 12 Nov 2020 06:31:11 +0100 Subject: [PATCH 2/3] [youtube] dynamic sig improvements --- haruhi_dl/extractor/youtube.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index 73809b5c4..d29efa306 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -1222,7 +1222,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return res def _parse_sig_js(self, js_player): - shit_parser = re.search(r'[a-z]\=a\.split\((?:""|\'\')\);(([a-zA-Z]+).*);return a\.join', js_player) + shit_parser = re.search(r'[a-z]\=a\.split\((?:""|\'\')\);(([a-zA-Z_][a-zA-Z\d_]+).*);return a\.join', js_player) if not shit_parser: raise ExtractorError('Signature decryption code not found') func, obfuscated_name = shit_parser.group(1, 2) @@ -1232,31 +1232,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('Signature decrypting deobfuscated functions not found') obfuscated_stack = obfuscated_func.group(1) obf_map = {} - for obffun in re.finditer(r'([a-zA-Z]{2}):function\(a(?:,b)?\){(.*?)}', obfuscated_stack): + for obffun in re.finditer(r'([a-zA-Z_][a-zA-Z\d_]+):function\(a(?:,b)?\){(.*?)}', obfuscated_stack): obfname, obfval = obffun.group(1, 2) - if obfval == 'a.splice(0,b)': + if 'splice' in obfval: obf_map[obfname] = 'splice' - elif obfval == 'a.reverse()': + elif 'reverse' in obfval: obf_map[obfname] = 'reverse' - elif obfval == 'var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c': + elif 'var' in obfval and 'length' in obfval: obf_map[obfname] = 'mess' + else: + raise ExtractorError('Unknown obfuscation function type: %s.%s' % (obfuscated_name, obfname)) decryptor_stack = [] - for instruction in re.finditer(r'%s\.([a-zA-Z]{2})\(a,(\d+)\);' % re.escape(obfuscated_name), + for instruction in re.finditer(r'%s\.([a-zA-Z_][a-zA-Z\d_]+)\(a,(\d+)\);?' % re.escape(obfuscated_name), func): obf_name, obf_arg = instruction.group(1, 2) inst = obf_map.get(obf_name) - if inst == 'splice': - decryptor_stack.append(lambda a: a[:int(obf_arg)]) + self.to_screen('%s %s %s' % (obf_name, inst, obf_arg)) + if not inst: + raise ExtractorError('Unknown obfuscation function: %s.%s (1)' % (obfuscated_name, obf_name)) + elif inst == 'splice': + decryptor_stack.append(lambda a: a[int(obf_arg):]) elif inst == 'reverse': decryptor_stack.append(lambda a: reversed(a)) elif inst == 'mess': decryptor_stack.append(lambda a: self.mess(a, int(obf_arg))) + else: + raise ExtractorError('Unknown obfuscation function: %s.%s (2)' % (obfuscated_name, obf_name)) return decryptor_stack def _do_decrypt_signature(self, sig, stack): a = list(sig) for fun in stack: - a = fun(a) + a = list(fun(a)) return ''.join(a) def _print_sig_code(self, func, example_sig): From 0259a32b73c77d30d939fe8e7ce49bc83dd364cf Mon Sep 17 00:00:00 2001 From: selfisekai Date: Thu, 12 Nov 2020 11:02:07 +0100 Subject: [PATCH 3/3] [youtube] fix reworked sig decrypting --- haruhi_dl/extractor/youtube.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index d29efa306..30b4a9596 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -1247,23 +1247,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): func): obf_name, obf_arg = instruction.group(1, 2) inst = obf_map.get(obf_name) - self.to_screen('%s %s %s' % (obf_name, inst, obf_arg)) - if not inst: - raise ExtractorError('Unknown obfuscation function: %s.%s (1)' % (obfuscated_name, obf_name)) - elif inst == 'splice': - decryptor_stack.append(lambda a: a[int(obf_arg):]) - elif inst == 'reverse': - decryptor_stack.append(lambda a: reversed(a)) - elif inst == 'mess': - decryptor_stack.append(lambda a: self.mess(a, int(obf_arg))) + if self._downloader.params.get('verbose', True): + self.to_screen('sig %s %s %s' % (obf_name, inst, obf_arg)) + if inst: + decryptor_stack.append((inst, int(obf_arg) if inst != 'reverse' else None)) else: - raise ExtractorError('Unknown obfuscation function: %s.%s (2)' % (obfuscated_name, obf_name)) + raise ExtractorError('Unknown obfuscation function: %s.%s' % (obfuscated_name, obf_name)) return decryptor_stack def _do_decrypt_signature(self, sig, stack): a = list(sig) for fun in stack: - a = list(fun(a)) + if fun[0] == 'splice': + a = a[fun[1]:] + elif fun[0] == 'reverse': + a.reverse() + elif fun[0] == 'mess': + a = self.mess(a, fun[1]) + else: + raise ExtractorError('Unknown stack action: %s' % (fun[0])) return ''.join(a) def _print_sig_code(self, func, example_sig): @@ -1996,9 +1998,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): signature = self._decrypt_signature_protected(encrypted_sig) if not re.match(self._VALID_SIG_VALUE_RE, signature): - if self._downloader.params.get('verbose'): - self.to_screen("Built-in signature decryption failed") if not sig_decrypt_stack: + if self._downloader.params.get('verbose'): + self.to_screen("Built-in signature decryption failed, trying dynamic") sig_decrypt_stack = self._extract_signature_function(video_id, player_url, encrypted_sig) signature = self._do_decrypt_signature(encrypted_sig, sig_decrypt_stack)