prerelease artifact generator, for youtube sig

This commit is contained in:
Lauren Liberda 2021-06-21 23:01:02 +02:00 committed by Dominika
parent 17436014c9
commit 5f8b81c6e7
3 changed files with 64 additions and 14 deletions

1
.gitignore vendored
View file

@ -15,6 +15,7 @@ haruhi-dl.1
haruhi-dl.bash-completion haruhi-dl.bash-completion
haruhi-dl.fish haruhi-dl.fish
haruhi_dl/extractor/lazy_extractors.py haruhi_dl/extractor/lazy_extractors.py
haruhi_dl/extractor_artifacts/
haruhi-dl haruhi-dl
haruhi-dl.exe haruhi-dl.exe
haruhi-dl.tar.gz haruhi-dl.tar.gz

View file

@ -0,0 +1,31 @@
# This script is intended to speed up some extractors which sometimes need to
# extract data that rarely changes, but does so at unpredictable times, like
# YouTube's signature "crypto" or SoundCloud's client id.
import os
from os.path import dirname as dirn
import sys

# Two directory levels above this file; put it first on sys.path so the
# in-tree haruhi_dl package is the one that gets imported below.
root_dir = dirn(dirn(os.path.abspath(__file__)))
sys.path.insert(0, root_dir)

from haruhi_dl import HaruhiDL
from haruhi_dl.utils import (
    ExtractorError,
)

hdl = HaruhiDL(params={
    'quiet': True,
})

artifact_dir = os.path.join(root_dir, 'haruhi_dl', 'extractor_artifacts')
# exist_ok avoids the check-then-create race of exists() followed by mkdir().
os.makedirs(artifact_dir, exist_ok=True)

for ie_name in (
    'Youtube',
):
    ie = hdl.get_info_extractor(ie_name)
    try:
        file_contents = ie._generate_prerelease_file()
    except ExtractorError as err:
        # Best effort: report the failure and carry on with the next
        # extractor without writing an artifact for this one.
        print(err)
        continue
    artifact_path = os.path.join(artifact_dir, ie_name.lower() + '.py')
    with open(artifact_path, 'w', encoding='utf-8') as file:
        file.write(file_contents)

View file

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from datetime import datetime from datetime import datetime
import json import json
import hashlib import hashlib
from inspect import getsource
import random import random
import re import re
import time import time
@ -45,6 +46,10 @@ from ..utils import (
urlencode_postdata, urlencode_postdata,
GeoRestrictedError, GeoRestrictedError,
) )
try:
from ..extractor_artifacts.youtube import _decrypt_signature_protected
except ImportError:
_decrypt_signature_protected = None
class YoutubeBaseInfoExtractor(InfoExtractor): class YoutubeBaseInfoExtractor(InfoExtractor):
@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url) raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id') return id_m.group('id')
def _extract_signature_function(self, video_id, player_url, example_sig): def _extract_signature_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url) player_id = self._extract_player_info(player_url)
# Read from filesystem cache # Read from filesystem cache
@ -1012,31 +1017,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' return %s\n') % (signature_id_tuple, expr_code) ' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code) self.to_screen('Extracted signature function:\n' + code)
@staticmethod
def mess(a, b):
    """Swap the first element of ``a`` with the one at index ``b % len(a)``.

    The swap happens in place and the same list object is returned, so
    calls can be chained. An empty list is returned unchanged instead of
    raising ZeroDivisionError from the modulo.
    """
    if not a:
        return a
    pos = b % len(a)
    a[0], a[pos] = a[pos], a[0]
    return a
def _decrypt_signature_protected(self, s):
    """Apply the hard-coded signature transformation to ``s``.

    The characters go through two swaps, a reversal, another swap and a
    second reversal; the first two characters of the result are then
    dropped and the remainder is joined back into a string.
    """
    chars = list(s)
    for offset in (49, 26):
        chars = self.mess(chars, offset)
    chars.reverse()
    chars = self.mess(chars, 62)
    chars.reverse()
    return "".join(chars[2:])
def _full_signature_handling(self, sig, player_url, video_id):
    """Decrypt ``sig``, preferring the pregenerated decryption function.

    The pregenerated function is tried first when it is available; its
    result is returned if it matches ``_VALID_SIG_VALUE_RE``. Otherwise
    the signature function is extracted from the player at ``player_url``
    and applied to the original ``sig``.
    """
    # The pregenerated artifact is optional: the guarded import at the top
    # of this file leaves the name set to None when the module is missing,
    # so calling it unconditionally would raise TypeError.
    if _decrypt_signature_protected is not None:
        signature = _decrypt_signature_protected(sig)
        if re.match(self._VALID_SIG_VALUE_RE, signature):
            return signature
        if self._downloader.params.get('verbose'):
            self.to_screen("Built-in signature decryption failed, trying dynamic")
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
    return self._do_decrypt_signature(sig, sig_decrypt_stack)
def _generate_prerelease_file(self):
    """Build the source of a module with a pregenerated signature decryptor.

    Downloads the watch page of a fixed video, locates the player script
    URL in it, extracts the signature function from that player and
    renders the resulting steps as Python source.

    Returns the module source as a string. It contains a plain ``mess``
    function (copied from this class) followed by a
    ``_decrypt_signature_protected(sig)`` function.

    Raises ExtractorError if the extracted stack contains an action other
    than 'splice', 'reverse' or 'mess'.
    """
    # Local import keeps this rarely used generator self-contained.
    from textwrap import dedent

    # It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
    video_id = 'ieQ1rAIjzXc'
    self._set_consent()
    webpage = self._download_webpage(
        'https://www.youtube.com/watch?v=%s' % video_id, video_id)
    player_url = self._search_regex(
        r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)',
        webpage, 'player url')
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)

    # dedent() strips whatever indentation the method has inside the class,
    # so the copied helper always starts at column 0 in the generated file.
    # The decorator line is dropped because the copy is a module-level
    # function there.
    func = dedent(getsource(self.mess)).replace('@staticmethod\n', '')
    func += '\n\ndef _decrypt_signature_protected(sig):\n'

    stack = ['a = list(sig)']
    for fun in sig_decrypt_stack:
        action = fun[0]
        if action == 'splice':
            stack.append(f'a = a[{fun[1]}:]')
        elif action == 'reverse':
            stack.append('a.reverse()')
        elif action == 'mess':
            stack.append(f'a = mess(a, {fun[1]})')
        else:
            raise ExtractorError('Unknown stack action: %s' % (action))
    stack.append("return ''.join(a)")

    # Indent every generated statement by one level to form the function body.
    return func + '\n'.join(' ' * 4 + line for line in stack) + '\n'
def _get_subtitles(self, video_id, webpage): def _get_subtitles(self, video_id, webpage):
try: try:
subs_doc = self._download_xml( subs_doc = self._download_xml(