prerelease artifact generator, for youtube sig

unlicense
Lauren Liberda 2021-06-21 23:01:02 +02:00 committed by Dominika
parent 17436014c9
commit 5f8b81c6e7
3 changed files with 64 additions and 14 deletions

1
.gitignore vendored
View File

@ -15,6 +15,7 @@ haruhi-dl.1
haruhi-dl.bash-completion
haruhi-dl.fish
haruhi_dl/extractor/lazy_extractors.py
haruhi_dl/extractor_artifacts/
haruhi-dl
haruhi-dl.exe
haruhi-dl.tar.gz

View File

@ -0,0 +1,31 @@
# This script is intended to speed up some extractors, which sometimes need
# to extract data that changes rarely, but at unpredictable times, such as
# YouTube's signature "crypto" or SoundCloud's client id.
#
# For every extractor listed below it calls the extractor's
# _generate_prerelease_file() and stores the returned source text as
# haruhi_dl/extractor_artifacts/<lowercased extractor name>.py
import os
from os.path import dirname as dirn
import sys

# Repository root: two directory levels above this script's absolute path.
ROOT_DIR = dirn(dirn(os.path.abspath(__file__)))

# Make the in-tree haruhi_dl package importable; this has to happen before
# the haruhi_dl imports below.
sys.path.insert(0, ROOT_DIR)

from haruhi_dl import HaruhiDL
from haruhi_dl.utils import (
    ExtractorError,
)

hdl = HaruhiDL(params={
    'quiet': True,
})

artifact_dir = os.path.join(ROOT_DIR, 'haruhi_dl', 'extractor_artifacts')
# exist_ok=True replaces a separate "exists?" check followed by mkdir, which
# could fail if the directory appeared between the two calls.
os.makedirs(artifact_dir, exist_ok=True)

for ie_name in (
    'Youtube',
):
    ie = hdl.get_info_extractor(ie_name)
    try:
        # Only this call is expected to raise ExtractorError.
        file_contents = ie._generate_prerelease_file()
    except ExtractorError as err:
        # Best effort: report the failure and carry on without writing a file.
        print(err)
    else:
        artifact_path = os.path.join(artifact_dir, ie_name.lower() + '.py')
        # Explicit encoding so the generated module does not depend on the
        # locale of the machine running this script.
        with open(artifact_path, 'w', encoding='utf-8') as file:
            file.write(file_contents)

View File

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from datetime import datetime
import json
import hashlib
from inspect import getsource
import random
import re
import time
@ -45,6 +46,10 @@ from ..utils import (
urlencode_postdata,
GeoRestrictedError,
)
try:
from ..extractor_artifacts.youtube import _decrypt_signature_protected
except ImportError:
_decrypt_signature_protected = None
class YoutubeBaseInfoExtractor(InfoExtractor):
@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
def _extract_signature_function(self, video_id, player_url, example_sig):
def _extract_signature_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
# Read from filesystem cache
@ -1012,31 +1017,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
def mess(self, a, b):
@staticmethod
def mess(a, b):
    """Swap the first element of list ``a`` with the one at ``b % len(a)``.

    The list is modified in place and also returned.
    """
    # Read a[0] before computing the index, so an empty list fails on the
    # element access first.
    head = a[0]
    pos = b % len(a)
    a[0], a[pos] = a[pos], head
    return a
def _decrypt_signature_protected(self, s):
    """Apply a fixed, hard-coded sequence of transformations to ``s``.

    The characters of ``s`` go through two ``mess`` swaps (49, then 26),
    a reversal, one more ``mess`` swap (62), another reversal, and finally
    the first two characters are dropped. Returns the result as a string.
    """
    chars = list(s)
    for offset in (49, 26):
        chars = self.mess(chars, offset)
    chars.reverse()
    chars = self.mess(chars, 62)
    chars.reverse()
    return "".join(chars[2:])
def _full_signature_handling(self, sig, player_url, video_id):
    """Decrypt ``sig``, preferring the pre-generated routine when available.

    The module-level ``_decrypt_signature_protected`` is tried first. Its
    result is returned when it matches ``self._VALID_SIG_VALUE_RE``.
    Otherwise the signature function is extracted for ``player_url`` and
    applied to ``sig`` through ``self._do_decrypt_signature``.
    """
    # The pre-generated routine is None when the extractor_artifacts module
    # could not be imported; calling it unconditionally would raise
    # TypeError, so go straight to the dynamic path in that case.
    if _decrypt_signature_protected is not None:
        signature = _decrypt_signature_protected(sig)
        if re.match(self._VALID_SIG_VALUE_RE, signature):
            return signature
        if self._downloader.params.get('verbose'):
            self.to_screen("Built-in signature decryption failed, trying dynamic")
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
    return self._do_decrypt_signature(sig, sig_decrypt_stack)
def _generate_prerelease_file(self):
    """Build the source text of a standalone signature-decryption module.

    Downloads a watch page, locates the player script URL in it, extracts
    the signature function for that player, and renders it as Python
    source: a module-level ``mess`` helper (copied from ``self.mess``)
    followed by ``_decrypt_signature_protected(sig)``.

    Returns the generated source as a string.
    Raises ExtractorError when the extracted stack contains an action other
    than 'splice', 'reverse' or 'mess'.
    """
    # Imported locally so this method does not depend on a new file-level import.
    from textwrap import dedent

    # It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
    video_id = 'ieQ1rAIjzXc'
    self._set_consent()
    webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
    player_url = self._search_regex(r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)', webpage, 'player url')
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)

    # Turn the method source into a module-level function: strip the common
    # indentation (whatever its width) and drop the decorator line's text.
    func = dedent(getsource(self.mess)).replace('@staticmethod', '')
    func += '\n\ndef _decrypt_signature_protected(sig):\n'

    # Each stack entry is indexed as (action, argument); render one
    # statement of the generated function body per entry.
    stack = ['a = list(sig)']
    for fun in sig_decrypt_stack:
        action = fun[0]
        if action == 'splice':
            stack.append(f'a = a[{fun[1]}:]')
        elif action == 'reverse':
            stack.append('a.reverse()')
        elif action == 'mess':
            stack.append(f'a = mess(a, {fun[1]})')
        else:
            raise ExtractorError('Unknown stack action: %s' % (action))
    stack.append("return ''.join(a)")

    # Indent every statement by four spaces to form the function body.
    return func + '\n'.join('    ' + line for line in stack) + '\n'
def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(