prerelease artifact generator, for youtube sig
This commit is contained in:
parent
17436014c9
commit
5f8b81c6e7
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -15,6 +15,7 @@ haruhi-dl.1
|
||||||
haruhi-dl.bash-completion
|
haruhi-dl.bash-completion
|
||||||
haruhi-dl.fish
|
haruhi-dl.fish
|
||||||
haruhi_dl/extractor/lazy_extractors.py
|
haruhi_dl/extractor/lazy_extractors.py
|
||||||
|
haruhi_dl/extractor_artifacts/
|
||||||
haruhi-dl
|
haruhi-dl
|
||||||
haruhi-dl.exe
|
haruhi-dl.exe
|
||||||
haruhi-dl.tar.gz
|
haruhi-dl.tar.gz
|
||||||
|
|
31
devscripts/prerelease_codegen.py
Normal file
31
devscripts/prerelease_codegen.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# this is intended to speed-up some extractors,
|
||||||
|
# which sometimes need to extract some data that doesn't change very often,
|
||||||
|
# but does change at unpredictable times, like youtube's signature "crypto" or soundcloud's client id
|
||||||
|
|
||||||
|
import os
|
||||||
|
from os.path import dirname as dirn
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from haruhi_dl import HaruhiDL
|
||||||
|
from haruhi_dl.utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Quiet downloader instance used only to obtain extractor objects.
hdl = HaruhiDL(params={
    'quiet': True,
})

# Generated modules are written to <repo root>/haruhi_dl/extractor_artifacts.
artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
# exist_ok=True replaces the separate exists() check, so there is no window
# between the check and the creation in which the call could fail.
os.makedirs(artifact_dir, exist_ok=True)

# Extractors listed here must provide a _generate_prerelease_file() method
# that returns the source code of the artifact module as a string.
for ie_name in (
    'Youtube',
):
    ie = hdl.get_info_extractor(ie_name)
    try:
        file_contents = ie._generate_prerelease_file()
    except ExtractorError as err:
        # Best effort: report the failure and carry on with the next extractor.
        print(err)
        continue
    # Explicit encoding so the written file does not depend on the locale.
    with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w', encoding='utf-8') as file:
        file.write(file_contents)
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from inspect import getsource
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -45,6 +46,10 @@ from ..utils import (
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
from ..extractor_artifacts.youtube import _decrypt_signature_protected
|
||||||
|
except ImportError:
|
||||||
|
_decrypt_signature_protected = None
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
return id_m.group('id')
|
return id_m.group('id')
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
|
@ -1012,31 +1017,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen('Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
def mess(self, a, b):
|
@staticmethod
|
||||||
|
def mess(a, b):
|
||||||
c = a[0]
|
c = a[0]
|
||||||
a[0] = a[b % len(a)]
|
a[0] = a[b % len(a)]
|
||||||
a[b % len(a)] = c
|
a[b % len(a)] = c
|
||||||
return a
|
return a
|
||||||
|
|
||||||
def _decrypt_signature_protected(self, s):
|
|
||||||
a = list(s)
|
|
||||||
a = self.mess(a, 49)
|
|
||||||
a = self.mess(a, 26)
|
|
||||||
a.reverse()
|
|
||||||
a = self.mess(a, 62)
|
|
||||||
a.reverse()
|
|
||||||
a = a[2:]
|
|
||||||
return "".join(a)
|
|
||||||
|
|
||||||
def _full_signature_handling(self, sig, player_url, video_id):
    """Decrypt ``sig``, preferring the pre-generated routine when it exists.

    The module-level ``_decrypt_signature_protected`` is tried first. If it
    is unavailable, or its result does not match ``_VALID_SIG_VALUE_RE``,
    the signature steps are extracted from the player at ``player_url``
    and applied instead.

    Returns the decrypted signature.
    """
    # The artifact import is optional and falls back to None, so the name
    # must be checked before it is called.
    if _decrypt_signature_protected is not None:
        signature = _decrypt_signature_protected(sig)
        if re.match(self._VALID_SIG_VALUE_RE, signature):
            return signature
        if self._downloader.params.get('verbose'):
            self.to_screen("Built-in signature decryption failed, trying dynamic")
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
    return self._do_decrypt_signature(sig, sig_decrypt_stack)
|
||||||
|
|
||||||
|
def _generate_prerelease_file(self):
    """Return Python source for a standalone signature-decryption module.

    Downloads a fixed watch page, finds the player script URL in it, and
    asks ``_extract_signature_function`` for the list of signature steps.
    The steps are rendered as a module containing a copy of ``mess`` and a
    ``_decrypt_signature_protected(sig)`` function.

    Raises ExtractorError when a step has an action other than
    'splice', 'reverse' or 'mess'.
    """
    import textwrap

    # It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
    video_id = 'ieQ1rAIjzXc'
    self._set_consent()
    webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
    # Dots are escaped so they match only a literal '.' in the player path.
    player_url = self._search_regex(
        r'"jsUrl":"(/s/player/.*?/player_ias\.vflset/.*?/base\.js)', webpage, 'player url')
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)

    # Copy the helper's own source into the generated module. dedent() strips
    # whatever class-level indentation it has, and the decorator is removed
    # because the copy becomes a plain module-level function.
    func = textwrap.dedent(getsource(self.mess)).replace('@staticmethod', '')
    func += '\n\ndef _decrypt_signature_protected(sig):\n'

    # Each step becomes one statement operating on the list ``a``.
    stack = ['a = list(sig)']
    for fun in sig_decrypt_stack:
        action = fun[0]
        if action == 'splice':
            stack.append(f'a = a[{fun[1]}:]')
        elif action == 'reverse':
            stack.append('a.reverse()')
        elif action == 'mess':
            stack.append(f'a = mess(a, {fun[1]})')
        else:
            raise ExtractorError('Unknown stack action: %s' % (fun[0]))
    stack.append("return ''.join(a)")
    # Indent every statement so it forms the body of the generated function.
    return func + '\n'.join(' ' * 4 + line for line in stack) + '\n'
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
subs_doc = self._download_xml(
|
subs_doc = self._download_xml(
|
||||||
|
|
Loading…
Reference in a new issue