prerelease artifact generator, for youtube sig
parent
17436014c9
commit
5f8b81c6e7
|
@ -15,6 +15,7 @@ haruhi-dl.1
|
|||
haruhi-dl.bash-completion
|
||||
haruhi-dl.fish
|
||||
haruhi_dl/extractor/lazy_extractors.py
|
||||
haruhi_dl/extractor_artifacts/
|
||||
haruhi-dl
|
||||
haruhi-dl.exe
|
||||
haruhi-dl.tar.gz
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# this is intended to speed up some extractors,
|
||||
# which sometimes need to extract some data that doesn't change very often,
|
||||
# but does change at random times, like youtube's signature "crypto" or soundcloud's client id
|
||||
|
||||
import os
from os.path import dirname as dirn
import sys

# Put the repository root (two directory levels above this script) first on
# sys.path so that the `haruhi_dl` imports below resolve to this checkout.
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))

from haruhi_dl import HaruhiDL
from haruhi_dl.utils import (
    ExtractorError,
)

hdl = HaruhiDL(params={
    'quiet': True,
})

# Directory that receives one generated module per extractor.
artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it has no
# check-then-create race and also creates missing parent directories.
os.makedirs(artifact_dir, exist_ok=True)

for ie_name in (
    'Youtube',
):
    ie = hdl.get_info_extractor(ie_name)
    try:
        file_contents = ie._generate_prerelease_file()
    except ExtractorError as err:
        # Best effort: report the failure and carry on with the next extractor.
        print(err)
    else:
        # The artifact is Python source, which Python 3 decodes as UTF-8 by
        # default, so write it as UTF-8 regardless of the platform locale.
        with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w', encoding='utf-8') as file:
            file.write(file_contents)
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
|||
from datetime import datetime
|
||||
import json
|
||||
import hashlib
|
||||
from inspect import getsource
|
||||
import random
|
||||
import re
|
||||
import time
|
||||
|
@ -45,6 +46,10 @@ from ..utils import (
|
|||
urlencode_postdata,
|
||||
GeoRestrictedError,
|
||||
)
|
||||
try:
|
||||
from ..extractor_artifacts.youtube import _decrypt_signature_protected
|
||||
except ImportError:
|
||||
_decrypt_signature_protected = None
|
||||
|
||||
|
||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||
|
@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||
return id_m.group('id')
|
||||
|
||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
||||
def _extract_signature_function(self, video_id, player_url):
|
||||
player_id = self._extract_player_info(player_url)
|
||||
|
||||
# Read from filesystem cache
|
||||
|
@ -1012,31 +1017,44 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
' return %s\n') % (signature_id_tuple, expr_code)
|
||||
self.to_screen('Extracted signature function:\n' + code)
|
||||
|
||||
def mess(self, a, b):
|
||||
@staticmethod
|
||||
def mess(a, b):
|
||||
c = a[0]
|
||||
a[0] = a[b % len(a)]
|
||||
a[b % len(a)] = c
|
||||
return a
|
||||
|
||||
def _decrypt_signature_protected(self, s):
|
||||
a = list(s)
|
||||
a = self.mess(a, 49)
|
||||
a = self.mess(a, 26)
|
||||
a.reverse()
|
||||
a = self.mess(a, 62)
|
||||
a.reverse()
|
||||
a = a[2:]
|
||||
return "".join(a)
|
||||
|
||||
def _full_signature_handling(self, sig, player_url, video_id):
|
||||
signature = self._decrypt_signature_protected(sig)
|
||||
signature = _decrypt_signature_protected(sig)
|
||||
if re.match(self._VALID_SIG_VALUE_RE, signature):
|
||||
return signature
|
||||
if self._downloader.params.get('verbose'):
|
||||
self.to_screen("Built-in signature decryption failed, trying dynamic")
|
||||
sig_decrypt_stack = self._extract_signature_function(video_id, player_url, sig)
|
||||
sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
|
||||
return self._do_decrypt_signature(sig, sig_decrypt_stack)
|
||||
|
||||
def _generate_prerelease_file(self):
    """Build the source text of the pre-generated signature module.

    Requests a fixed watch page through ``self._download_webpage``, pulls the
    player JS URL out of it, asks ``self._extract_signature_function`` for the
    list of decryption steps and renders those steps as a standalone
    ``_decrypt_signature_protected(sig)`` function, preceded by a de-indented
    copy of ``mess`` with its ``@staticmethod`` decorator removed.

    Returns:
        str: Python source code for the artifact module.

    Raises:
        ExtractorError: if a decryption step names an action other than
            ``splice``, ``reverse`` or ``mess``.
            NOTE(review): the helper calls used here may raise it as well -
            confirm against their definitions.
    """
    # Imported locally so this method does not depend on the module's
    # top-level import list.
    import textwrap

    # It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
    video_id = 'ieQ1rAIjzXc'
    self._set_consent()
    webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
    player_url = self._search_regex(r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)', webpage, 'player url')
    sig_decrypt_stack = self._extract_signature_function(video_id, player_url)

    # textwrap.dedent strips whatever common indentation the method source
    # carries, instead of relying on a hard-coded number of leading spaces.
    func = textwrap.dedent(getsource(self.mess).replace('@staticmethod', ''))
    func += '\n\ndef _decrypt_signature_protected(sig):\n'

    # Each step is indexed as (action, argument); translate it to one
    # statement operating on the character list `a`.
    stack = ['a = list(sig)']
    for fun in sig_decrypt_stack:
        if fun[0] == 'splice':
            stack.append(f'a = a[{fun[1]}:]')
        elif fun[0] == 'reverse':
            stack.append('a.reverse()')
        elif fun[0] == 'mess':
            stack.append(f'a = mess(a, {fun[1]})')
        else:
            raise ExtractorError('Unknown stack action: %s' % (fun[0]))
    stack.append("return ''.join(a)")

    # Indent every generated statement by four spaces to form the function body.
    return func + '\n'.join(' ' * 4 + line for line in stack) + '\n'
|
||||
|
||||
def _get_subtitles(self, video_id, webpage):
|
||||
try:
|
||||
subs_doc = self._download_xml(
|
||||
|
|
Loading…
Reference in New Issue