From 7fb9bc8b62e79518f9c43827cfe62573a3a61410 Mon Sep 17 00:00:00 2001 From: Dominika Date: Sun, 1 Nov 2020 22:24:53 +0100 Subject: [PATCH] removed useless code, fixed URLs, fixed like/dislike count in youtube extractor --- haruhi_dl/extractor/br.py | 4 - haruhi_dl/extractor/youtube.py | 184 +++------------------------------ haruhi_dl/utils.py | 4 +- 3 files changed, 14 insertions(+), 178 deletions(-) diff --git a/haruhi_dl/extractor/br.py b/haruhi_dl/extractor/br.py index 9bde7f2d8..a11178351 100644 --- a/haruhi_dl/extractor/br.py +++ b/haruhi_dl/extractor/br.py @@ -113,10 +113,6 @@ class BRIE(InfoExtractor): media['upload_date'] = ''.join(reversed(broadcast_date.split('.'))) medias.append(media) - if len(medias) > 1: - self._downloader.report_warning( - 'found multiple medias; please ' - 'report this with the video URL to http://yt-dl.org/bug') if not medias: raise ExtractorError('No media entries found') return medias[0] diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index 30f7025e9..a3cb77140 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -50,13 +50,6 @@ from ..utils import ( urlencode_postdata, ) -# am not a professional coder, this codebase can go to hell -def mess(a,b): - c=a[0] - a[0]=a[b%len(a)] - a[b%len(a)]=c - return a - class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -660,25 +653,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, - { - 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I', - 'note': '256k DASH audio (format 141) via DASH manifest', - 'info_dict': { - 'id': 'a9LDPn-MO4I', - 'ext': 'm4a', - 'upload_date': '20121002', - 'uploader_id': '8KVIDEO', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', - 'description': '', - 'uploader': '8KVIDEO', - 'title': 'UHDTV TEST 8K VIDEO.mp4' - }, - 'params': { - 'youtube_include_dash_manifest': True, - 'format': '141', - }, - 'skip': 'format 141 not served anymore', - }, # DASH manifest with encrypted signature { 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA', @@ -817,59 +791,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人', }, }, - # url_encoded_fmt_stream_map is empty string - { - 'url': 'qEJwOuvDf7I', - 'info_dict': { - 'id': 'qEJwOuvDf7I', - 'ext': 'webm', - 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге', - 'description': '', - 'upload_date': '20150404', - 'uploader_id': 'spbelect', - 'uploader': 'Наблюдатели Петербурга', - }, - 'params': { - 'skip_download': 'requires avconv', - }, - 'skip': 'This live event has ended.', - }, - # Extraction from multiple DASH manifests (https://github.com/ytdl-org/haruhi-dl/pull/6097) - { - 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y', - 'info_dict': { - 'id': 'FIl7x6_3R5Y', - 'ext': 'webm', - 'title': 'md5:7b81415841e02ecd4313668cde88737a', - 'description': 'md5:116377fd2963b81ec4ce64b542173306', - 'duration': 220, - 'upload_date': '20150625', - 'uploader_id': 'dorappi2000', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000', - 'uploader': 'dorappi2000', - 'formats': 'mincount:31', - }, - 'skip': 'not actual anymore', - }, - # DASH manifest with segment_list - { - 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8', - 'md5': '8ce563a1d667b599d21064e982ab9e31', - 'info_dict': { - 'id': 'CsmdDsKjzN8', - 'ext': 'mp4', - 'upload_date': '20150501', # According to '[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', - r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', - # Obsolete patterns - r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\.sig\|\|(?P[a-zA-Z0-9$]+)\(', - r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(', - r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\('), - jscode, 'Initial JS player signature function name', group='sig') - - jsi = JSInterpreter(jscode) - initial_function = jsi.extract_function(funcname) - return lambda s: initial_function([s]) - - def _parse_sig_swf(self, file_contents): - swfi = SWFInterpreter(file_contents) - TARGET_CLASSNAME = 'SignatureDecipher' - searched_class = swfi.extract_class(TARGET_CLASSNAME) - initial_function = swfi.extract_function(searched_class, 'decipher') - return lambda s: initial_function([s]) + def mess(self,a,b): + c=a[0] + a[0]=a[b%len(a)] + a[b%len(a)]=c + return a def _decrypt_signature(self, s, video_id, player_url, age_gate=False): """Turn the encrypted s field into a working signature""" a=[char for char in s] - a=mess(a,67) + a=self.mess(a,67) a=a[1:] - a=mess(a,49) + a=self.mess(a,49) a=a[3:] - a=mess(a,52) + a=self.mess(a,52) a.reverse() a=a[1:] - a=mess(a,43) + a=self.mess(a,43) a.reverse() return "".join(a) - def _get_subtitles(self, video_id, webpage): try: @@ -2379,12 +2219,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_count(count_name): return str_to_int(self._search_regex( - r'-%s-button[^>]+>]+class="yt-uix-button-content"[^>]*>([\d,]+)' + r'"label":"([0-9,]+) %s' % re.escape(count_name), video_webpage, count_name, default=None)) - like_count = _extract_count('like') - dislike_count = _extract_count('dislike') + like_count = _extract_count('likes') + dislike_count = _extract_count('dislikes') if view_count is None: view_count = str_to_int(self._search_regex( diff --git a/haruhi_dl/utils.py b/haruhi_dl/utils.py index db580143d..12cab7734 100644 --- a/haruhi_dl/utils.py +++ b/haruhi_dl/utils.py @@ -2319,8 +2319,8 @@ def bug_reports_message(): if ytdl_is_updateable(): update_cmd = 'type haruhi-dl -U to update' else: - update_cmd = 'see https://yt-dl.org/update on how to update' - msg = '; please report this issue on https://yt-dl.org/bug .' + update_cmd = 'see https://haruhi.download/update on how to update' + msg = '; please report this issue on https://haruhi.download/bug .' msg += ' Make sure you are using the latest version; %s.' % update_cmd msg += ' Be sure to call haruhi-dl with the --verbose flag and include its complete output.' return msg