From 6f876fba516e917e4d2f164e685647e614c7d4f8 Mon Sep 17 00:00:00 2001 From: selfisekai Date: Sun, 15 Nov 2020 23:08:28 +0100 Subject: [PATCH] [youtube] fixed some download tests --- haruhi_dl/extractor/youtube.py | 195 +++++++-------------------------- 1 file changed, 39 insertions(+), 156 deletions(-) diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index 6c8bd4abb..867c05f5c 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -391,7 +391,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'}, '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'}, - '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/haruhi-dl/issues/4559) + '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559) '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'}, '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'}, '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'}, @@ -461,16 +461,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'BaW_jenozKc', 'ext': 'mp4', - 'title': 'haruhi-dl test video "\'/\\Ƥā†­š•', + 'title': 'youtube-dl test video "\'/\\Ƥā†­š•', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', - 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/haruhi-dl/issues/1892\n\nThis is a test video for haruhi-dl.\n\nFor more information, contact phihag@phihag.de .', + 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], - 'tags': ['haruhi-dl'], + 'tags': ['youtube-dl'], 'duration': 10, 'view_count': int, 'like_count': int, @@ -480,25 +480,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } }, { - 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY', - 'note': 'Test generic use_cipher_signature video (#897)', + 'url': 'https://www.youtube.com/watch?v=MURua52_YPg', + 'note': 'cipher "encrypted" signature', 'info_dict': { - 'id': 'UxxajLWwzqY', + 'id': 'MURua52_YPg', 'ext': 'mp4', - 'upload_date': '20120506', - 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]', - 'alt_title': 'I Love It (feat. Charli XCX)', - 'description': 'md5:19a2f98d9032b9311e686ed039564f63', - 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli', - 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop', - 'iconic ep', 'iconic', 'love', 'it'], - 'duration': 180, - 'uploader': 'Icona Pop', - 'uploader_id': 'IconaPop', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop', - 'creator': 'Icona Pop', - 'track': 'I Love It (feat. Charli XCX)', - 'artist': 'Icona Pop', + 'upload_date': '20200418', + 'title': 'My Ordinary Life', + 'description': 'Provided to YouTube by Fandalism\n\nMy Ordinary Life Ā· The Living Tombstone\n\nMy Ordinary Life\n\nā„— The Living Tombstone\n\nReleased on: 2017-11-23\n\nAuto-generated by YouTube.', + 'duration': 231, + 'uploader': 'The Living Tombstone - Topic', + 'uploader_id': 'UC5EH9egdct4dmAo3AHwzPBA', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC5EH9egdct4dmAo3AHwzPBA', + 'creator': 'The Living Tombstone', + 'track': 'My Ordinary Life', + 'artist': 'The Living Tombstone', } }, { @@ -509,30 +505,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20130703', 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)', - 'alt_title': 'Tunnel Vision', - 'description': 'md5:07dab3356cde4199048e4c7cd93471e1', + 'description': 'md5:fc2016d319b50b68c5541c3fd66ffd16', 'duration': 419, 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', - 'creator': 'Justin Timberlake', - 'track': 'Tunnel Vision', - 'artist': 'Justin Timberlake', - 'age_limit': 18, - } - }, - { - 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', - 'note': 'Embed-only video (#1746)', - 'info_dict': { - 'id': 'yZIXLfi8CZQ', - 'ext': 'mp4', - 'upload_date': '20120608', - 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', - 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', - 'uploader': 'SET India', - 'uploader_id': 'setindia', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia', 'age_limit': 18, } }, @@ -542,14 +519,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'BaW_jenozKc', 'ext': 'mp4', - 'title': 'haruhi-dl test video "\'/\\Ƥā†­š•', + 'title': 'youtube-dl test video "\'/\\Ƥā†­š•', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', - 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/haruhi-dl/issues/1892\n\nThis is a test video for haruhi-dl.\n\nFor more information, contact phihag@phihag.de .', + 'description': 'test chars: "\'/\\Ƥā†­š•\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', 'categories': ['Science & Technology'], - 'tags': ['haruhi-dl'], + 'tags': ['youtube-dl'], 'duration': 10, 'view_count': int, 'like_count': int, @@ -584,7 +561,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'nfWlot6h_JM', 'ext': 'm4a', 'title': 'Taylor Swift - Shake It Off', - 'description': 'md5:307195cd21ff7fa352270fe884570ef0', + 'description': 'md5:9dc0bd58efe700594b54f7d82bed0bac', 'duration': 242, 'uploader': 'TaylorSwiftVEVO', 'uploader_id': 'TaylorSwiftVEVO', @@ -595,21 +572,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format': '141/bestaudio[ext=m4a]', }, }, - # Controversy video - { - 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8', - 'info_dict': { - 'id': 'T4XJQO3qol8', - 'ext': 'mp4', - 'duration': 219, - 'upload_date': '20100909', - 'uploader': 'Amazing Atheist', - 'uploader_id': 'TheAmazingAtheist', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', - 'title': 'Burning Everyone\'s Koran', - 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', - } - }, # Normal age-gate video (No vevo, embed allowed) { 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', @@ -642,7 +604,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 18, }, }, - # video_info is None (https://github.com/ytdl-org/haruhi-dl/issues/4421) + # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) # YouTube Red ad is not captured for creator { 'url': '__2ABJjxzNo', @@ -653,17 +615,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20100430', 'uploader_id': 'deadmau5', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5', - 'creator': 'Dada Life, deadmau5', - 'description': 'md5:12c56784b8032162bb936a5f76d55360', + 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336', 'uploader': 'deadmau5', 'title': 'Deadmau5 - Some Chords (HD)', - 'alt_title': 'This Machine Kills Some Chords', }, 'expected_warnings': [ 'DASH manifest missing', ] }, - # Olympics (https://github.com/ytdl-org/haruhi-dl/issues/4431) + # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) { 'url': 'lqQg6PlCWgI', 'info_dict': { @@ -697,82 +657,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': '[A-made] č®Šę…‹å¦å­—å¹•ē‰ˆ å¤Ŗ妍 ęˆ‘å°±ę˜Æ這ęØ£ēš„äŗŗ', }, }, - { - # Multifeed videos (multiple cameras), URL is for Main Camera - 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs', - 'info_dict': { - 'id': 'jqWvoWXjCVs', - 'title': 'teamPGP: Rocket League Noob Stream', - 'description': 'md5:dc7872fb300e143831327f1bae3af010', - }, - 'playlist': [{ - 'info_dict': { - 'id': 'jqWvoWXjCVs', - 'ext': 'mp4', - 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)', - 'description': 'md5:dc7872fb300e143831327f1bae3af010', - 'duration': 7335, - 'upload_date': '20150721', - 'uploader': 'Beer Games Beer', - 'uploader_id': 'beergamesbeer', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', - 'license': 'Standard YouTube License', - }, - }, { - 'info_dict': { - 'id': '6h8e8xoXJzg', - 'ext': 'mp4', - 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)', - 'description': 'md5:dc7872fb300e143831327f1bae3af010', - 'duration': 7337, - 'upload_date': '20150721', - 'uploader': 'Beer Games Beer', - 'uploader_id': 'beergamesbeer', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', - 'license': 'Standard YouTube License', - }, - }, { - 'info_dict': { - 'id': 'PUOgX5z9xZw', - 'ext': 'mp4', - 'title': 'teamPGP: Rocket League Noob Stream (grizzle)', - 'description': 'md5:dc7872fb300e143831327f1bae3af010', - 'duration': 7337, - 'upload_date': '20150721', - 'uploader': 'Beer Games Beer', - 'uploader_id': 'beergamesbeer', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', - 'license': 'Standard YouTube License', - }, - }, { - 'info_dict': { - 'id': 'teuwxikvS5k', - 'ext': 'mp4', - 'title': 'teamPGP: Rocket League Noob Stream (zim)', - 'description': 'md5:dc7872fb300e143831327f1bae3af010', - 'duration': 7334, - 'upload_date': '20150721', - 'uploader': 'Beer Games Beer', - 'uploader_id': 'beergamesbeer', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', - 'license': 'Standard YouTube License', - }, - }], - 'params': { - 'skip_download': True, - }, - 'skip': 'This video is not available.', - }, - { - # Multifeed video with comma in title (see https://github.com/ytdl-org/haruhi-dl/issues/8536) - 'url': 'https://www.youtube.com/watch?v=gVfLd0zhdlo', - 'info_dict': { - 'id': 'gVfLd0zhdlo', - 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', - }, - 'playlist_count': 2, - 'skip': 'Not multifeed anymore', - }, { 'url': 'https://vid.plus/FlRa-iH7PGw', 'only_matching': True, @@ -782,16 +666,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # Title with JS-like syntax "};" (see https://github.com/ytdl-org/haruhi-dl/issues/7468) + # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468) # Also tests cut-off URL expansion in video description (see - # https://github.com/ytdl-org/haruhi-dl/issues/1892, - # https://github.com/ytdl-org/haruhi-dl/issues/8164) + # https://github.com/ytdl-org/youtube-dl/issues/1892, + # https://github.com/ytdl-org/youtube-dl/issues/8164) 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 'info_dict': { 'id': 'lsguqyKfVQg', 'ext': 'mp4', 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', - 'alt_title': 'Dark Walk - Position Music', 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'duration': 133, 'upload_date': '20151119', @@ -808,7 +691,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }, { - # Tags with '};' (see https://github.com/ytdl-org/haruhi-dl/issues/7468) + # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468) 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 'only_matching': True, }, @@ -855,7 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # YouTube Red paid video (https://github.com/ytdl-org/haruhi-dl/issues/10059) + # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059) 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', 'only_matching': True, }, @@ -1270,8 +1153,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # regex won't capture the whole JSON. Yet working around by trying more # concrete regex first keeping in mind proper quoted string handling # to be implemented in future that will replace this workaround (see - # https://github.com/ytdl-org/haruhi-dl/issues/7468, - # https://github.com/ytdl-org/haruhi-dl/pull/7599) + # https://github.com/ytdl-org/youtube-dl/issues/7468, + # https://github.com/ytdl-org/youtube-dl/pull/7599) r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});', ) @@ -1660,7 +1543,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): add_dash_mpd(video_info) # Rental video is not rented but preview is available (e.g. # https://www.youtube.com/watch?v=yYr8q0y5Jfg, - # https://github.com/ytdl-org/haruhi-dl/issues/10532) + # https://github.com/ytdl-org/youtube-dl/issues/10532) if not video_info and args.get('ypc_vid'): return self.url_result( args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) @@ -1671,7 +1554,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not player_response: player_response = extract_player_response( self._search_regex( - r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+});', + r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});', video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) @@ -1750,7 +1633,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for feed in multifeed_metadata_list.split(','): # Unquote should take place before split on comma (,) since textual # fields may contain comma as well (see - # https://github.com/ytdl-org/haruhi-dl/issues/8536) + # https://github.com/ytdl-org/youtube-dl/issues/8536) feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) def feed_entry(name): @@ -1791,7 +1674,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Check for "rental" videos if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: - raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/haruhi-dl/issues/359 for more information.', expected=True) + raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True) def _extract_filesize(media_url): return int_or_none(self._search_regex( @@ -1811,7 +1694,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1): encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0] if 'rtmpe%3Dyes' in encoded_url_map: - raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/haruhi-dl/issues/343 for more information.', expected=True) + raise ExtractorError('rtmpe downloads are not supported, see https://web.archive.org/web/20200923164854/https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True) formats = [] formats_spec = {} fmt_list = video_info.get('fmt_list', [''])[0] @@ -1933,7 +1816,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): dct.update(formats_spec[format_id]) # Some itags are not included in DASH manifest thus corresponding formats will - # lack metadata (see https://github.com/ytdl-org/haruhi-dl/pull/5993). + # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. mobj = re.search(r'^(?P\d+)[xX](?P\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) @@ -2269,7 +2152,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Remove the formats we found through non-DASH, they # contain less info and it can be wrong, because we use # fixed values (for example the resolution). See - # https://github.com/ytdl-org/haruhi-dl/issues/5774 for an + # https://github.com/ytdl-org/youtube-dl/issues/5774 for an # example. formats = [f for f in formats if f['format_id'] not in dash_formats.keys()] formats.extend(dash_formats.values())