youtube.py 129 KB
Newer Older
1
# coding: utf-8
Philipp Hagemeister's avatar
Philipp Hagemeister committed
2
3
from __future__ import unicode_literals

4
from datetime import datetime
5
import json
6
import hashlib
7
import random
8
import re
9
import time
10

11
from .common import InfoExtractor, SearchInfoExtractor
Philipp Hagemeister's avatar
Philipp Hagemeister committed
12
from ..compat import (
13
    compat_chr,
14
    compat_kwargs,
15
    compat_parse_qs,
16
17
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
18
    compat_urllib_parse_urlencode,
19
    compat_urllib_parse_urlparse,
20
    compat_urlparse,
21
    compat_str,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
22
23
)
from ..utils import (
24
    bool_or_none,
25
    clean_html,
26
    error_to_compat_str,
27
    ExtractorError,
28
    float_or_none,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
29
    get_element_by_id,
30
    int_or_none,
31
    mimetype2ext,
32
    parse_codecs,
33
    parse_duration,
34
    remove_quotes,
Sergey M․'s avatar
Sergey M․ committed
35
    remove_start,
36
    smuggle_url,
37
    str_or_none,
38
    str_to_int,
39
    try_get,
40
41
    unescapeHTML,
    unified_strdate,
42
    unsmuggle_url,
43
    uppercase_escape,
44
    url_or_none,
45
    urlencode_postdata,
46
    GeoRestrictedError,
47
48
)

49

50
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is filled with the TL token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    _PLAYLIST_ID_RE = r'(?:LL|WL|(?:PL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,})'

    _YOUTUBE_CLIENT_HEADERS = {
        'x-youtube-client-name': '1',
        'x-youtube-client-version': '2.20201112.04.01',
    }

    _YOUTUBE_API_KEY = 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'

    def _set_consent(self):
        """Set the CONSENT cookie for .youtube.com, expiring 2 * 30 days from now."""
        self._set_cookie(
            '.youtube.com', 'CONSENT', 'YES+0x557755',
            expire_time=time.time() + 2 * 30 * 24 * 3600)

    def _ids_to_results(self, ids):
        """Return a list with one url_result (for the 'Youtube' extractor) per video id in ids."""
        return [
            self.url_result(vid_id, 'Youtube', video_id=vid_id)
            for vid_id in ids]

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            # Return an explicit False (not None), as the docstring promises
            return False

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            """POST the login form plus the JSON-encoded f_req payload to url.

            Returns whatever _download_json returns; the callers below treat
            a result of False as a failed (non-fatal) request.
            """
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                # Drop everything preceding the first '[' before JSON parsing
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        def warn(message):
            self._downloader.report_warning(message)

        # Shared by the lookup and the challenge request payloads
        service_login_url = 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn'

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 service_login_url,
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, service_login_url, None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            # Return an explicit False (not None), as the docstring promises
            return False

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            # Pick the reason first: '%' binds tighter than a conditional
            # expression, so formatting inline would drop the prefix (and
            # could pass None) for every message but INCORRECT_ANSWER_ENTERED.
            login_reason = (
                'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            warn('Unable to login: %s' % login_reason)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    # Same precedence pitfall as for the password message above
                    tfa_reason = (
                        'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    warn('Unable to finish TFA: %s' % tfa_reason)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True

    def _download_webpage_handle(self, *args, **kwargs):
        """Delegate to the parent implementation, passing kwargs through compat_kwargs."""
        return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
            *args, **compat_kwargs(kwargs))

    def _real_initialize(self):
        """Set the consent cookie and attempt to log in; do nothing without a downloader."""
        if self._downloader is None:
            return
        self._set_consent()
        if not self._login():
            return
275

276

277
class YoutubeIE(YoutubeBaseInfoExtractor):
Philipp Hagemeister's avatar
Philipp Hagemeister committed
278
    IE_DESC = 'YouTube.com'
279
    _VALID_URL = r"""(?x)^
280
                     (
281
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
282
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
283
                            (?:www\.)?deturl\.com/www\.youtube\.com/|
284
                            (?:www\.)?pwnyoutube\.com/|
285
                            (?:www\.)?hooktube\.com/|
286
                            (?:www\.)?yourepeat\.com/|
287
                            tube\.majestyc\.net/|
288
                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
289
                            (?:(?:www|dev)\.)?invidio\.us/|
290
                            (?:(?:www|no)\.)?invidiou\.sh/|
291
                            (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
292
                            (?:www\.)?invidious\.kabi\.tk/|
293
                            (?:www\.)?invidious\.13ad\.de/|
294
                            (?:www\.)?invidious\.mastodon\.host/|
295
296
297
298
299
300
                            (?:www\.)?invidious\.zapashcanon\.fr/|
                            (?:www\.)?invidious\.kavin\.rocks/|
                            (?:www\.)?invidious\.tube/|
                            (?:www\.)?invidiou\.site/|
                            (?:www\.)?invidious\.site/|
                            (?:www\.)?invidious\.xyz/|
301
                            (?:www\.)?invidious\.nixnet\.xyz/|
302
                            (?:www\.)?invidious\.drycat\.fr/|
303
                            (?:www\.)?tube\.poal\.co/|
304
                            (?:www\.)?tube\.connect\.cafe/|
305
                            (?:www\.)?vid\.wxzm\.sx/|
306
                            (?:www\.)?vid\.mint\.lgbt/|
307
                            (?:www\.)?yewtu\.be/|
308
                            (?:www\.)?yt\.elukerio\.org/|
309
                            (?:www\.)?yt\.lelux\.fi/|
310
311
312
313
314
315
                            (?:www\.)?invidious\.ggc-project\.de/|
                            (?:www\.)?yt\.maisputain\.ovh/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?invidious\.toot\.koeln/|
                            (?:www\.)?invidious\.fdn\.fr/|
                            (?:www\.)?watch\.nettohikari\.com/|
316
317
318
319
320
321
                            (?:www\.)?kgg2m7yk5aybusll\.onion/|
                            (?:www\.)?qklhadlycap4cnod\.onion/|
                            (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
                            (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
                            (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
                            (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
322
                            (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
323
                            (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
324
                            youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains
325
326
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
327
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
328
                             |(?:                                             # or the v= param in all its forms
329
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
330
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
331
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
332
333
                                 v=
                             )
334
                         ))
335
336
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
337
338
339
340
                            (?:www\.)?youtube\.com/(?:
                                shorts|                                       # or youtube.com/shorts/xxx
                                video                                         # or youtube.com/video/xxx
                            )|
341
342
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
343
                         )/
344
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
345
                         )
346
                     )?                                                       # all until now is optional -> you can pass the naked ID
347
                     ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
348
349
350
351
352
353
                     (?!.*?\blist=
                        (?:
                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
                            WL                                                # WL are handled by the watch later IE
                        )
                     )
354
                     (?(1).+)?                                                # if we found the ID, everything can follow
355
                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
356
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
357
358
359
360
    _PLAYER_INFO_RE = (
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
    )
361
    _formats = {
362
363
364
365
        # '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        # '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        # '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        # '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
366
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
367
        # '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
368
369
        # '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
370
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
371
372
373
374
375
376
377
378
379
        # '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        # '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        # '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        # '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        # '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        # '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        # '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        # '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
380
381
382


        # 3D videos
383
384
385
386
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
387
388
389
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
390

Allan Zhou's avatar
Allan Zhou committed
391
        # Apple HTTP Live Streaming
Yen Chi Hsuan's avatar
Yen Chi Hsuan committed
392
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
393
394
395
396
397
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
398
399
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
400
401

        # DASH mp4 video
402
403
404
405
406
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
407
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
408
409
410
411
412
413
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
414

415
        # Dash mp4 audio
416
417
418
419
420
421
422
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
423
424

        # Dash webm
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
440
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
441
442
443
444
445
446
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
447
448

        # Dash webm audio
449
450
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
451

452
        # Dash webm audio with opus inside
453
454
455
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
456

457
        # RTMP (unnamed)
458
        # '_rtmp': {'protocol': 'rtmp'},
459
460
461
462
463
464

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
465
    }
466
    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
467

468
469
    _GEO_BYPASS = False

Philipp Hagemeister's avatar
Philipp Hagemeister committed
470
    IE_NAME = 'youtube'
471
472
    _TESTS = [
        {
473
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
474
475
476
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
477
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
478
479
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
480
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
481
482
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
483
                'upload_date': '20121002',
484
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
485
                'categories': ['Science & Technology'],
486
                'tags': ['youtube-dl'],
487
                'duration': 10,
488
                'view_count': int,
489
490
                'like_count': int,
                'dislike_count': int,
491
                'start_time': 1,
492
                'end_time': 9,
493
            }
494
495
        },
        {
496
497
            'url': 'https://www.youtube.com/watch?v=MURua52_YPg',
            'note': 'cipher "encrypted" signature',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
498
            'info_dict': {
499
                'id': 'MURua52_YPg',
Philipp Hagemeister's avatar
Philipp Hagemeister committed
500
                'ext': 'mp4',
501
502
503
504
505
506
507
508
509
510
                'upload_date': '20200418',
                'title': 'My Ordinary Life',
                'description': 'Provided to YouTube by Fandalism\n\nMy Ordinary Life · The Living Tombstone\n\nMy Ordinary Life\n\n℗ The Living Tombstone\n\nReleased on: 2017-11-23\n\nAuto-generated by YouTube.',
                'duration': 231,
                'uploader': 'The Living Tombstone - Topic',
                'uploader_id': 'UC5EH9egdct4dmAo3AHwzPBA',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC5EH9egdct4dmAo3AHwzPBA',
                'creator': 'The Living Tombstone',
                'track': 'My Ordinary Life',
                'artist': 'The Living Tombstone',
511
            }
512
513
        },
        {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
514
515
516
517
518
519
            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
            'note': 'Test VEVO video with age protection (#956)',
            'info_dict': {
                'id': '07FYdnEawAQ',
                'ext': 'mp4',
                'upload_date': '20130703',
520
                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
521
                'description': 'md5:fc2016d319b50b68c5541c3fd66ffd16',
522
                'duration': 419,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
523
524
                'uploader': 'justintimberlakeVEVO',
                'uploader_id': 'justintimberlakeVEVO',
525
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
Sergey M․'s avatar
Sergey M․ committed
526
                'age_limit': 18,
527
528
            }
        },
529
        {
530
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
531
532
533
534
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
535
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
536
537
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
538
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
539
                'upload_date': '20121002',
540
                'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
541
                'categories': ['Science & Technology'],
542
                'tags': ['youtube-dl'],
543
                'duration': 10,
544
                'view_count': int,
545
546
                'like_count': int,
                'dislike_count': int,
547
548
549
550
            },
            'params': {
                'skip_download': True,
            },
551
        },
552
553
        # DASH manifest with encrypted signature
        {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
554
555
556
557
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
558
559
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
560
                'duration': 244,
Philipp Hagemeister's avatar
Philipp Hagemeister committed
561
562
563
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
564
            },
Philipp Hagemeister's avatar
Philipp Hagemeister committed
565
            'params': {
Philipp Hagemeister's avatar
Philipp Hagemeister committed
566
                'youtube_include_dash_manifest': True,
Sergey M․'s avatar
Sergey M․ committed
567
                'format': '141/bestaudio[ext=m4a]',
568
569
            },
        },
570
571
        # Normal age-gate video (No vevo, embed allowed)
        {
572
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
573
574
575
576
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
577
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
578
                'duration': 142,
579
580
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
581
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
582
                'upload_date': '20140605',
Sergey M․'s avatar
Sergey M․ committed
583
                'age_limit': 18,
584
585
            },
        },
586
587
        # Age-gate video with encrypted signature
        {
588
            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
589
590
            'info_dict': {
                'id': '6kLq3WMV1nU',
591
                'ext': 'mp4',
592
593
                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
Sergey M․'s avatar
Sergey M․ committed
594
                'duration': 246,
595
596
                'uploader': 'LloydVEVO',
                'uploader_id': 'LloydVEVO',
597
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
598
                'upload_date': '20110629',
Sergey M․'s avatar
Sergey M․ committed
599
                'age_limit': 18,
600
601
            },
        },
602
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
603
        # YouTube Red ad is not captured for creator
604
605
606
607
608
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
609
                'duration': 266,
610
611
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
612
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
613
                'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
614
615
616
617
618
619
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
620
        },
621
622
623
624
625
626
627
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
628
                'duration': 85,
629
630
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
631
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
632
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
Sergey M․'s avatar
Sergey M․ committed
633
                'uploader': '孫ᄋᄅ',
634
635
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
Sergey M․'s avatar
Sergey M․ committed
636
        },
637
        {
638
            'url': 'https://vid.plus/FlRa-iH7PGw',
639
            'only_matching': True,
640
        },
641
        {
642
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
643
644
            'only_matching': True,
        },
645
        {
646
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
647
            # Also tests cut-off URL expansion in video description (see
648
649
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
650
651
652
653
654
655
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
656
                'duration': 133,
657
658
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
659
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
660
                'uploader': 'IronSoulElf',
Sergey M․'s avatar
Sergey M․ committed
661
662
663
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
664
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
665
666
667
668
669
            },
            'params': {
                'skip_download': True,
            },
        },
670
        {
671
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
672
673
674
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
675
676
677
678
679
680
681
682
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
683
                'duration': 721,
684
685
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
686
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
687
                'uploader': 'The Berkman Klein Center for Internet & Society',
688
689
690
691
692
693
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
694
695
696
697
698
699
700
701
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:dda0d780d5a6e120758d1711d062a867',
702
                'duration': 4060,
703
                'upload_date': '20151119',
Sergey M․'s avatar
Sergey M․ committed
704
                'uploader': 'Bernie Sanders',
705
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
706
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
707
708
709
710
711
712
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
713
714
715
        {
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
716
717
        },
        {
718
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
719
720
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
721
        },
722
723
724
725
726
727
728
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
729
                'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
730
                'duration': 2085,
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
746
747
748
749
        {
            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
            'only_matching': True,
        },
750
751
752
753
        {
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
754
        {
755
            # Youtube Music Auto-generated description
756
757
758
759
760
761
762
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
Sergey M․'s avatar
Sergey M․ committed
763
764
                'uploader': 'Stephen - Topic',
                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
765
766
767
768
769
770
771
772
773
774
775
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
776
            # Youtube Music Auto-generated description
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
            # Retrieve 'artist' field from 'Artist:' in video description
            # when it is present on youtube music video
            'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
            'info_dict': {
                'id': 'k0jLE7tTwjY',
                'ext': 'mp4',
                'title': 'Latch Feat. Sam Smith',
                'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
                'upload_date': '20150110',
                'uploader': 'Various Artists - Topic',
                'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
                'artist': 'Disclosure',
                'track': 'Latch Feat. Sam Smith',
                'album': 'Latch Featuring Sam Smith',
                'release_date': '20121008',
                'release_year': 2012,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
799
            # Youtube Music Auto-generated description
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
            # handle multiple artists on youtube music video
            'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
            'info_dict': {
                'id': '74qn0eJSjpA',
                'ext': 'mp4',
                'title': 'Eastside',
                'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
                'upload_date': '20180710',
                'uploader': 'Benny Blanco - Topic',
                'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
                'artist': 'benny blanco, Halsey, Khalid',
                'track': 'Eastside',
                'album': 'Eastside',
                'release_date': '20180713',
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
821
            # Youtube Music Auto-generated description
822
823
824
825
826
827
            # handle youtube music video with release_year and no release_date
            'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
            'info_dict': {
                'id': '-hcAI0g-f5M',
                'ext': 'mp4',
                'title': 'Put It On Me',
Sergey M․'s avatar
Sergey M․ committed
828
                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
829
830
831
832
833
834
835
836
837
838
839
840
841
                'upload_date': '20180426',
                'uploader': 'Matt Maeson - Topic',
                'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
                'artist': 'Matt Maeson',
                'track': 'Put It On Me',
                'album': 'The Hearse',
                'release_date': None,
                'release_year': 2018,
            },
            'params': {
                'skip_download': True,
            },
        },
842
        {
843
            'url': 'https://www.youtubekids.com/watch?v=BnC-cpUCdns',
844
845
            'only_matching': True,
        },
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
        {
            # empty description results in an empty string
            'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
            'info_dict': {
                'id': 'x41yOUIvK2k',
                'ext': 'mp4',
                'title': 'IMG 3456',
                'description': '',
                'upload_date': '20170613',
                'uploader_id': 'ElevageOrVert',
                'uploader': 'ElevageOrVert',
            },
            'params': {
                'skip_download': True,
            },
        },
862
863
864
865
        {
            'url': 'https://youtube.com/shorts/7awd-y_DTQY',
            'only_matching': True,
        },
866
867
868
869
        {
            'url': 'https://www.youtube.com/video/2NDLF-k2PwA',
            'only_matching': True,
        }
870
871
    ]

872
873
    # Matches a whole string that starts with 'AO', continues with one or more
    # characters from [a-zA-Z0-9_-] and ends with optional '=' padding.
    # NOTE(review): the code that applies this pattern is outside this part
    # of the file -- confirm there which values it is meant to validate.
    _VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$'

874
875
    def __init__(self, *args, **kwargs):
        """Initialise the extractor and give it an empty player cache.

        All positional and keyword arguments are forwarded unchanged to the
        parent class initialiser.
        """
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # Per-instance dict, empty at construction time.
        self._player_cache = {}
877

878
879
    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self.to_screen('%s: Downloading video info webpage' % video_id)
881
882
883

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_screen('%s: Extracting video information' % video_id)
885
886
887

    def report_unavailable_format(self, video_id, format):
        """Report that the given format is not available for the video."""
        # 'format' shadows the builtin, but the parameter name is part of the
        # method's interface and is therefore kept as is.
        self.to_screen('%s: Format %s not available' % (video_id, format))
889

890
891
    def _signature_cache_id(self, example_sig):
        """Return a string representation of a signature.

        The signature is split on '.' and each part is replaced by its
        length, e.g. 'abc.de' gives '3.2'.
        """
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
893

894
895
896
897
898
899
900
    @classmethod
    def _extract_player_info(cls, player_url):
        for player_re in cls._PLAYER_INFO_RE:
            id_m = re.search(player_re, player_url)
            if id_m:
                break
        else:
901
            raise ExtractorError('Cannot identify player %r' % player_url)
902
        return id_m.group('id')
903
904

    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return the signature data for the JS player at player_url.

        The value produced by _parse_sig_js() is stored in the downloader's
        filesystem cache (section 'youtube-sigfuncs', keyed by the player id).
        A cached value is returned as is, without downloading the player.

        video_id is only used for the download itself and its messages.
        player_url may be absolute, protocol-relative ('//host/path') or
        relative to https://www.youtube.com.
        example_sig is not used by this method; it is kept so that existing
        callers keep working.

        Raises ExtractorError if the player cannot be identified (see
        _extract_player_info()).
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        cache_spec = self._downloader.cache.load('youtube-sigfuncs', player_id)
        if cache_spec is not None:
            return cache_spec

        if player_url.startswith('//'):
            # Protocol-relative URL: only the scheme is missing. Prepending
            # the youtube.com origin here would produce a broken URL.
            player_url = 'https:' + player_url
        elif not player_url.startswith('http'):
            player_url = 'https://www.youtube.com' + player_url

        # The full URL is only shown in verbose mode; otherwise the shorter
        # player id is reported.
        download_note = (
            'Downloading player %s' % player_url
            if self._downloader.params.get('verbose') else
            'Downloading js player %s' % player_id
        )
        code = self._download_webpage(
            player_url, video_id,
            note=download_note,
            errnote='Download of js player %s failed' % player_url)
        res = self._parse_sig_js(code)

        self._downloader.cache.store('youtube-sigfuncs', player_id, res)
        return res

928
    def _parse_sig_js(self, js_player):
929
        shit_parser = re.search(r'[a-z]\=a\.split\((?:""|\'\')\);(([a-zA-Z_][a-zA-Z\d_]+).*);return a\.join', js_player)
930
931
932
        if not shit_parser:
            raise ExtractorError('Signature decryption code not found')
        func, obfuscated_name = shit_parser.group(1, 2)
933
        obfuscated_func = re.search(r'%s\s*=\s*{([\s\w(){}[\].,:;=%s"\']*?})};' % (re.escape(obfuscated_name), '%'),
934
935
936
937
938
                                    js_player)
        if not obfuscated_func:
            raise ExtractorError('Signature decrypting deobfuscated functions not found')
        obfuscated_stack = obfuscated_func.group(1)
        obf_map = {}
939
940
        for obffun in re.finditer(r'(?P<kp>["\'`]?)([a-zA-Z_][a-zA-Z\d_]+)(?P=kp):function\(a(?:,b)?\){(.*?)}', obfuscated_stack):
            obfname, obfval = obffun.group(2, 3)
941
            if 'splice' in obfval:
942
                obf_map[obfname] = 'splice'
943
            elif 'reverse' in obfval:
944
                obf_map[obfname] = 'reverse'
945
            elif 'var' in obfval and 'length' in obfval:
946
                obf_map[obfname] = 'mess'