[youtube] cleanup, speed up age-gated extraction, fix videos with js-like syntax
parent
9373a2f667
commit
ed273bfbf2
|
@ -28,7 +28,6 @@ from ..utils import (
|
|||
float_or_none,
|
||||
get_element_by_id,
|
||||
int_or_none,
|
||||
list_geoblocked_countres,
|
||||
mimetype2ext,
|
||||
parse_codecs,
|
||||
parse_duration,
|
||||
|
@ -44,6 +43,7 @@ from ..utils import (
|
|||
uppercase_escape,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
GeoRestrictedError,
|
||||
)
|
||||
|
||||
|
||||
|
@ -567,24 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'format': '141/bestaudio[ext=m4a]',
|
||||
},
|
||||
},
|
||||
# JS player signature function name containing $
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
|
||||
'info_dict': {
|
||||
'id': 'nfWlot6h_JM',
|
||||
'ext': 'm4a',
|
||||
'title': 'Taylor Swift - Shake It Off',
|
||||
'description': 'md5:9dc0bd58efe700594b54f7d82bed0bac',
|
||||
'duration': 242,
|
||||
'uploader': 'TaylorSwiftVEVO',
|
||||
'uploader_id': 'TaylorSwiftVEVO',
|
||||
'upload_date': '20140818',
|
||||
},
|
||||
'params': {
|
||||
'youtube_include_dash_manifest': True,
|
||||
'format': '141/bestaudio[ext=m4a]',
|
||||
},
|
||||
},
|
||||
# Normal age-gate video (No vevo, embed allowed)
|
||||
{
|
||||
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
|
||||
|
@ -636,24 +618,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'DASH manifest missing',
|
||||
]
|
||||
},
|
||||
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
|
||||
{
|
||||
'url': 'lqQg6PlCWgI',
|
||||
'info_dict': {
|
||||
'id': 'lqQg6PlCWgI',
|
||||
'ext': 'mp4',
|
||||
'duration': 6085,
|
||||
'upload_date': '20150827',
|
||||
'uploader_id': 'olympic',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
|
||||
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
|
||||
'uploader': 'Olympic',
|
||||
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'requires avconv',
|
||||
}
|
||||
},
|
||||
# Non-square pixels
|
||||
{
|
||||
'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
|
||||
|
@ -879,26 +843,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'url': 'https://www.youtubekids.com/watch?v=BnC-cpUCdns',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
# invalid -> valid video id redirection
|
||||
'url': 'DJztXj2GPfl',
|
||||
'info_dict': {
|
||||
'id': 'DJztXj2GPfk',
|
||||
'ext': 'mp4',
|
||||
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
|
||||
'description': 'md5:bf577a41da97918e94fa9798d9228825',
|
||||
'upload_date': '20090125',
|
||||
'uploader': 'Prochorowka',
|
||||
'uploader_id': 'Prochorowka',
|
||||
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
|
||||
'artist': 'Panjabi MC',
|
||||
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
|
||||
'album': 'Beware of the Boys (Mundian To Bach Ke)',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
},
|
||||
{
|
||||
# empty description results in an empty string
|
||||
'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
|
||||
|
@ -919,6 +863,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
'url': 'https://youtube.com/shorts/7awd-y_DTQY',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.youtube.com/video/2NDLF-k2PwA',
|
||||
'only_matching': True,
|
||||
}
|
||||
]
|
||||
|
||||
_VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$'
|
||||
|
@ -1132,7 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
# to be implemented in future that will replace this workaround (see
|
||||
# https://github.com/ytdl-org/youtube-dl/issues/7468,
|
||||
# https://github.com/ytdl-org/youtube-dl/pull/7599)
|
||||
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
|
||||
r';ytplayer\.config\s*=\s*({.+?});\s*ytplayer',
|
||||
r';ytplayer\.config\s*=\s*({.+?});',
|
||||
)
|
||||
config = self._search_regex(
|
||||
|
@ -1473,14 +1421,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
|
||||
# Get video info
|
||||
video_info = {}
|
||||
embed_webpage = None
|
||||
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
|
||||
or re.search(r'player-age-gate-content">', video_webpage) is not None):
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
# this can be viewed without logging in to YouTube
|
||||
url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
|
||||
data = compat_urllib_parse_urlencode({
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
|
@ -1490,8 +1435,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
try:
|
||||
video_info_webpage = self._download_webpage(
|
||||
video_info_url, video_id,
|
||||
note='Refetching age-gated info webpage',
|
||||
errnote='unable to download video info webpage')
|
||||
note='Downloading age-gated video info',
|
||||
errnote='unable to download video info')
|
||||
except ExtractorError:
|
||||
video_info_webpage = None
|
||||
if video_info_webpage:
|
||||
|
@ -1522,9 +1467,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
player_response = extract_player_response(args.get('player_response'), video_id)
|
||||
if not player_response:
|
||||
player_response = extract_player_response(
|
||||
self._search_regex(
|
||||
self._search_regex((
|
||||
# js-like syntax
|
||||
r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});(?:if \(ytcsi|var [a-zA-Z\_])',
|
||||
r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});',
|
||||
video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
|
||||
), video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
|
||||
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
|
||||
add_dash_mpd_pr(player_response)
|
||||
|
||||
|
@ -1722,24 +1669,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)'
|
||||
|
||||
player_url = self._search_regex(
|
||||
ASSETS_RE,
|
||||
embed_webpage if age_gate else video_webpage, '', default=player_url)
|
||||
ASSETS_RE, video_webpage, '', default=player_url)
|
||||
|
||||
if not player_url and not age_gate:
|
||||
# We need the embed website after all
|
||||
if embed_webpage is None:
|
||||
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||
embed_webpage = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed webpage')
|
||||
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
||||
embed_webpage = self._download_webpage(
|
||||
embed_url, video_id, 'Downloading embed webpage')
|
||||
player_url = self._search_regex(
|
||||
ASSETS_RE, embed_webpage, 'JS player URL')
|
||||
|
||||
# if player_url is None:
|
||||
# player_url_json = self._search_regex(
|
||||
# r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
||||
# video_webpage, 'age gate player URL')
|
||||
# player_url = json.loads(player_url_json)
|
||||
|
||||
if 'sig' in url_data:
|
||||
url += '&signature=' + url_data['sig'][0]
|
||||
elif 's' in url_data:
|
||||
|
@ -1871,11 +1810,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
or ', who has blocked it on copyright grounds' in error_desc
|
||||
or 'It is not available in your country.' in error_desc
|
||||
or ', who has blocked it in your country on copyright grounds.' in error_desc):
|
||||
raise ExtractorError(
|
||||
list_geoblocked_countres(
|
||||
self._search_regex(
|
||||
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
|
||||
video_webpage, 'allowed region list').split(',')),
|
||||
raise GeoRestrictedError(
|
||||
error_desc,
|
||||
countries=self._search_regex(
|
||||
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
|
||||
video_webpage, 'allowed region list').split(','),
|
||||
expected=True)
|
||||
if error_desc and 'Playback on other websites has been disabled' in error_desc:
|
||||
raise ExtractorError(
|
||||
|
|
|
@ -5775,284 +5775,3 @@ def clean_podcast_url(url):
|
|||
st\.fm # https://podsights.com/docs/
|
||||
)/e
|
||||
)/''', '', url)
|
||||
|
||||
|
||||
# http://country.io/names.json
|
||||
country_list = {
|
||||
"BD": "Bangladesh",
|
||||
"BE": "Belgium",
|
||||
"BF": "Burkina Faso",
|
||||
"BG": "Bulgaria",
|
||||
"BA": "Bosnia and Herzegovina",
|
||||
"BB": "Barbados",
|
||||
"WF": "Wallis and Futuna",
|
||||
"BL": "Saint Barthelemy",
|
||||
"BM": "Bermuda",
|
||||
"BN": "Brunei",
|
||||
"BO": "Bolivia",
|
||||
"BH": "Bahrain",
|
||||
"BI": "Burundi",
|
||||
"BJ": "Benin",
|
||||
"BT": "Bhutan",
|
||||
"JM": "Jamaica",
|
||||
"BV": "Bouvet Island",
|
||||
"BW": "Botswana",
|
||||
"WS": "Samoa",
|
||||
"BQ": "Bonaire, Saint Eustatius and Saba ",
|
||||
"BR": "Brazil",
|
||||
"BS": "Bahamas",
|
||||
"JE": "Jersey",
|
||||
"BY": "Belarus",
|
||||
"BZ": "Belize",
|
||||
"RU": "Russia",
|
||||
"RW": "Rwanda",
|
||||
"RS": "Serbia",
|
||||
"TL": "East Timor",
|
||||
"RE": "Reunion",
|
||||
"TM": "Turkmenistan",
|
||||
"TJ": "Tajikistan",
|
||||
"RO": "Romania",
|
||||
"TK": "Tokelau",
|
||||
"GW": "Guinea-Bissau",
|
||||
"GU": "Guam",
|
||||
"GT": "Guatemala",
|
||||
"GS": "South Georgia and the South Sandwich Islands",
|
||||
"GR": "Greece",
|
||||
"GQ": "Equatorial Guinea",
|
||||
"GP": "Guadeloupe",
|
||||
"JP": "Japan",
|
||||
"GY": "Guyana",
|
||||
"GG": "Guernsey",
|
||||
"GF": "French Guiana",
|
||||
"GE": "Georgia",
|
||||
"GD": "Grenada",
|
||||
"GB": "United Kingdom",
|
||||
"GA": "Gabon",
|
||||
"SV": "El Salvador",
|
||||
"GN": "Guinea",
|
||||
"GM": "Gambia",
|
||||
"GL": "Greenland",
|
||||
"GI": "Gibraltar",
|
||||
"GH": "Ghana",
|
||||
"OM": "Oman",
|
||||
"TN": "Tunisia",
|
||||
"JO": "Jordan",
|
||||
"HR": "Croatia",
|
||||
"HT": "Haiti",
|
||||
"HU": "Hungary",
|
||||
"HK": "Hong Kong",
|
||||
"HN": "Honduras",
|
||||
"HM": "Heard Island and McDonald Islands",
|
||||
"VE": "Venezuela",
|
||||
"PR": "Puerto Rico",
|
||||
"PS": "Palestinian Territory",
|
||||
"PW": "Palau",
|
||||
"PT": "Portugal",
|
||||
"SJ": "Svalbard and Jan Mayen",
|
||||
"PY": "Paraguay",
|
||||
"IQ": "Iraq",
|
||||
"PA": "Panama",
|
||||
"PF": "French Polynesia",
|
||||
"PG": "Papua New Guinea",
|
||||
"PE": "Peru",
|
||||
"PK": "Pakistan",
|
||||
"PH": "Philippines",
|
||||
"PN": "Pitcairn",
|
||||
"PL": "Poland",
|
||||
"PM": "Saint Pierre and Miquelon",
|
||||
"ZM": "Zambia",
|
||||
"EH": "Western Sahara",
|
||||
"EE": "Estonia",
|
||||
"EG": "Egypt",
|
||||
"ZA": "South Africa",
|
||||
"EC": "Ecuador",
|
||||
"IT": "Italy",
|
||||
"VN": "Vietnam",
|
||||
"SB": "Solomon Islands",
|
||||
"ET": "Ethiopia",
|
||||
"SO": "Somalia",
|
||||
"ZW": "Zimbabwe",
|
||||
"SA": "Saudi Arabia",
|
||||
"ES": "Spain",
|
||||
"ER": "Eritrea",
|
||||
"ME": "Montenegro",
|
||||
"MD": "Moldova",
|
||||
"MG": "Madagascar",
|
||||
"MF": "Saint Martin",
|
||||
"MA": "Morocco",
|
||||
"MC": "Monaco",
|
||||
"UZ": "Uzbekistan",
|
||||
"MM": "Myanmar",
|
||||
"ML": "Mali",
|
||||
"MO": "Macao",
|
||||
"MN": "Mongolia",
|
||||
"MH": "Marshall Islands",
|
||||
"MK": "Macedonia",
|
||||
"MU": "Mauritius",
|
||||
"MT": "Malta",
|
||||
"MW": "Malawi",
|
||||
"MV": "Maldives",
|
||||
"MQ": "Martinique",
|
||||
"MP": "Northern Mariana Islands",
|
||||
"MS": "Montserrat",
|
||||
"MR": "Mauritania",
|
||||
"IM": "Isle of Man",
|
||||
"UG": "Uganda",
|
||||
"TZ": "Tanzania",
|
||||
"MY": "Malaysia",
|
||||
"MX": "Mexico",
|
||||
"IL": "Israel",
|
||||
"FR": "France",
|
||||
"IO": "British Indian Ocean Territory",
|
||||
"SH": "Saint Helena",
|
||||
"FI": "Finland",
|
||||
"FJ": "Fiji",
|
||||
"FK": "Falkland Islands",
|
||||
"FM": "Micronesia",
|
||||
"FO": "Faroe Islands",
|
||||
"NI": "Nicaragua",
|
||||
"NL": "Netherlands",
|
||||
"NO": "Norway",
|
||||
"NA": "Namibia",
|
||||
"VU": "Vanuatu",
|
||||
"NC": "New Caledonia",
|
||||
"NE": "Niger",
|
||||
"NF": "Norfolk Island",
|
||||
"NG": "Nigeria",
|
||||
"NZ": "New Zealand",
|
||||
"NP": "Nepal",
|
||||
"NR": "Nauru",
|
||||
"NU": "Niue",
|
||||
"CK": "Cook Islands",
|
||||
"XK": "Kosovo",
|
||||
"CI": "Ivory Coast",
|
||||
"CH": "Switzerland",
|
||||
"CO": "Colombia",
|
||||
"CN": "China",
|
||||
"CM": "Cameroon",
|
||||
"CL": "Chile",
|
||||
"CC": "Cocos Islands",
|
||||
"CA": "Canada",
|
||||
"CG": "Republic of the Congo",
|
||||
"CF": "Central African Republic",
|
||||
"CD": "Democratic Republic of the Congo",
|
||||
"CZ": "Czech Republic",
|
||||
"CY": "Cyprus",
|
||||
"CX": "Christmas Island",
|
||||
"CR": "Costa Rica",
|
||||
"CW": "Curacao",
|
||||
"CV": "Cape Verde",
|
||||
"CU": "Cuba",
|
||||
"SZ": "Swaziland",
|
||||
"SY": "Syria",
|
||||
"SX": "Sint Maarten",
|
||||
"KG": "Kyrgyzstan",
|
||||
"KE": "Kenya",
|
||||
"SS": "South Sudan",
|
||||
"SR": "Suriname",
|
||||
"KI": "Kiribati",
|
||||
"KH": "Cambodia",
|
||||
"KN": "Saint Kitts and Nevis",
|
||||
"KM": "Comoros",
|
||||
"ST": "Sao Tome and Principe",
|
||||
"SK": "Slovakia",
|
||||
"KR": "South Korea",
|
||||
"SI": "Slovenia",
|
||||
"KP": "North Korea",
|
||||
"KW": "Kuwait",
|
||||
"SN": "Senegal",
|
||||
"SM": "San Marino",
|
||||
"SL": "Sierra Leone",
|
||||
"SC": "Seychelles",
|
||||
"KZ": "Kazakhstan",
|
||||
"KY": "Cayman Islands",
|
||||
"SG": "Singapore",
|
||||
"SE": "Sweden",
|
||||
"SD": "Sudan",
|
||||
"DO": "Dominican Republic",
|
||||
"DM": "Dominica",
|
||||
"DJ": "Djibouti",
|
||||
"DK": "Denmark",
|
||||
"VG": "British Virgin Islands",
|
||||
"DE": "Germany",
|
||||
"YE": "Yemen",
|
||||
"DZ": "Algeria",
|
||||
"US": "United States",
|
||||
"UY": "Uruguay",
|
||||
"YT": "Mayotte",
|
||||
"UM": "United States Minor Outlying Islands",
|
||||
"LB": "Lebanon",
|
||||
"LC": "Saint Lucia",
|
||||
"LA": "Laos",
|
||||
"TV": "Tuvalu",
|
||||
"TW": "Taiwan",
|
||||
"TT": "Trinidad and Tobago",
|
||||
"TR": "Turkey",
|
||||
"LK": "Sri Lanka",
|
||||
"LI": "Liechtenstein",
|
||||
"LV": "Latvia",
|
||||
"TO": "Tonga",
|
||||
"LT": "Lithuania",
|
||||
"LU": "Luxembourg",
|
||||
"LR": "Liberia",
|
||||
"LS": "Lesotho",
|
||||
"TH": "Thailand",
|
||||
"TF": "French Southern Territories",
|
||||
"TG": "Togo",
|
||||
"TD": "Chad",
|
||||
"TC": "Turks and Caicos Islands",
|
||||
"LY": "Libya",
|
||||
"VA": "Vatican",
|
||||
"VC": "Saint Vincent and the Grenadines",
|
||||
"AE": "United Arab Emirates",
|
||||
"AD": "Andorra",
|
||||
"AG": "Antigua and Barbuda",
|
||||
"AF": "Afghanistan",
|
||||
"AI": "Anguilla",
|
||||
"VI": "U.S. Virgin Islands",
|
||||
"IS": "Iceland",
|
||||
"IR": "Iran",
|
||||
"AM": "Armenia",
|
||||
"AL": "Albania",
|
||||
"AO": "Angola",
|
||||
"AQ": "Antarctica",
|
||||
"AS": "American Samoa",
|
||||
"AR": "Argentina",
|
||||
"AU": "Australia",
|
||||
"AT": "Austria",
|
||||
"AW": "Aruba",
|
||||
"IN": "India",
|
||||
"AX": "Aland Islands",
|
||||
"AZ": "Azerbaijan",
|
||||
"IE": "Ireland",
|
||||
"ID": "Indonesia",
|
||||
"UA": "Ukraine",
|
||||
"QA": "Qatar",
|
||||
"MZ": "Mozambique"
|
||||
}
|
||||
|
||||
|
||||
def list_countries():
    """Return the country codes, i.e. the keys of the country_list table."""
    codes = country_list.keys()
    return codes
|
||||
|
||||
|
||||
def list_geoblocked_countres(allowed_countries, reverse=False):
    """Build a human-readable message describing where a video is geo-blocked.

    Every code returned by list_countries() is classified as either blocked
    or available, and the more compact of the two descriptions is returned.

    allowed_countries -- container of country codes (keys of country_list);
                         codes that list_countries() does not return are
                         ignored
    reverse           -- when true, allowed_countries is interpreted as the
                         countries in which the video is blocked instead

    Returns the message as a string. A list of at most 10 countries is
    spelled out with the full names from country_list; longer lists fall
    back to the bare country codes. Both forms are sorted. If no country is
    blocked, the "blocked in these countries" message is returned with an
    empty list after the colon.
    """
    flip = bool(reverse)
    blocked = []
    available = []
    for country in list_countries():
        # "reverse" flips the meaning of membership in allowed_countries.
        # (Previously reverse=True classified every country as available.)
        if (country in allowed_countries) != flip:
            available.append(country)
        else:
            blocked.append(country)

    if not available:
        return 'This video is blocked in all countries'
    if len(blocked) <= 10:
        return 'This video is blocked in these countries: %s' % ', '.join(
            sorted(country_list[cnt] for cnt in blocked))
    if len(available) <= 10:
        # Sort the names that are actually printed (the old code sorted the
        # unrelated blocked list and emitted these names unsorted).
        return 'This video is only available in these countries: %s' % ', '.join(
            sorted(country_list[cnt] for cnt in available))
    # Both lists are long: print whichever is shorter, as bare codes.
    if len(blocked) >= len(available):
        return 'This video is only available in these countries: %s' % ', '.join(
            sorted(available))
    return 'This video is blocked in these countries: %s' % ', '.join(
        sorted(blocked))
|
||||
|
|
Loading…
Reference in New Issue