diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index 7939a3211..d247e2eea 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -28,7 +28,6 @@ from ..utils import ( float_or_none, get_element_by_id, int_or_none, - list_geoblocked_countres, mimetype2ext, parse_codecs, parse_duration, @@ -44,6 +43,7 @@ from ..utils import ( uppercase_escape, url_or_none, urlencode_postdata, + GeoRestrictedError, ) @@ -567,24 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format': '141/bestaudio[ext=m4a]', }, }, - # JS player signature function name containing $ - { - 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM', - 'info_dict': { - 'id': 'nfWlot6h_JM', - 'ext': 'm4a', - 'title': 'Taylor Swift - Shake It Off', - 'description': 'md5:9dc0bd58efe700594b54f7d82bed0bac', - 'duration': 242, - 'uploader': 'TaylorSwiftVEVO', - 'uploader_id': 'TaylorSwiftVEVO', - 'upload_date': '20140818', - }, - 'params': { - 'youtube_include_dash_manifest': True, - 'format': '141/bestaudio[ext=m4a]', - }, - }, # Normal age-gate video (No vevo, embed allowed) { 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', @@ -636,24 +618,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'DASH manifest missing', ] }, - # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) - { - 'url': 'lqQg6PlCWgI', - 'info_dict': { - 'id': 'lqQg6PlCWgI', - 'ext': 'mp4', - 'duration': 6085, - 'upload_date': '20150827', - 'uploader_id': 'olympic', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic', - 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', - 'uploader': 'Olympic', - 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', - }, - 'params': { - 'skip_download': 'requires avconv', - } - }, # Non-square pixels { 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0', @@ -879,26 +843,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://www.youtubekids.com/watch?v=BnC-cpUCdns', 'only_matching': True, }, - { - # invalid -> valid video id redirection - 'url': 'DJztXj2GPfl', - 'info_dict': { - 'id': 'DJztXj2GPfk', - 'ext': 'mp4', - 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)', - 'description': 'md5:bf577a41da97918e94fa9798d9228825', - 'upload_date': '20090125', - 'uploader': 'Prochorowka', - 'uploader_id': 'Prochorowka', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka', - 'artist': 'Panjabi MC', - 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix', - 'album': 'Beware of the Boys (Mundian To Bach Ke)', - }, - 'params': { - 'skip_download': True, - }, - }, { # empty description results in an empty string 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k', @@ -919,6 +863,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 'https://youtube.com/shorts/7awd-y_DTQY', 'only_matching': True, }, + { + 'url': 'https://www.youtube.com/video/2NDLF-k2PwA', + 'only_matching': True, + } ] _VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$' @@ -1132,7 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # to be implemented in future that will replace this workaround (see # https://github.com/ytdl-org/youtube-dl/issues/7468, # https://github.com/ytdl-org/youtube-dl/pull/7599) - r';ytplayer\.config\s*=\s*({.+?});ytplayer', + r';ytplayer\.config\s*=\s*({.+?});\s*ytplayer', r';ytplayer\.config\s*=\s*({.+?});', ) config = self._search_regex( @@ -1473,14 +1421,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Get video info video_info = {} - embed_webpage = None if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+' or re.search(r'player-age-gate-content">', video_webpage) is not None): age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube - url = proto + '://www.youtube.com/embed/%s' % video_id - embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, @@ -1490,8 +1435,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: video_info_webpage = self._download_webpage( video_info_url, video_id, - note='Refetching age-gated info webpage', - errnote='unable to download video info webpage') + note='Downloading age-gated video info', + errnote='unable to download video info') except ExtractorError: video_info_webpage = None if video_info_webpage: @@ -1522,9 +1467,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_response = extract_player_response(args.get('player_response'), video_id) if not player_response: player_response = extract_player_response( - self._search_regex( + self._search_regex(( + # js-like syntax + r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});(?:if \(ytcsi|var [a-zA-Z\_])', r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});', - video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id) + ), video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) @@ -1722,24 +1669,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)' player_url = self._search_regex( - ASSETS_RE, - embed_webpage if age_gate else video_webpage, '', default=player_url) + ASSETS_RE, video_webpage, '', default=player_url) if not player_url and not age_gate: # We need the embed website after all - if embed_webpage is None: - embed_url = proto + '://www.youtube.com/embed/%s' % video_id - embed_webpage = self._download_webpage( - embed_url, video_id, 'Downloading embed webpage') + embed_url = proto + '://www.youtube.com/embed/%s' % video_id + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') player_url = self._search_regex( ASSETS_RE, embed_webpage, 'JS player URL') - # if player_url is None: - # player_url_json = self._search_regex( - # r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', - # video_webpage, 'age gate player URL') - # player_url = json.loads(player_url_json) - if 'sig' in url_data: url += '&signature=' + url_data['sig'][0] elif 's' in url_data: @@ -1871,11 +1810,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or ', who has blocked it on copyright grounds' in error_desc or 'It is not available in your country.' in error_desc or ', who has blocked it in your country on copyright grounds.' in error_desc): - raise ExtractorError( - list_geoblocked_countres( - self._search_regex( - r'', - video_webpage, 'allowed region list').split(',')), + raise GeoRestrictedError( + error_desc, + countries=self._search_regex( + r'', + video_webpage, 'allowed region list').split(','), expected=True) if error_desc and 'Playback on other websites has been disabled' in error_desc: raise ExtractorError( diff --git a/haruhi_dl/utils.py b/haruhi_dl/utils.py index 06b216898..c44ea78a6 100644 --- a/haruhi_dl/utils.py +++ b/haruhi_dl/utils.py @@ -5775,284 +5775,3 @@ def clean_podcast_url(url): st\.fm # https://podsights.com/docs/ )/e )/''', '', url) - - -# http://country.io/names.json -country_list = { - "BD": "Bangladesh", - "BE": "Belgium", - "BF": "Burkina Faso", - "BG": "Bulgaria", - "BA": "Bosnia and Herzegovina", - "BB": "Barbados", - "WF": "Wallis and Futuna", - "BL": "Saint Barthelemy", - "BM": "Bermuda", - "BN": "Brunei", - "BO": "Bolivia", - "BH": "Bahrain", - "BI": "Burundi", - "BJ": "Benin", - "BT": "Bhutan", - "JM": "Jamaica", - "BV": "Bouvet Island", - "BW": "Botswana", - "WS": "Samoa", - "BQ": "Bonaire, Saint Eustatius and Saba ", - "BR": "Brazil", - "BS": "Bahamas", - "JE": "Jersey", - "BY": "Belarus", - "BZ": "Belize", - "RU": "Russia", - "RW": "Rwanda", - "RS": "Serbia", - "TL": "East Timor", - "RE": "Reunion", - "TM": "Turkmenistan", - "TJ": "Tajikistan", - "RO": "Romania", - "TK": "Tokelau", - "GW": "Guinea-Bissau", - "GU": "Guam", - "GT": "Guatemala", - "GS": "South Georgia and the South Sandwich Islands", - "GR": "Greece", - "GQ": "Equatorial Guinea", - "GP": "Guadeloupe", - "JP": "Japan", - "GY": "Guyana", - "GG": "Guernsey", - "GF": "French Guiana", - "GE": "Georgia", - "GD": "Grenada", - "GB": "United Kingdom", - "GA": "Gabon", - "SV": "El Salvador", - "GN": "Guinea", - "GM": "Gambia", - "GL": "Greenland", - "GI": "Gibraltar", - "GH": "Ghana", - "OM": "Oman", - "TN": "Tunisia", - "JO": "Jordan", - "HR": "Croatia", - "HT": "Haiti", - "HU": "Hungary", - "HK": "Hong Kong", - "HN": "Honduras", - "HM": "Heard Island and McDonald Islands", - "VE": "Venezuela", - "PR": "Puerto Rico", - "PS": "Palestinian Territory", - "PW": "Palau", - "PT": "Portugal", - "SJ": "Svalbard and Jan Mayen", - "PY": "Paraguay", - "IQ": "Iraq", - "PA": "Panama", - "PF": "French Polynesia", - "PG": "Papua New Guinea", - "PE": "Peru", - "PK": "Pakistan", - "PH": "Philippines", - "PN": "Pitcairn", - "PL": "Poland", - "PM": "Saint Pierre and Miquelon", - "ZM": "Zambia", - "EH": "Western Sahara", - "EE": "Estonia", - "EG": "Egypt", - "ZA": "South Africa", - "EC": "Ecuador", - "IT": "Italy", - "VN": "Vietnam", - "SB": "Solomon Islands", - "ET": "Ethiopia", - "SO": "Somalia", - "ZW": "Zimbabwe", - "SA": "Saudi Arabia", - "ES": "Spain", - "ER": "Eritrea", - "ME": "Montenegro", - "MD": "Moldova", - "MG": "Madagascar", - "MF": "Saint Martin", - "MA": "Morocco", - "MC": "Monaco", - "UZ": "Uzbekistan", - "MM": "Myanmar", - "ML": "Mali", - "MO": "Macao", - "MN": "Mongolia", - "MH": "Marshall Islands", - "MK": "Macedonia", - "MU": "Mauritius", - "MT": "Malta", - "MW": "Malawi", - "MV": "Maldives", - "MQ": "Martinique", - "MP": "Northern Mariana Islands", - "MS": "Montserrat", - "MR": "Mauritania", - "IM": "Isle of Man", - "UG": "Uganda", - "TZ": "Tanzania", - "MY": "Malaysia", - "MX": "Mexico", - "IL": "Israel", - "FR": "France", - "IO": "British Indian Ocean Territory", - "SH": "Saint Helena", - "FI": "Finland", - "FJ": "Fiji", - "FK": "Falkland Islands", - "FM": "Micronesia", - "FO": "Faroe Islands", - "NI": "Nicaragua", - "NL": "Netherlands", - "NO": "Norway", - "NA": "Namibia", - "VU": "Vanuatu", - "NC": "New Caledonia", - "NE": "Niger", - "NF": "Norfolk Island", - "NG": "Nigeria", - "NZ": "New Zealand", - "NP": "Nepal", - "NR": "Nauru", - "NU": "Niue", - "CK": "Cook Islands", - "XK": "Kosovo", - "CI": "Ivory Coast", - "CH": "Switzerland", - "CO": "Colombia", - "CN": "China", - "CM": "Cameroon", - "CL": "Chile", - "CC": "Cocos Islands", - "CA": "Canada", - "CG": "Republic of the Congo", - "CF": "Central African Republic", - "CD": "Democratic Republic of the Congo", - "CZ": "Czech Republic", - "CY": "Cyprus", - "CX": "Christmas Island", - "CR": "Costa Rica", - "CW": "Curacao", - "CV": "Cape Verde", - "CU": "Cuba", - "SZ": "Swaziland", - "SY": "Syria", - "SX": "Sint Maarten", - "KG": "Kyrgyzstan", - "KE": "Kenya", - "SS": "South Sudan", - "SR": "Suriname", - "KI": "Kiribati", - "KH": "Cambodia", - "KN": "Saint Kitts and Nevis", - "KM": "Comoros", - "ST": "Sao Tome and Principe", - "SK": "Slovakia", - "KR": "South Korea", - "SI": "Slovenia", - "KP": "North Korea", - "KW": "Kuwait", - "SN": "Senegal", - "SM": "San Marino", - "SL": "Sierra Leone", - "SC": "Seychelles", - "KZ": "Kazakhstan", - "KY": "Cayman Islands", - "SG": "Singapore", - "SE": "Sweden", - "SD": "Sudan", - "DO": "Dominican Republic", - "DM": "Dominica", - "DJ": "Djibouti", - "DK": "Denmark", - "VG": "British Virgin Islands", - "DE": "Germany", - "YE": "Yemen", - "DZ": "Algeria", - "US": "United States", - "UY": "Uruguay", - "YT": "Mayotte", - "UM": "United States Minor Outlying Islands", - "LB": "Lebanon", - "LC": "Saint Lucia", - "LA": "Laos", - "TV": "Tuvalu", - "TW": "Taiwan", - "TT": "Trinidad and Tobago", - "TR": "Turkey", - "LK": "Sri Lanka", - "LI": "Liechtenstein", - "LV": "Latvia", - "TO": "Tonga", - "LT": "Lithuania", - "LU": "Luxembourg", - "LR": "Liberia", - "LS": "Lesotho", - "TH": "Thailand", - "TF": "French Southern Territories", - "TG": "Togo", - "TD": "Chad", - "TC": "Turks and Caicos Islands", - "LY": "Libya", - "VA": "Vatican", - "VC": "Saint Vincent and the Grenadines", - "AE": "United Arab Emirates", - "AD": "Andorra", - "AG": "Antigua and Barbuda", - "AF": "Afghanistan", - "AI": "Anguilla", - "VI": "U.S. Virgin Islands", - "IS": "Iceland", - "IR": "Iran", - "AM": "Armenia", - "AL": "Albania", - "AO": "Angola", - "AQ": "Antarctica", - "AS": "American Samoa", - "AR": "Argentina", - "AU": "Australia", - "AT": "Austria", - "AW": "Aruba", - "IN": "India", - "AX": "Aland Islands", - "AZ": "Azerbaijan", - "IE": "Ireland", - "ID": "Indonesia", - "UA": "Ukraine", - "QA": "Qatar", - "MZ": "Mozambique" -} - - -def list_countries(): - return country_list.keys() - - -def list_geoblocked_countres(allowed_countries, reverse=False): - geoblocked = [] - geounlocked = [] - for country in list_countries(): - if country in allowed_countries or (country not in allowed_countries and reverse is True): - geounlocked.append(country) - elif country not in allowed_countries or (country in allowed_countries and reverse is True): - geoblocked.append(country) - if len(geounlocked) == 0: - return 'This video is blocked in all countries' - if len(geoblocked) <= 10: - return 'This video is blocked in these countries: %s' % ', '.join(sorted(country_list[cnt] for cnt in geoblocked)) - if len(geounlocked) <= 10: - geoblocked.sort() - return 'This video is only available in these countries: %s' % ', '.join(country_list[cnt] for cnt in geounlocked) - if len(geoblocked) >= len(geounlocked): - geounlocked.sort() - return 'This video is only available in these countries: %s' % ', '.join(geounlocked) - geoblocked.sort() - return 'This video is blocked in these countries: %s' % ', '.join(geoblocked)