diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py
index 7939a3211..d247e2eea 100644
--- a/haruhi_dl/extractor/youtube.py
+++ b/haruhi_dl/extractor/youtube.py
@@ -28,7 +28,6 @@ from ..utils import (
float_or_none,
get_element_by_id,
int_or_none,
- list_geoblocked_countres,
mimetype2ext,
parse_codecs,
parse_duration,
@@ -44,6 +43,7 @@ from ..utils import (
uppercase_escape,
url_or_none,
urlencode_postdata,
+ GeoRestrictedError,
)
@@ -567,24 +567,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '141/bestaudio[ext=m4a]',
},
},
- # JS player signature function name containing $
- {
- 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
- 'info_dict': {
- 'id': 'nfWlot6h_JM',
- 'ext': 'm4a',
- 'title': 'Taylor Swift - Shake It Off',
- 'description': 'md5:9dc0bd58efe700594b54f7d82bed0bac',
- 'duration': 242,
- 'uploader': 'TaylorSwiftVEVO',
- 'uploader_id': 'TaylorSwiftVEVO',
- 'upload_date': '20140818',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
# Normal age-gate video (No vevo, embed allowed)
{
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
@@ -636,24 +618,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'DASH manifest missing',
]
},
- # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
- {
- 'url': 'lqQg6PlCWgI',
- 'info_dict': {
- 'id': 'lqQg6PlCWgI',
- 'ext': 'mp4',
- 'duration': 6085,
- 'upload_date': '20150827',
- 'uploader_id': 'olympic',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
- 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- }
- },
# Non-square pixels
{
'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
@@ -879,26 +843,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtubekids.com/watch?v=BnC-cpUCdns',
'only_matching': True,
},
- {
- # invalid -> valid video id redirection
- 'url': 'DJztXj2GPfl',
- 'info_dict': {
- 'id': 'DJztXj2GPfk',
- 'ext': 'mp4',
- 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
- 'description': 'md5:bf577a41da97918e94fa9798d9228825',
- 'upload_date': '20090125',
- 'uploader': 'Prochorowka',
- 'uploader_id': 'Prochorowka',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
- 'artist': 'Panjabi MC',
- 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
- 'album': 'Beware of the Boys (Mundian To Bach Ke)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
{
# empty description results in an empty string
'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
@@ -919,6 +863,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://youtube.com/shorts/7awd-y_DTQY',
'only_matching': True,
},
+ {
+ 'url': 'https://www.youtube.com/video/2NDLF-k2PwA',
+ 'only_matching': True,
+ }
]
_VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$'
@@ -1132,7 +1080,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# to be implemented in future that will replace this workaround (see
# https://github.com/ytdl-org/youtube-dl/issues/7468,
# https://github.com/ytdl-org/youtube-dl/pull/7599)
- r';ytplayer\.config\s*=\s*({.+?});ytplayer',
+ r';ytplayer\.config\s*=\s*({.+?});\s*ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
)
config = self._search_regex(
@@ -1473,14 +1421,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video info
video_info = {}
- embed_webpage = None
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
or re.search(r'player-age-gate-content">', video_webpage) is not None):
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
- url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
data = compat_urllib_parse_urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
@@ -1490,8 +1435,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
video_info_webpage = self._download_webpage(
video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
+ note='Downloading age-gated video info',
+ errnote='unable to download video info')
except ExtractorError:
video_info_webpage = None
if video_info_webpage:
@@ -1522,9 +1467,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = extract_player_response(args.get('player_response'), video_id)
if not player_response:
player_response = extract_player_response(
- self._search_regex(
+ self._search_regex((
+ # js-like syntax: the closing `};` must be followed by the next statement (`if (ytcsi` or `var <name>`)
+ r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});(?:if \(ytcsi|var [a-zA-Z\_])',
r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});',
- video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
+ ), video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
@@ -1722,24 +1669,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)'
player_url = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage, '', default=player_url)
+ ASSETS_RE, video_webpage, '', default=player_url)
if not player_url and not age_gate:
# We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
player_url = self._search_regex(
ASSETS_RE, embed_webpage, 'JS player URL')
- # if player_url is None:
- # player_url_json = self._search_regex(
- # r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- # video_webpage, 'age gate player URL')
- # player_url = json.loads(player_url_json)
-
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
@@ -1871,11 +1810,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or ', who has blocked it on copyright grounds' in error_desc
or 'It is not available in your country.' in error_desc
or ', who has blocked it in your country on copyright grounds.' in error_desc):
- raise ExtractorError(
- list_geoblocked_countres(
- self._search_regex(
- r'',
- video_webpage, 'allowed region list').split(',')),
+ raise GeoRestrictedError(
+ error_desc,
+ countries=self._search_regex(
+ r'',
+ video_webpage, 'allowed region list').split(','),
expected=True)
if error_desc and 'Playback on other websites has been disabled' in error_desc:
raise ExtractorError(
diff --git a/haruhi_dl/utils.py b/haruhi_dl/utils.py
index 06b216898..c44ea78a6 100644
--- a/haruhi_dl/utils.py
+++ b/haruhi_dl/utils.py
@@ -5775,284 +5775,3 @@ def clean_podcast_url(url):
st\.fm # https://podsights.com/docs/
)/e
)/''', '', url)
-
-
-# http://country.io/names.json
-country_list = {
- "BD": "Bangladesh",
- "BE": "Belgium",
- "BF": "Burkina Faso",
- "BG": "Bulgaria",
- "BA": "Bosnia and Herzegovina",
- "BB": "Barbados",
- "WF": "Wallis and Futuna",
- "BL": "Saint Barthelemy",
- "BM": "Bermuda",
- "BN": "Brunei",
- "BO": "Bolivia",
- "BH": "Bahrain",
- "BI": "Burundi",
- "BJ": "Benin",
- "BT": "Bhutan",
- "JM": "Jamaica",
- "BV": "Bouvet Island",
- "BW": "Botswana",
- "WS": "Samoa",
- "BQ": "Bonaire, Saint Eustatius and Saba ",
- "BR": "Brazil",
- "BS": "Bahamas",
- "JE": "Jersey",
- "BY": "Belarus",
- "BZ": "Belize",
- "RU": "Russia",
- "RW": "Rwanda",
- "RS": "Serbia",
- "TL": "East Timor",
- "RE": "Reunion",
- "TM": "Turkmenistan",
- "TJ": "Tajikistan",
- "RO": "Romania",
- "TK": "Tokelau",
- "GW": "Guinea-Bissau",
- "GU": "Guam",
- "GT": "Guatemala",
- "GS": "South Georgia and the South Sandwich Islands",
- "GR": "Greece",
- "GQ": "Equatorial Guinea",
- "GP": "Guadeloupe",
- "JP": "Japan",
- "GY": "Guyana",
- "GG": "Guernsey",
- "GF": "French Guiana",
- "GE": "Georgia",
- "GD": "Grenada",
- "GB": "United Kingdom",
- "GA": "Gabon",
- "SV": "El Salvador",
- "GN": "Guinea",
- "GM": "Gambia",
- "GL": "Greenland",
- "GI": "Gibraltar",
- "GH": "Ghana",
- "OM": "Oman",
- "TN": "Tunisia",
- "JO": "Jordan",
- "HR": "Croatia",
- "HT": "Haiti",
- "HU": "Hungary",
- "HK": "Hong Kong",
- "HN": "Honduras",
- "HM": "Heard Island and McDonald Islands",
- "VE": "Venezuela",
- "PR": "Puerto Rico",
- "PS": "Palestinian Territory",
- "PW": "Palau",
- "PT": "Portugal",
- "SJ": "Svalbard and Jan Mayen",
- "PY": "Paraguay",
- "IQ": "Iraq",
- "PA": "Panama",
- "PF": "French Polynesia",
- "PG": "Papua New Guinea",
- "PE": "Peru",
- "PK": "Pakistan",
- "PH": "Philippines",
- "PN": "Pitcairn",
- "PL": "Poland",
- "PM": "Saint Pierre and Miquelon",
- "ZM": "Zambia",
- "EH": "Western Sahara",
- "EE": "Estonia",
- "EG": "Egypt",
- "ZA": "South Africa",
- "EC": "Ecuador",
- "IT": "Italy",
- "VN": "Vietnam",
- "SB": "Solomon Islands",
- "ET": "Ethiopia",
- "SO": "Somalia",
- "ZW": "Zimbabwe",
- "SA": "Saudi Arabia",
- "ES": "Spain",
- "ER": "Eritrea",
- "ME": "Montenegro",
- "MD": "Moldova",
- "MG": "Madagascar",
- "MF": "Saint Martin",
- "MA": "Morocco",
- "MC": "Monaco",
- "UZ": "Uzbekistan",
- "MM": "Myanmar",
- "ML": "Mali",
- "MO": "Macao",
- "MN": "Mongolia",
- "MH": "Marshall Islands",
- "MK": "Macedonia",
- "MU": "Mauritius",
- "MT": "Malta",
- "MW": "Malawi",
- "MV": "Maldives",
- "MQ": "Martinique",
- "MP": "Northern Mariana Islands",
- "MS": "Montserrat",
- "MR": "Mauritania",
- "IM": "Isle of Man",
- "UG": "Uganda",
- "TZ": "Tanzania",
- "MY": "Malaysia",
- "MX": "Mexico",
- "IL": "Israel",
- "FR": "France",
- "IO": "British Indian Ocean Territory",
- "SH": "Saint Helena",
- "FI": "Finland",
- "FJ": "Fiji",
- "FK": "Falkland Islands",
- "FM": "Micronesia",
- "FO": "Faroe Islands",
- "NI": "Nicaragua",
- "NL": "Netherlands",
- "NO": "Norway",
- "NA": "Namibia",
- "VU": "Vanuatu",
- "NC": "New Caledonia",
- "NE": "Niger",
- "NF": "Norfolk Island",
- "NG": "Nigeria",
- "NZ": "New Zealand",
- "NP": "Nepal",
- "NR": "Nauru",
- "NU": "Niue",
- "CK": "Cook Islands",
- "XK": "Kosovo",
- "CI": "Ivory Coast",
- "CH": "Switzerland",
- "CO": "Colombia",
- "CN": "China",
- "CM": "Cameroon",
- "CL": "Chile",
- "CC": "Cocos Islands",
- "CA": "Canada",
- "CG": "Republic of the Congo",
- "CF": "Central African Republic",
- "CD": "Democratic Republic of the Congo",
- "CZ": "Czech Republic",
- "CY": "Cyprus",
- "CX": "Christmas Island",
- "CR": "Costa Rica",
- "CW": "Curacao",
- "CV": "Cape Verde",
- "CU": "Cuba",
- "SZ": "Swaziland",
- "SY": "Syria",
- "SX": "Sint Maarten",
- "KG": "Kyrgyzstan",
- "KE": "Kenya",
- "SS": "South Sudan",
- "SR": "Suriname",
- "KI": "Kiribati",
- "KH": "Cambodia",
- "KN": "Saint Kitts and Nevis",
- "KM": "Comoros",
- "ST": "Sao Tome and Principe",
- "SK": "Slovakia",
- "KR": "South Korea",
- "SI": "Slovenia",
- "KP": "North Korea",
- "KW": "Kuwait",
- "SN": "Senegal",
- "SM": "San Marino",
- "SL": "Sierra Leone",
- "SC": "Seychelles",
- "KZ": "Kazakhstan",
- "KY": "Cayman Islands",
- "SG": "Singapore",
- "SE": "Sweden",
- "SD": "Sudan",
- "DO": "Dominican Republic",
- "DM": "Dominica",
- "DJ": "Djibouti",
- "DK": "Denmark",
- "VG": "British Virgin Islands",
- "DE": "Germany",
- "YE": "Yemen",
- "DZ": "Algeria",
- "US": "United States",
- "UY": "Uruguay",
- "YT": "Mayotte",
- "UM": "United States Minor Outlying Islands",
- "LB": "Lebanon",
- "LC": "Saint Lucia",
- "LA": "Laos",
- "TV": "Tuvalu",
- "TW": "Taiwan",
- "TT": "Trinidad and Tobago",
- "TR": "Turkey",
- "LK": "Sri Lanka",
- "LI": "Liechtenstein",
- "LV": "Latvia",
- "TO": "Tonga",
- "LT": "Lithuania",
- "LU": "Luxembourg",
- "LR": "Liberia",
- "LS": "Lesotho",
- "TH": "Thailand",
- "TF": "French Southern Territories",
- "TG": "Togo",
- "TD": "Chad",
- "TC": "Turks and Caicos Islands",
- "LY": "Libya",
- "VA": "Vatican",
- "VC": "Saint Vincent and the Grenadines",
- "AE": "United Arab Emirates",
- "AD": "Andorra",
- "AG": "Antigua and Barbuda",
- "AF": "Afghanistan",
- "AI": "Anguilla",
- "VI": "U.S. Virgin Islands",
- "IS": "Iceland",
- "IR": "Iran",
- "AM": "Armenia",
- "AL": "Albania",
- "AO": "Angola",
- "AQ": "Antarctica",
- "AS": "American Samoa",
- "AR": "Argentina",
- "AU": "Australia",
- "AT": "Austria",
- "AW": "Aruba",
- "IN": "India",
- "AX": "Aland Islands",
- "AZ": "Azerbaijan",
- "IE": "Ireland",
- "ID": "Indonesia",
- "UA": "Ukraine",
- "QA": "Qatar",
- "MZ": "Mozambique"
-}
-
-
-def list_countries():
- return country_list.keys()
-
-
-def list_geoblocked_countres(allowed_countries, reverse=False):
- geoblocked = []
- geounlocked = []
- for country in list_countries():
- if country in allowed_countries or (country not in allowed_countries and reverse is True):
- geounlocked.append(country)
- elif country not in allowed_countries or (country in allowed_countries and reverse is True):
- geoblocked.append(country)
- if len(geounlocked) == 0:
- return 'This video is blocked in all countries'
- if len(geoblocked) <= 10:
- return 'This video is blocked in these countries: %s' % ', '.join(sorted(country_list[cnt] for cnt in geoblocked))
- if len(geounlocked) <= 10:
- geoblocked.sort()
- return 'This video is only available in these countries: %s' % ', '.join(country_list[cnt] for cnt in geounlocked)
- if len(geoblocked) >= len(geounlocked):
- geounlocked.sort()
- return 'This video is only available in these countries: %s' % ', '.join(geounlocked)
- geoblocked.sort()
- return 'This video is blocked in these countries: %s' % ', '.join(geoblocked)