diff --git a/haruhi_dl/extractor/agora.py b/haruhi_dl/extractor/agora.py index 37cae6ac6..aab6bffcc 100644 --- a/haruhi_dl/extractor/agora.py +++ b/haruhi_dl/extractor/agora.py @@ -63,7 +63,12 @@ class WyborczaVideoIE(InfoExtractor): class WyborczaPodcastIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?wyborcza\.pl/podcast(?:/0,172673\.html(?:\?(?:[^&]+?&)*?podcast=(?P\d+))?)?' + _VALID_URL = r'''(?x) + https?://(?:www\.)? + (?:wyborcza\.pl/podcast(?:/0,172673\.html)? + |wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html) + (?:\?(?:[^&]+?&)*?podcast=(?P\d+))? + ''' _TESTS = [{ 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast', 'info_dict': { @@ -74,6 +79,16 @@ class WyborczaPodcastIE(InfoExtractor): 'upload_date': '20210117', 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d', }, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673', + 'info_dict': { + 'id': '100673', + 'ext': 'mp3', + 'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?', + 'uploader': 'Agnieszka Urazińska ', + 'upload_date': '20210115', + 'description': 'md5:c161dc035f8dbb60077011fc41274899', + }, }, { 'url': 'https://wyborcza.pl/podcast', 'info_dict': { @@ -81,6 +96,13 @@ class WyborczaPodcastIE(InfoExtractor): 'title': 'Gościnnie w TOK FM: Wyborcza, 8:10', }, 'playlist_mincount': 370, + }, { + 'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html', + 'info_dict': { + 'id': '395', + 'title': 'Gościnnie w TOK FM: Wysokie Obcasy', + }, + 'playlist_mincount': 12, }] def _real_extract(self, url): @@ -90,10 +112,12 @@ class WyborczaPodcastIE(InfoExtractor): if not podcast_id: return { '_type': 'url', - 'url': 'tokfm:audition:334', + 'url': 'tokfm:audition:%s' % ('395' if 'wysokieobcasy.pl/' in url else '334'), 'ie_key': 'TokFMAudition', } - meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s' % podcast_id, podcast_id) + meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s%s' % (podcast_id, + '&type=wo' if 'wysokieobcasy.pl/' in url else ''), + podcast_id) published_date = meta['publishedDate'].split(' ') upload_date = '%s%s%s' % (published_date[2], { 'stycznia': '01', @@ -184,18 +208,31 @@ class TokFMAuditionIE(InfoExtractor): def _real_extract(self, url): audition_id = self._match_id(url) + headers = { + 'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36', + } + data = self._download_json( 'https://api.podcast.radioagora.pl/api4/getSeries?series_id=%s' % (audition_id), - audition_id, 'Downloading audition metadata') + audition_id, 'Downloading audition metadata', headers=headers) if len(data) == 0: raise ExtractorError('No such audition') data = data[0] entries = [] for page in range(0, (int(data['total_podcasts']) // 30) + 1): - podcast_page = self._download_json( - 'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page), - audition_id, 'Downloading podcast list (page #%d)' % (page + 1)) + podcast_page = False + retries = 0 + while retries <= 5 and podcast_page is False: + podcast_page = self._download_json( + 'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page), + audition_id, 'Downloading podcast list (page #%d%s)' % ( + page + 1, + (', try %d' % retries) if retries > 0 else ''), + headers=headers) + retries += 1 + if podcast_page is False: + raise ExtractorError('Agora returned shit 5 times in a row', expected=True) for podcast in podcast_page: entries.append({ '_type': 'url_transparent',