[agora] wyborcza/wysokieobcasy/tokfm podcast fixes

This commit is contained in:
Laura Liberda 2021-01-23 00:31:40 +01:00
parent 3bb3d99229
commit 570cf794a9

View file

@@ -63,7 +63,12 @@ class WyborczaVideoIE(InfoExtractor):
class WyborczaPodcastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?wyborcza\.pl/podcast(?:/0,172673\.html(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?)?'
_VALID_URL = r'''(?x)
https?://(?:www\.)?
(?:wyborcza\.pl/podcast(?:/0,172673\.html)?
|wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html)
(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?
'''
_TESTS = [{
'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
'info_dict': {
@@ -74,6 +79,16 @@ class WyborczaPodcastIE(InfoExtractor):
'upload_date': '20210117',
'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
},
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
'info_dict': {
'id': '100673',
'ext': 'mp3',
'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
'uploader': 'Agnieszka Urazińska ',
'upload_date': '20210115',
'description': 'md5:c161dc035f8dbb60077011fc41274899',
},
}, {
'url': 'https://wyborcza.pl/podcast',
'info_dict': {
@@ -81,6 +96,13 @@ class WyborczaPodcastIE(InfoExtractor):
'title': 'Gościnnie w TOK FM: Wyborcza, 8:10',
},
'playlist_mincount': 370,
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
'info_dict': {
'id': '395',
'title': 'Gościnnie w TOK FM: Wysokie Obcasy',
},
'playlist_mincount': 12,
}]
def _real_extract(self, url):
@@ -90,10 +112,12 @@ class WyborczaPodcastIE(InfoExtractor):
if not podcast_id:
return {
'_type': 'url',
'url': 'tokfm:audition:334',
'url': 'tokfm:audition:%s' % ('395' if 'wysokieobcasy.pl/' in url else '334'),
'ie_key': 'TokFMAudition',
}
meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s' % podcast_id, podcast_id)
meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s%s' % (podcast_id,
'&type=wo' if 'wysokieobcasy.pl/' in url else ''),
podcast_id)
published_date = meta['publishedDate'].split(' ')
upload_date = '%s%s%s' % (published_date[2], {
'stycznia': '01',
@@ -184,18 +208,31 @@ class TokFMAuditionIE(InfoExtractor):
def _real_extract(self, url):
audition_id = self._match_id(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
}
data = self._download_json(
'https://api.podcast.radioagora.pl/api4/getSeries?series_id=%s' % (audition_id),
audition_id, 'Downloading audition metadata')
audition_id, 'Downloading audition metadata', headers=headers)
if len(data) == 0:
raise ExtractorError('No such audition')
data = data[0]
entries = []
for page in range(0, (int(data['total_podcasts']) // 30) + 1):
podcast_page = self._download_json(
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page),
audition_id, 'Downloading podcast list (page #%d)' % (page + 1))
podcast_page = False
retries = 0
while retries <= 5 and podcast_page is False:
podcast_page = self._download_json(
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page),
audition_id, 'Downloading podcast list (page #%d%s)' % (
page + 1,
(', try %d' % retries) if retries > 0 else ''),
headers=headers)
retries += 1
if podcast_page is False:
raise ExtractorError('Agora returned shit 5 times in a row', expected=True)
for podcast in podcast_page:
entries.append({
'_type': 'url_transparent',