[agora] wyborcza/wysokieobcasy/tokfm podcast fixes

This commit is contained in:
Laura Liberda 2021-01-23 00:31:40 +01:00
parent 3bb3d99229
commit 570cf794a9

View file

@ -63,7 +63,12 @@ class WyborczaVideoIE(InfoExtractor):
class WyborczaPodcastIE(InfoExtractor): class WyborczaPodcastIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?wyborcza\.pl/podcast(?:/0,172673\.html(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?)?' _VALID_URL = r'''(?x)
https?://(?:www\.)?
(?:wyborcza\.pl/podcast(?:/0,172673\.html)?
|wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html)
(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?
'''
_TESTS = [{ _TESTS = [{
'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast', 'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
'info_dict': { 'info_dict': {
@ -74,6 +79,16 @@ class WyborczaPodcastIE(InfoExtractor):
'upload_date': '20210117', 'upload_date': '20210117',
'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d', 'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
}, },
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
'info_dict': {
'id': '100673',
'ext': 'mp3',
'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
'uploader': 'Agnieszka Urazińska ',
'upload_date': '20210115',
'description': 'md5:c161dc035f8dbb60077011fc41274899',
},
}, { }, {
'url': 'https://wyborcza.pl/podcast', 'url': 'https://wyborcza.pl/podcast',
'info_dict': { 'info_dict': {
@ -81,6 +96,13 @@ class WyborczaPodcastIE(InfoExtractor):
'title': 'Gościnnie w TOK FM: Wyborcza, 8:10', 'title': 'Gościnnie w TOK FM: Wyborcza, 8:10',
}, },
'playlist_mincount': 370, 'playlist_mincount': 370,
}, {
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
'info_dict': {
'id': '395',
'title': 'Gościnnie w TOK FM: Wysokie Obcasy',
},
'playlist_mincount': 12,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -90,10 +112,12 @@ class WyborczaPodcastIE(InfoExtractor):
if not podcast_id: if not podcast_id:
return { return {
'_type': 'url', '_type': 'url',
'url': 'tokfm:audition:334', 'url': 'tokfm:audition:%s' % ('395' if 'wysokieobcasy.pl/' in url else '334'),
'ie_key': 'TokFMAudition', 'ie_key': 'TokFMAudition',
} }
meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s' % podcast_id, podcast_id) meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s%s' % (podcast_id,
'&type=wo' if 'wysokieobcasy.pl/' in url else ''),
podcast_id)
published_date = meta['publishedDate'].split(' ') published_date = meta['publishedDate'].split(' ')
upload_date = '%s%s%s' % (published_date[2], { upload_date = '%s%s%s' % (published_date[2], {
'stycznia': '01', 'stycznia': '01',
@ -184,18 +208,31 @@ class TokFMAuditionIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
audition_id = self._match_id(url) audition_id = self._match_id(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
}
data = self._download_json( data = self._download_json(
'https://api.podcast.radioagora.pl/api4/getSeries?series_id=%s' % (audition_id), 'https://api.podcast.radioagora.pl/api4/getSeries?series_id=%s' % (audition_id),
audition_id, 'Downloading audition metadata') audition_id, 'Downloading audition metadata', headers=headers)
if len(data) == 0: if len(data) == 0:
raise ExtractorError('No such audition') raise ExtractorError('No such audition')
data = data[0] data = data[0]
entries = [] entries = []
for page in range(0, (int(data['total_podcasts']) // 30) + 1): for page in range(0, (int(data['total_podcasts']) // 30) + 1):
podcast_page = self._download_json( podcast_page = False
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page), retries = 0
audition_id, 'Downloading podcast list (page #%d)' % (page + 1)) while retries <= 5 and podcast_page is False:
podcast_page = self._download_json(
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page),
audition_id, 'Downloading podcast list (page #%d%s)' % (
page + 1,
(', try %d' % retries) if retries > 0 else ''),
headers=headers)
retries += 1
if podcast_page is False:
raise ExtractorError('Agora returned shit 5 times in a row', expected=True)
for podcast in podcast_page: for podcast in podcast_page:
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',