[agora] wyborcza/wysokieobcasy/tokfm podcast fixes
This commit is contained in:
parent
3bb3d99229
commit
570cf794a9
|
@ -63,7 +63,12 @@ class WyborczaVideoIE(InfoExtractor):
|
|||
|
||||
|
||||
class WyborczaPodcastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?wyborcza\.pl/podcast(?:/0,172673\.html(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?)?'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:www\.)?
|
||||
(?:wyborcza\.pl/podcast(?:/0,172673\.html)?
|
||||
|wysokieobcasy\.pl/wysokie-obcasy/0,176631\.html)
|
||||
(?:\?(?:[^&]+?&)*?podcast=(?P<episode_id>\d+))?
|
||||
'''
|
||||
_TESTS = [{
|
||||
'url': 'https://wyborcza.pl/podcast/0,172673.html?podcast=100720#S.main_topic-K.C-B.6-L.1.podcast',
|
||||
'info_dict': {
|
||||
|
@ -74,6 +79,16 @@ class WyborczaPodcastIE(InfoExtractor):
|
|||
'upload_date': '20210117',
|
||||
'description': 'md5:49f0a06ffc4c1931210d3ab1416a651d',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html?podcast=100673',
|
||||
'info_dict': {
|
||||
'id': '100673',
|
||||
'ext': 'mp3',
|
||||
'title': 'Czym jest ubóstwo menstruacyjne i dlaczego dotyczy każdej i każdego z nas?',
|
||||
'uploader': 'Agnieszka Urazińska ',
|
||||
'upload_date': '20210115',
|
||||
'description': 'md5:c161dc035f8dbb60077011fc41274899',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://wyborcza.pl/podcast',
|
||||
'info_dict': {
|
||||
|
@ -81,6 +96,13 @@ class WyborczaPodcastIE(InfoExtractor):
|
|||
'title': 'Gościnnie w TOK FM: Wyborcza, 8:10',
|
||||
},
|
||||
'playlist_mincount': 370,
|
||||
}, {
|
||||
'url': 'https://www.wysokieobcasy.pl/wysokie-obcasy/0,176631.html',
|
||||
'info_dict': {
|
||||
'id': '395',
|
||||
'title': 'Gościnnie w TOK FM: Wysokie Obcasy',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
|
@ -90,10 +112,12 @@ class WyborczaPodcastIE(InfoExtractor):
|
|||
if not podcast_id:
|
||||
return {
|
||||
'_type': 'url',
|
||||
'url': 'tokfm:audition:334',
|
||||
'url': 'tokfm:audition:%s' % ('395' if 'wysokieobcasy.pl/' in url else '334'),
|
||||
'ie_key': 'TokFMAudition',
|
||||
}
|
||||
meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s' % podcast_id, podcast_id)
|
||||
meta = self._download_json('https://wyborcza.pl/api/podcast?guid=%s%s' % (podcast_id,
|
||||
'&type=wo' if 'wysokieobcasy.pl/' in url else ''),
|
||||
podcast_id)
|
||||
published_date = meta['publishedDate'].split(' ')
|
||||
upload_date = '%s%s%s' % (published_date[2], {
|
||||
'stycznia': '01',
|
||||
|
@ -184,18 +208,31 @@ class TokFMAuditionIE(InfoExtractor):
|
|||
def _real_extract(self, url):
|
||||
audition_id = self._match_id(url)
|
||||
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Linux; Android 9; Redmi 3S Build/PQ3A.190801.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.101 Mobile Safari/537.36',
|
||||
}
|
||||
|
||||
data = self._download_json(
|
||||
'https://api.podcast.radioagora.pl/api4/getSeries?series_id=%s' % (audition_id),
|
||||
audition_id, 'Downloading audition metadata')
|
||||
audition_id, 'Downloading audition metadata', headers=headers)
|
||||
|
||||
if len(data) == 0:
|
||||
raise ExtractorError('No such audition')
|
||||
data = data[0]
|
||||
entries = []
|
||||
for page in range(0, (int(data['total_podcasts']) // 30) + 1):
|
||||
podcast_page = self._download_json(
|
||||
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page),
|
||||
audition_id, 'Downloading podcast list (page #%d)' % (page + 1))
|
||||
podcast_page = False
|
||||
retries = 0
|
||||
while retries <= 5 and podcast_page is False:
|
||||
podcast_page = self._download_json(
|
||||
'https://api.podcast.radioagora.pl/api4/getPodcasts?series_id=%s&limit=30&offset=%d&with_guests=true&with_leaders_for_mobile=true' % (audition_id, page),
|
||||
audition_id, 'Downloading podcast list (page #%d%s)' % (
|
||||
page + 1,
|
||||
(', try %d' % retries) if retries > 0 else ''),
|
||||
headers=headers)
|
||||
retries += 1
|
||||
if podcast_page is False:
|
||||
raise ExtractorError('Agora returned shit 5 times in a row', expected=True)
|
||||
for podcast in podcast_page:
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
|
|
Loading…
Reference in a new issue