From 3240e9f5829d5a98fa9789a8b4cac3a1c4a19238 Mon Sep 17 00:00:00 2001
From: Lauren Liberda
Date: Wed, 3 Mar 2021 20:17:44 +0100
Subject: [PATCH] acast player extractor

---
 haruhi_dl/extractor/acast.py      | 77 +++++++++++++++++++++++++++++++
 haruhi_dl/extractor/extractors.py |  1 +
 haruhi_dl/extractor/pulsembed.py  | 11 +++++
 3 files changed, 89 insertions(+)

diff --git a/haruhi_dl/extractor/acast.py b/haruhi_dl/extractor/acast.py
index b9355a2c8..b57fb87b1 100644
--- a/haruhi_dl/extractor/acast.py
+++ b/haruhi_dl/extractor/acast.py
@@ -7,8 +7,12 @@ from .common import InfoExtractor
 from ..utils import (
     clean_html,
     clean_podcast_url,
+    float_or_none,
     int_or_none,
+    js_to_json,
     parse_iso8601,
+    urljoin,
+    ExtractorError,
 )
 
 
@@ -124,3 +128,76 @@ class ACastChannelIE(ACastBaseIE):
             entries.append(self._extract_episode(episode, show_info))
         return self.playlist_result(
             entries, show.get('id'), show.get('title'), show.get('description'))
+
+
+class ACastPlayerIE(InfoExtractor):
+    IE_NAME = 'acast:player'
+    _VALID_URL = r'https?://player\.acast\.com/(?:[^/]+/episodes/)?(?P<id>[^/?#]+)'
+
+    _TESTS = [{
+        'url': 'https://player.acast.com/600595844cac453f8579eca0/episodes/maciej-konieczny-podatek-medialny-to-mechanizm-kontroli?theme=default&latest=1',
+        'info_dict': {
+            'id': '601dc897fb37095537d48e6f',
+            'ext': 'mp3',
+            'title': 'Maciej Konieczny: "Podatek medialny to bardziej mechanizm kontroli niż podatkowy”',
+            'upload_date': '20210208',
+            'timestamp': 1612764000,
+        },
+    }, {
+        'url': 'https://player.acast.com/5d09057251a90dcf7fa8e985?theme=default&latest=1',
+        'info_dict': {
+            'id': '5d09057251a90dcf7fa8e985',
+            'title': 'DGPtalk: Obiektywnie o biznesie',
+        },
+        'playlist_mincount': 5,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage, **kw):
+        return [mobj.group('url')
+                for mobj in re.finditer(
+                    r'(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>%s(?:\?[^#]+)?(?:\#.+?)?)\1' % ACastPlayerIE._VALID_URL,
+                    webpage)]
+
+    def _real_extract(self, url):
+        display_id = self._match_id(url)
+        webpage = self._download_webpage(url, display_id)
+
+        data = self._parse_json(
+            js_to_json(
+                self._search_regex(
+                    r'(?s)var _global\s*=\s*({.+?});',
+                    webpage, 'podcast data')), display_id)
+
+        show = data['show']
+
+        players = [{
+            'id': player['_id'],
+            'title': player['title'],
+            'url': player['audio'],
+            'duration': float_or_none(player.get('duration')),
+            'timestamp': parse_iso8601(player.get('publishDate')),
+            'thumbnail': urljoin('https://player.acast.com/', player.get('cover')),
+            'series': show['title'],
+            'episode': player['title'],
+        } for player in data['player']]
+
+        if len(players) > 1:
+            info_dict = {
+                '_type': 'playlist',
+                'entries': players,
+                'id': show['_id'],
+                'title': show['title'],
+                'series': show['title'],
+            }
+            if show.get('cover'):
+                info_dict['thumbnails'] = [{
+                    'url': urljoin('https://player.acast.com/', show['cover']['url']),
+                    'filesize': int_or_none(show['cover'].get('size')),
+                }]
+            return info_dict
+
+        if len(players) == 1:
+            return players[0]
+
+        raise ExtractorError('No podcast episodes found')
diff --git a/haruhi_dl/extractor/extractors.py b/haruhi_dl/extractor/extractors.py
index 6cb94b4c9..334c5b755 100644
--- a/haruhi_dl/extractor/extractors.py
+++ b/haruhi_dl/extractor/extractors.py
@@ -17,6 +17,7 @@ from .academicearth import AcademicEarthCourseIE
 from .acast import (
     ACastIE,
     ACastChannelIE,
+    ACastPlayerIE,
 )
 from .adn import ADNIE
 from .adobeconnect import AdobeConnectIE
diff --git a/haruhi_dl/extractor/pulsembed.py b/haruhi_dl/extractor/pulsembed.py
index 7a0a5b239..9627c49c4 100644
--- a/haruhi_dl/extractor/pulsembed.py
+++ b/haruhi_dl/extractor/pulsembed.py
@@ -23,6 +23,7 @@ from ..utils import (
 from .libsyn import LibsynIE
 from .xnews import XLinkIE
 from .tvp import TVPEmbedIE
+from .acast import ACastPlayerIE
 
 
 class PulseVideoIE(InfoExtractor):
@@ -180,6 +181,15 @@ class PulsEmbedIE(InfoExtractor):
             'timestamp': 1607174136,
             'upload_date': '20201205',
         },
+    }, {
+        'url': 'pulsembed:q31qhd1LC',
+        'info_dict': {
+            'id': '601dc897fb37095537d48e6f',
+            'ext': 'mp3',
+            'title': 'Maciej Konieczny: "Podatek medialny to bardziej mechanizm kontroli niż podatkowy”',
+            'upload_date': '20210208',
+            'timestamp': 1612764000,
+        },
     }]
 
     @staticmethod
@@ -260,6 +270,7 @@ class PulsEmbedIE(InfoExtractor):
                 LibsynIE,
                 XLinkIE,
                 TVPEmbedIE,
+                ACastPlayerIE,
                 PulseVideoIE,
         ):
             embie_urls = embie._extract_urls(webpage, url=referer)