eurozet player extractors (#16)

2020-12-07 03:48:12 +01:00 · 2020-12-07 03:48:12 +01:00 · a6102b5483
parent 24a54d5d52
commit a6102b5483
2 changed files with 201 additions and 0 deletions
--- a/haruhi_dl/extractor/eurozet.py
+++ b/haruhi_dl/extractor/eurozet.py
@ -2,6 +2,14 @@
 from __future__ import unicode_literals

 from .common import InfoExtractor
+from ..utils import (
+    compat_str,
+    ExtractorError,
+    int_or_none,
+    str_or_none,
+    url_or_none,
+)
+import re


 class EurozetArticleIE(InfoExtractor):
@ -49,3 +57,193 @@ class EurozetArticleIE(InfoExtractor):
        })

        return info_dict
+
+
+class EurozetPlayerStreamIE(InfoExtractor):
+    """Extract the live stream offered on a player.[station].pl front page."""
+
+    IE_NAME = 'eurozet:player:stream'
+    _VALID_URL = r'https?://player\.(?P<id>radiozet|chillizet|antyradio|meloradio)\.pl/?(?:\?[^#]*)?(?:#.*)?$'
+
+    # This endpoint exists on every player.[station].pl domain, but each one
+    # returns the same JSON listing all stations, so a single host is queried.
+    _STREAM_LIST = 'https://player.radiozet.pl/api/stations'
+
+    # Prefix removed from the station title when present
+    # (presumably titles look like "Player <station name>" - confirm against the API).
+    _TITLE_PREFIX = 'Player '
+
+    _TESTS = [{
+        'url': 'https://player.antyradio.pl/',
+        'info_dict': {
+            'id': '11662',
+            'ext': 'livx',
+            'title': 'Antyradio',
+        },
+    }, {
+        'url': 'https://player.radiozet.pl/?fbclid=aoeu',
+        'only_matching': True,
+    }, {
+        'url': 'https://player.chillizet.pl/#',
+        'only_matching': True,
+    }, {
+        'url': 'https://player.meloradio.pl',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict of the live stream for the station in ``url``.
+
+        Raises ExtractorError when the station list has no entry whose
+        'station' field equals the codename captured from the URL.
+        """
+        # The "id" group of _VALID_URL is the station codename (the subdomain).
+        station_codename = self._match_id(url)
+        station_list = self._download_json(self._STREAM_LIST, station_codename, 'Downloading station list')
+
+        station = next(
+            (entry for entry in station_list if entry.get('station') == station_codename),
+            None)
+        if not station:
+            raise ExtractorError('Station not found')
+
+        # Only strip the prefix when it is really there; slicing blindly would
+        # mangle other titles and crash on a missing one.
+        title = station.get('title') or station_codename
+        if title.startswith(self._TITLE_PREFIX):
+            title = title[len(self._TITLE_PREFIX):]
+
+        return {
+            'id': str_or_none(station.get('node_id')),
+            'title': title,
+            'url': station['player']['stream'],
+            'is_live': True,
+        }
+
+
+class EurozetPlayerPodcastIE(InfoExtractor):
+    """Extract podcast series (as playlists) and single episodes from player.[station].pl."""
+
+    IE_NAME = 'eurozet:player:podcast'
+    _VALID_URL = r'https?://player\.(?P<station>radiozet|chillizet|antyradio|meloradio)\.pl/Podcasty/(?P<series>[^/\s#\?]+/)?(?P<id>[^/\s#\?]+)'
+
+    _PODCAST_LIST_URL_TPL = 'https://player.%(station)s.pl/api/podcasts/getPodcastListByProgram/(node)/%(podcast_node)s/(station)/%(station)s'
+
+    _TESTS = [{
+        'url': 'https://player.meloradio.pl/Podcasty/Horoskop-wrozbity-Macieja',
+        'info_dict': {
+            'id': '14501',
+            'title': 'Horoskop wróżbity Macieja',
+            'description': 'Wróżbita Maciej Skrzątek od poniedziałku do piątku o 9:15 w Meloradiu prezentuje starannie przygotowany horoskop dla wszystkich znaków zodiaku.',
+        },
+        'playlist_mincount': 300,
+    }, {
+        'url': 'https://player.antyradio.pl/Podcasty/Historia-niejednej-piosenki/Imagine-Johna-Lennona-w-zaskakujacej-wersji',
+        'info_dict': {
+            'id': '60358',
+            'ext': 'mp3',
+            'description': 'Tomasz Kasprzyk przedstawia ciekawostki i nieznane historie na temat powstania wielkich rockowych przebojów, ich coverów, autorów i wykonawców.',
+            'upload_date': '20201203',
+            'timestamp': 1606989840,
+            'title': 'Imagine Johna Lennona w zaskakującej wersji',
+        },
+    }, {
+        'url': 'https://player.radiozet.pl/Podcasty/Listy-do-redakcji-Radia-ZET-audycja-nie-do-konca-powazna/',
+        'only_matching': True,
+    }, {
+        'url': 'https://player.chillizet.pl/Podcasty/Tylko-dla-doroslych/Przypadek-Elliota-Page-a-rozmowa-o-transplciowosci-z-Emilia-Wisniewska',
+        'only_matching': True,
+    }]
+
+    def _podcast_to_info_dict(self, podcast_dict, station):
+        """Convert one episode entry of the podcast list API into an info dict.
+
+        ``station`` is the station codename used to build the episode page URL.
+        'node_id' and 'player.stream' are required; the other fields are optional.
+        """
+        player = podcast_dict['player']
+        episode_path = podcast_dict.get('url')
+        return {
+            'id': compat_str(podcast_dict['node_id']),
+            'title': str_or_none(podcast_dict.get('title', '')),
+            'url': url_or_none(player['stream']),
+            # Optional metadata: a missing duration must not abort extraction.
+            'duration': int_or_none(player.get('duration')),
+            'timestamp': int_or_none(podcast_dict.get('published_date')),
+            # Avoid producing a bogus "...plNone" address when the path is absent.
+            'webpage_url': 'https://player.%s.pl%s' % (station, episode_path) if episode_path else None,
+        }
+
+    def _download_podcast_list(self, station, podcast_node, offset=0):
+        """Download one page of the episode list of the program ``podcast_node``.
+
+        ``offset`` is the zero-based page index; it is appended to the URL only
+        when greater than zero.
+        """
+        list_url = self._PODCAST_LIST_URL_TPL % {'station': station, 'podcast_node': podcast_node}
+        if offset > 0:
+            list_url += '/(offset)/%d' % offset
+        return self._download_json(list_url, podcast_node,
+                                   'Downloading podcast list%s' % (' (page #%d)' % (offset + 1) if offset > 0 else ''))
+
+    @staticmethod
+    def _find_episode(podcasts, podcast_id):
+        """Return the entry of ``podcasts`` whose node_id equals ``podcast_id``, or None."""
+        for podcast in podcasts:
+            if str_or_none(podcast.get('node_id')) == podcast_id:
+                return podcast
+        return None
+
+    def _real_extract(self, url):
+        """Return a playlist of the whole series or the info dict of one episode.
+
+        Raises ExtractorError when the episode list is empty or when a single
+        episode was requested but is not present on any page of the list.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        station_codename, series_slug, page_slug = mobj.group('station', 'series', 'id')
+
+        # A URL without a series segment is always handled as a playlist; an
+        # episode URL yields just that episode unless 'noplaylist' is set falsy.
+        no_playlist = False if not series_slug else self._downloader.params.get('noplaylist', True)
+
+        webpage = self._download_webpage(url, page_slug)
+        podcast_node = self._html_search_regex(r'<div id="player"[^>]+data-program="(\d+)"', webpage, 'podcast list id')
+        podcast_id = self._html_search_regex(r'<div id="player"[^>]+data-id="(\d+)"', webpage, 'podcast id')
+
+        podcast_list = self._download_podcast_list(station_codename, podcast_node)
+        first_page = podcast_list['data']
+        if not first_page:
+            raise ExtractorError('No podcast episodes found')
+
+        # Series metadata is read from the first episode entry.
+        program = first_page[0]['program']
+        info_dict = {
+            'id': podcast_node,
+            'title': (program.get('title') or '').strip(),
+            'description': (program.get('desc') or '').strip(),
+        }
+
+        # Integer ceiling division: the page size is taken to be the length of
+        # the first page. Plain "/" would truncate on Python 2 and skip the
+        # final partial page.
+        total = int_or_none(podcast_list['info']['number_of_podcasts']) or 0
+        per_page = len(first_page)
+        page_count = (total + per_page - 1) // per_page
+
+        episode = self._find_episode(first_page, podcast_id) if no_playlist else None
+        episodes = list(first_page)
+        for page in range(1, page_count):
+            if episode is not None:
+                # Single-episode mode: stop paging as soon as it is found.
+                break
+            page_data = self._download_podcast_list(station_codename, podcast_node, offset=page)['data']
+            if no_playlist:
+                episode = self._find_episode(page_data, podcast_id)
+            else:
+                episodes.extend(page_data)
+
+        if no_playlist:
+            if episode is None:
+                raise ExtractorError('Podcast episode not found')
+            # Episode fields override the series id/title; the series
+            # description is kept.
+            info_dict.update(self._podcast_to_info_dict(episode, station_codename))
+            return info_dict
+
+        info_dict.update({
+            '_type': 'playlist',
+            'entries': [self._podcast_to_info_dict(x, station_codename) for x in episodes],
+        })
+        return info_dict
+
+
+class EurozetPlayerMusicStreamIE(InfoExtractor):
+    """Extract a live music channel from a player.[station].pl "Kanaly-muzyczne" page."""
+
+    IE_NAME = 'eurozet:player:musicstream'
+    _VALID_URL = r'https?://player\.(?P<station>radiozet|chillizet|antyradio|meloradio)\.pl/Kanaly-muzyczne/(?P<id>[^/\s#\?]+)'
+
+    _TESTS = [{
+        'url': 'https://player.radiozet.pl/Kanaly-muzyczne/Radio-ZET-Party',
+        'info_dict': {
+            'id': '12356',
+            'ext': 'mp3',
+            'title': 'Radio ZET Party',
+            'description': 'Imprezowe klasyki i nowości do dobrej zabawy',
+        },
+    }, {
+        'url': 'https://player.antyradio.pl/Kanaly-muzyczne/Antyradio-Hard',
+        'info_dict': {
+            'id': '13908',
+            'ext': 'mp3',
+            'title': 'Antyradio Hard',
+            'description': 'Muzyka dla fanów ostrych brzmień',
+        },
+    }, {
+        'url': 'https://player.meloradio.pl/Kanaly-muzyczne/Meloradio-Acoustic',
+        'only_matching': True,
+    }, {
+        'url': 'https://player.chillizet.pl/Kanaly-muzyczne/Chillizet-Covers',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        """Return the info dict of the live channel shown on the page at ``url``."""
+        # Only the slug is needed; it serves as the display id while the page loads.
+        page_slug = re.match(self._VALID_URL, url).group('id')
+
+        webpage = self._download_webpage(url, page_slug)
+        # The numeric channel id is read from the data-id attribute of the player element.
+        stream_id = self._html_search_regex(r'<div id="player"[^>]+data-id="(\d+)"', webpage, 'stream id')
+
+        # The channel API is queried on a fixed host; the first element of the
+        # returned list is used.
+        channels = self._download_json('https://player.chillizet.pl/api/channels/(channel)/%s' % stream_id, stream_id)
+        channel = channels[0]
+
+        stream_url = channel['player']['stream']
+        channel_title = channel['title']
+        return {
+            'id': stream_id,
+            'url': stream_url,
+            'title': channel_title,
+            'alt_title': channel.get('short_desc'),
+            'description': channel.get('desc'),
+            'is_live': True,
+        }
--- a/haruhi_dl/extractor/extractors.py
+++ b/haruhi_dl/extractor/extractors.py
@ -332,6 +332,9 @@ from .esri import EsriVideoIE
 from .europa import EuropaIE
 from .eurozet import (
    EurozetArticleIE,
+    EurozetPlayerStreamIE,
+    EurozetPlayerPodcastIE,
+    EurozetPlayerMusicStreamIE,
 )
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE