From 44ed85b18b4435ec1822732a3f40cf2b0fa43c3d Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Tue, 13 Apr 2021 00:17:17 +0200 Subject: [PATCH] + castos extractors --- haruhi_dl/extractor/castos.py | 91 +++++++++++++++++++++++++++++++ haruhi_dl/extractor/extractors.py | 1 + haruhi_dl/extractor/generic.py | 27 +++++++++ 3 files changed, 119 insertions(+) create mode 100644 haruhi_dl/extractor/castos.py diff --git a/haruhi_dl/extractor/castos.py b/haruhi_dl/extractor/castos.py new file mode 100644 index 000000000..774e4a148 --- /dev/null +++ b/haruhi_dl/extractor/castos.py @@ -0,0 +1,91 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + parse_duration, +) + +import re + + +class CastosHostedIE(InfoExtractor): + _VALID_URL = r'https?://[^/.]+\.castos\.com/(?:player|episodes)/(?P[\da-zA-Z-]+)' + IE_NAME = 'castos:hosted' + + _TESTS = [{ + 'url': 'https://audience.castos.com/player/408278', + 'info_dict': { + 'id': '408278', + 'ext': 'mp3', + }, + }, { + 'url': 'https://audience.castos.com/episodes/improve-your-podcast-production', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage, **kw): + return [mobj.group(1) for mobj + in re.finditer( + r']+(?\s+([^<]+)', webpage, 'series name') + title = self._html_search_regex( + r'
([^<]+)
', webpage, 'episode title') + + audio_url = self._html_search_regex( + r'