+ castos extractors

This commit is contained in:
Lauren Liberda 2021-04-13 00:17:17 +02:00
parent 2bd0f6069a
commit 44ed85b18b
3 changed files with 119 additions and 0 deletions

View file

@ -0,0 +1,91 @@
# coding: utf-8
import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    unescapeHTML,
)
class CastosHostedIE(InfoExtractor):
    """Extractor for episode/player pages hosted on ``*.castos.com``."""

    _VALID_URL = r'https?://[^/.]+\.castos\.com/(?:player|episodes)/(?P<id>[\da-zA-Z-]+)'
    IE_NAME = 'castos:hosted'
    _TESTS = [{
        'url': 'https://audience.castos.com/player/408278',
        'info_dict': {
            'id': '408278',
            'ext': 'mp3',
        },
    }, {
        'url': 'https://audience.castos.com/episodes/improve-your-podcast-production',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage, **kw):
        """Return the URLs of Castos player iframes found in *webpage*.

        Only ``<iframe>`` tags whose ``src`` points at
        ``https://<subdomain>.castos.com/player/<digits>`` are matched.
        Extra keyword arguments are accepted and ignored.
        """
        # The (?<!-) lookbehind rejects attributes that merely end in
        # "-src" (e.g. data-src) so that only the real src is captured.
        return [
            mobj.group(1)
            for mobj in re.finditer(
                r'<iframe\b[^>]+(?<!-)src="(https?://[^/.]+\.castos\.com/player/\d+)',
                webpage)
        ]

    def _real_extract(self, url):
        """Download the player page at *url* and build its info dict.

        The episode title and the audio URL are mandatory; the series name
        and the duration are optional metadata, so their absence must not
        abort the extraction.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Mandatory fields: extraction is pointless without them.
        title = self._html_search_regex(
            r'<div class="episode-title">([^<]+)</div>', webpage, 'episode title')
        audio_url = self._html_search_regex(
            r'<audio class="clip">\s+<source\b[^>]+src="(https?://[^"]+)"', webpage, 'audio url')

        # Optional fields: searched non-fatally so a page lacking them
        # still yields a playable result.
        series = self._html_search_regex(
            r'<div class="show">\s+<strong>([^<]+)</strong>', webpage, 'series name',
            fatal=False)
        duration = parse_duration(self._search_regex(
            r'<time id="duration">(\d\d(?::\d\d)+)</time>', webpage, 'duration',
            fatal=False))

        return {
            'id': video_id,
            'title': title,
            'url': audio_url,
            'duration': duration,
            'series': series,
            'episode': title,
        }
class CastosSSPIE(InfoExtractor):
    """Helper that finds Castos player markup embedded directly in a page.

    It defines no ``_VALID_URL``; callers use ``_extract_entries`` on a
    webpage they have already downloaded.
    """

    @classmethod
    def _extract_entries(cls, webpage, **kw):
        """Return one info dict per ``castos-player`` block in *webpage*.

        Each ``<div class="castos-player ..." data-episode="<id>-<hash>">``
        block is searched for its title, audio URL, series name and
        duration.  Blocks lacking a title or an audio URL are skipped.
        Extra keyword arguments are accepted and ignored.

        Returns a (possibly empty) list of dicts with the keys ``id``,
        ``title``, ``url``, ``duration``, ``series`` and ``episode``.
        """

        def search_entry(regex, html):
            # First capture group of *regex* in *html*, or None if absent.
            # The value comes straight from HTML source, so entities such
            # as &amp; must be decoded before it is used as a title or URL.
            res = re.search(regex, html)
            if res:
                return unescapeHTML(res.group(1))
            return None

        entries = []
        for found in re.finditer(
                r'(?s)<div class="castos-player[^"]*"[^>]*data-episode="(\d+)-[a-z\d]+">(.+?</nav>)\s*</div>',
                webpage):
            # group(1): numeric part of data-episode; group(2): player body.
            video_id, entry = found.group(1, 2)

            title = search_entry(
                r'<div class="episode-title">([^<]+)</div>', entry)
            audio_url = search_entry(
                r'<audio class="clip[^"]*">\s+<source\b[^>]+src="(https?://[^"]+)"', entry)
            if not title or not audio_url:
                # Without both mandatory fields the entry is unusable.
                continue

            series = search_entry(
                r'<div class="show">\s+<strong>([^<]+)</strong>', entry)
            duration = parse_duration(search_entry(
                r'<time id="duration[^"]*">(\d\d(?::\d\d)+)</time>', entry))

            entries.append({
                'id': video_id,
                'title': title,
                'url': audio_url,
                'duration': duration,
                'series': series,
                'episode': title,
            })
        return entries

View file

@ -183,6 +183,7 @@ from .carambatv import (
CarambaTVPageIE, CarambaTVPageIE,
) )
from .cartoonnetwork import CartoonNetworkIE from .cartoonnetwork import CartoonNetworkIE
from .castos import CastosHostedIE
from .cbc import ( from .cbc import (
CBCIE, CBCIE,
CBCPlayerIE, CBCPlayerIE,

View file

@ -137,6 +137,10 @@ from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE from .medialaan import MedialaanIE
from .simplecast import SimplecastIE from .simplecast import SimplecastIE
from .spreaker import SpreakerIE from .spreaker import SpreakerIE
from .castos import (
CastosHostedIE,
CastosSSPIE,
)
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -2316,6 +2320,24 @@ class GenericIE(InfoExtractor):
'timestamp': 1617024666, 'timestamp': 1617024666,
}, },
}, },
{
# Castos (hosted) player
'url': 'https://castos.com/enhanced-podcast-player/',
'info_dict': {
'id': '210448',
'ext': 'mp3',
'title': '4 Ways To Create A Video Podcast (And Why You Should Try It)',
},
},
{
# Castos Super Simple Podcasting (WordPress plugin, selfhosted)
'url': 'https://pzbn.pl/4-heated-terf-moment/',
'info_dict': {
'id': '38',
'ext': 'mp3',
'title': '#4: Heated TERF moment',
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2755,6 +2777,7 @@ class GenericIE(InfoExtractor):
MedialaanIE, MedialaanIE,
SimplecastIE, SimplecastIE,
SpreakerIE, SpreakerIE,
CastosHostedIE,
): ):
try: try:
ie_key = embie.ie_key() ie_key = embie.ie_key()
@ -3217,6 +3240,10 @@ class GenericIE(InfoExtractor):
if pulsembed_entries: if pulsembed_entries:
return self.playlist_result(pulsembed_entries, video_id, video_title) return self.playlist_result(pulsembed_entries, video_id, video_title)
castos_ssp_entries = CastosSSPIE._extract_entries(webpage)
if castos_ssp_entries:
return self.playlist_result(castos_ssp_entries, video_id, video_title)
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries: