[sejm.pl] support live streams

This commit is contained in:
Lauren Liberda 2021-04-14 14:01:54 +02:00
parent 44ed85b18b
commit 12a935cf42
2 changed files with 28 additions and 9 deletions

View file

@ -1109,7 +1109,7 @@ from .scte import (
) )
from .seeker import SeekerIE from .seeker import SeekerIE
from .sejmpl import ( from .sejmpl import (
SejmPlArchivalIE, SejmPlIE,
SejmPlVideoIE, SejmPlVideoIE,
) )
from .senateisvp import SenateISVPIE from .senateisvp import SenateISVPIE

View file

@ -3,7 +3,9 @@
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none,
js_to_json, js_to_json,
try_get,
) )
import datetime import datetime
@ -11,9 +13,9 @@ import re
from urllib.parse import parse_qs from urllib.parse import parse_qs
class SejmPlArchivalIE(InfoExtractor): class SejmPlIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sejm\.gov\.pl/Sejm(?P<term>\d+)\.nsf/transmisje_arch\.xsp(?:\?(?:[^&\s]+(?:&[^&\s]+)*)?)?(?:#|unid=)(?P<id>[\dA-F]+)' _VALID_URL = r'https?://(?:www\.)?sejm\.gov\.pl/Sejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?(?:[^&\s]+(?:&[^&\s]+)*)?)?(?:#|unid=)(?P<id>[\dA-F]+)'
IE_NAME = 'sejm.pl:archival' IE_NAME = 'sejm.gov.pl'
_TESTS = [{ _TESTS = [{
# multiple cameras, PJM translator # multiple cameras, PJM translator
@ -23,6 +25,10 @@ class SejmPlArchivalIE(InfoExtractor):
'title': '11. posiedzenie Sejmu IX kadencji', 'title': '11. posiedzenie Sejmu IX kadencji',
}, },
'playlist_count': 10, 'playlist_count': 10,
}, {
# live stream
'url': 'https://www.sejm.gov.pl/Sejm9.nsf/transmisje.xsp?unid=DF7D229E316BBC5AC12586A8003E90AC#',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -33,6 +39,7 @@ class SejmPlArchivalIE(InfoExtractor):
video_id, headers={ video_id, headers={
'Referer': 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp' % (term), 'Referer': 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp' % (term),
}) })
# despite the "transmisje_arch" name, this endpoint works for live streams too!
data = self._download_json( data = self._download_json(
'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp/json/%s' % (term, video_id), 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp/json/%s' % (term, video_id),
video_id, headers={ video_id, headers={
@ -48,16 +55,22 @@ class SejmPlArchivalIE(InfoExtractor):
return int(date.timestamp() * 1000) return int(date.timestamp() * 1000)
start_time = iso_date_to_wtf_atende_wants(params['start']) start_time = iso_date_to_wtf_atende_wants(params['start'])
stop_time = iso_date_to_wtf_atende_wants(params['stop']) if 'transmisje_arch.xsp' in url:
stop_time = iso_date_to_wtf_atende_wants(params['stop'])
else:
stop_time = None
duration = stop_time - start_time duration = (stop_time - start_time) if stop_time else None
entries = [] entries = []
def add_entry(file): def add_entry(file):
if not file: if not file:
return return
file = 'https:%s?startTime=%d&stopTime=%d' % (file, start_time, stop_time) file = 'https:%s?startTime=%d' % (file, start_time)
# live streams don't use stopTime
if stop_time:
file += '&stopTime=%d' % stop_time
stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id') stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
@ -84,6 +97,7 @@ class SejmPlArchivalIE(InfoExtractor):
'title': data['title'], 'title': data['title'],
'description': clean_html(data['desc']), 'description': clean_html(data['desc']),
'duration': duration, 'duration': duration,
'is_live': 'transmisje.xsp' in url,
} }
@ -105,9 +119,12 @@ class SejmPlVideoIE(InfoExtractor):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
house, camera, filename, qs = mobj.group('house', 'id', 'filename', 'qs') house, camera, filename, qs = mobj.group('house', 'id', 'filename', 'qs')
qs = parse_qs(qs) qs = parse_qs(qs)
start_time, stop_time = int(qs["startTime"][0]), int(qs["stopTime"][0]) start_time = int(qs["startTime"][0])
stop_time = int_or_none(try_get(qs, lambda x: x["stopTime"][0]))
file = f'https://r.dcs.redcdn.pl/%s/o2/{house}/{camera}/{filename}.livx?startTime={start_time}&stopTime={stop_time}' file = f'https://r.dcs.redcdn.pl/%s/o2/{house}/{camera}/{filename}.livx?startTime={start_time}'
if stop_time:
file += f'&stopTime={stop_time}'
file_index = file + '&indexMode=true' file_index = file + '&indexMode=true'
# sejm videos don't have an id, just a camera (pov) id and time range # sejm videos don't have an id, just a camera (pov) id and time range
@ -134,4 +151,6 @@ class SejmPlVideoIE(InfoExtractor):
'title': camera, 'title': camera,
'formats': formats, 'formats': formats,
'duration': duration, 'duration': duration,
# if there's no stop, it's live
'is_live': stop_time is None,
} }