diff --git a/haruhi_dl/extractor/extractors.py b/haruhi_dl/extractor/extractors.py index 1c7fa90eb..b6d3a6c82 100644 --- a/haruhi_dl/extractor/extractors.py +++ b/haruhi_dl/extractor/extractors.py @@ -1107,6 +1107,7 @@ from .scte import ( SCTECourseIE, ) from .seeker import SeekerIE +from .sejmpl import SejmPlArchivalIE from .senateisvp import SenateISVPIE from .sendtonews import SendtoNewsIE from .servus import ServusIE diff --git a/haruhi_dl/extractor/sejmpl.py b/haruhi_dl/extractor/sejmpl.py new file mode 100644 index 000000000..31e3d766c --- /dev/null +++ b/haruhi_dl/extractor/sejmpl.py @@ -0,0 +1,61 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + clean_html, +) + +import datetime +import re + + +class SejmPlArchivalIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?sejm\.gov\.pl/Sejm(?P\d+)\.nsf/transmisje_arch\.xsp(?:\?(?:[^&\s]+(?:&[^&\s]+)*)?)?(?:#|unid=)(?P[\dA-F]+)' + IE_NAME = 'sejm.pl:archival' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + term, video_id = mobj.group('term', 'id') + data = self._download_json( + 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp/json/%s' % (term, video_id), + video_id, headers={ + 'Referer': 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp' % (term), + }) + params = data['params'] + + def iso_date_to_wtf_atende_wants(date): + date = datetime.datetime.fromisoformat(date) + # atende uses timestamp but since 2001 instead of 1970 + date = date.replace(year=date.year - 31) + # also it's in milliseconds + return int(date.timestamp() * 1000) + + start_time = iso_date_to_wtf_atende_wants(params['start']) + stop_time = iso_date_to_wtf_atende_wants(params['stop']) + time_query = 'startTime=%d&stopTime=%d' % (start_time, stop_time) + + file = params['file'].replace('http:', 'https:') + formats = [{ + 'url': file + '?' + time_query, + 'ext': 'flv', + 'format_id': 'direct-0', + 'preference': -1, # VERY slow to download (~200 KiB/s, compared to ~10-15 MiB/s by DASH/HLS) + }] + formats.extend(self._extract_mpd_formats( + file.replace('/nvr/', '/livedash/') + '?indexMode=true&' + time_query, + video_id, mpd_id='dash')) + formats.extend(self._extract_m3u8_formats( + file.replace('/nvr/', '/livehls/') + '/playlist.m3u8?indexMode=true&' + time_query, + video_id, m3u8_id='hls')) + + self._sort_formats(formats) + + duration = stop_time - start_time + + return { + 'formats': formats, + 'id': video_id, + 'title': data['title'], + 'description': clean_html(data['desc']), + 'duration': duration, + }