From 00fc0dea8cede1f1a71254b655dfcf2cfe7a4aaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=3D=3FUTF-8=3Fq=3FSergey=3D20M=3DE2=3D80=3DA4=3F=3D?= Date: Fri, 26 Feb 2021 14:39:10 +0100 Subject: [PATCH] =?UTF-8?q?[cspan]=20Extract=20info=20from=20jwplayer=20da?= =?UTF-8?q?ta=20(closes=20#3672,=20closes=20#3734,=20=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …closes #10638, closes #13030, closes #18806, closes #23148, closes #24461, closes #26171, closes #26800, closes #27263) --- haruhi_dl/extractor/cspan.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/haruhi_dl/extractor/cspan.py b/haruhi_dl/extractor/cspan.py index 3356cc280..766942146 100644 --- a/haruhi_dl/extractor/cspan.py +++ b/haruhi_dl/extractor/cspan.py @@ -10,6 +10,8 @@ from ..utils import ( find_xpath_attr, get_element_by_class, int_or_none, + js_to_json, + merge_dicts, smuggle_url, unescapeHTML, ) @@ -98,6 +100,26 @@ class CSpanIE(InfoExtractor): bc_attr['data-bcid']) return self.url_result(smuggle_url(bc_url, {'source_url': url})) + def add_referer(formats): + for f in formats: + f.setdefault('http_headers', {})['Referer'] = url + + # As of 01.12.2020 this path looks to cover all cases making the rest + # of the code unnecessary + jwsetup = self._parse_json( + self._search_regex( + r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if jwsetup: + info = self._parse_jwplayer_data( + jwsetup, video_id, require_title=False, m3u8_id='hls', + base_url=url) + add_referer(info['formats']) + ld_info = self._search_json_ld(webpage, video_id, default={}) + return merge_dicts(info, ld_info) + + # Obsolete # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) @@ -165,8 +187,7 @@ class CSpanIE(InfoExtractor): formats = self._extract_m3u8_formats( path, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] - for f in formats: - f.setdefault('http_headers', {})['Referer'] = url + add_referer(formats) self._sort_formats(formats) entries.append({ 'id': '%s_%d' % (video_id, partnum + 1),