From 85b2a459dd251da8071b355fdbd86579164fdad3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Fri, 26 Feb 2021 15:11:39 +0100 Subject: [PATCH] =?UTF-8?q?[wdr]=20Extend=20subtitles=20extraction=20and?= =?UTF-8?q?=20improve=20overall=20extraction=20(clo=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ses #22672, closes #22723) --- haruhi_dl/extractor/wdr.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/haruhi_dl/extractor/wdr.py b/haruhi_dl/extractor/wdr.py index ba97d983b..2903d189e 100644 --- a/haruhi_dl/extractor/wdr.py +++ b/haruhi_dl/extractor/wdr.py @@ -17,6 +17,7 @@ from ..utils import ( unified_strdate, update_url_query, urlhandle_detect_ext, + url_or_none, ) @@ -42,16 +43,20 @@ class WDRIE(InfoExtractor): is_live = metadata.get('mediaType') == 'live' tracker_data = metadata['trackerData'] + title = tracker_data['trackerClipTitle'] + media_resource = metadata['mediaResource'] formats = [] # check if the metadata contains a direct URL to a file - for kind, media_resource in media_resource.items(): + for kind, media in media_resource.items(): + if not isinstance(media, dict): + continue if kind not in ('dflt', 'alt'): continue - for tag_name, medium_url in media_resource.items(): + for tag_name, medium_url in media.items(): if tag_name not in ('videoURL', 'audioURL'): continue @@ -88,8 +93,16 @@ class WDRIE(InfoExtractor): 'url': caption_url, 'ext': 'ttml', }] - - title = tracker_data['trackerClipTitle'] + captions_hash = media_resource.get('captionsHash') + if isinstance(captions_hash, dict): + for ext, format_url in captions_hash.items(): + format_url = url_or_none(format_url) + if not format_url: + continue + subtitles.setdefault('de', []).append({ + 'url': format_url, + 'ext': determine_ext(format_url, None) or ext, + }) return { 'id': tracker_data.get('trackerClipId', video_id),