From 1643b0b49081e285e696574ca44ce807f4271786 Mon Sep 17 00:00:00 2001 From: Ben Rog-Wilhelm Date: Mon, 31 May 2021 23:19:55 +0200 Subject: [PATCH] [kaltura] Improve iframe extraction (#28969) Co-authored-by: Sergey M. --- haruhi_dl/extractor/gdcvault.py | 15 +++++++++++++++ haruhi_dl/extractor/kaltura.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/haruhi_dl/extractor/gdcvault.py b/haruhi_dl/extractor/gdcvault.py index 2f555c1d4..5ad40ee23 100644 --- a/haruhi_dl/extractor/gdcvault.py +++ b/haruhi_dl/extractor/gdcvault.py @@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor): 'format': 'mp4-408', }, }, + { + # Kaltura embed, whitespace between quote and embedded URL in iframe's src + 'url': 'https://www.gdcvault.com/play/1025699', + 'info_dict': { + 'id': '0_zagynv0a', + 'ext': 'mp4', + 'title': 'Tech Toolbox', + 'upload_date': '20190408', + 'uploader_id': 'joe@blazestreaming.com', + 'timestamp': 1554764629, + }, + 'params': { + 'skip_download': True, + }, + }, ] def _login(self, webpage_url, display_id): diff --git a/haruhi_dl/extractor/kaltura.py b/haruhi_dl/extractor/kaltura.py index 5e46c418b..da9405a93 100644 --- a/haruhi_dl/extractor/kaltura.py +++ b/haruhi_dl/extractor/kaltura.py @@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor): ''', webpage)) or list(re.finditer( r'''(?xs) - <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["']) + <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s* (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+) (?:(?!(?P=q1)).)* [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)