a bit more embed searching normalization

This commit is contained in:
Laura Liberda 2021-01-05 20:06:22 +01:00
parent 8f86520b7a
commit 1d601522cc
4 changed files with 25 additions and 23 deletions

View file

@ -2561,12 +2561,28 @@ class GenericIE(InfoExtractor):
ExpressenIE,
ZypeIE,
OnNetworkLoaderIE,
VimeoIE,
SoundcloudEmbedIE,
KalturaIE,
):
try:
embie_urls = embie._extract_urls(webpage,
url=url)
if embie_urls:
return self.playlist_from_matches(embie_urls, video_id, video_title, ie=embie.ie_key())
entries = []
for embie_url in embie_urls:
entries.append({
'_type': 'url_transparent',
'url': smuggle_url(unescapeHTML(embie_url), {'source_url': embie_url}),
'ie_key': embie.ie_key(),
})
return {
'_type': 'playlist',
'entries': entries,
'id': video_id,
'title': video_title,
'uploader': video_uploader,
}
except Exception as exc:
self.report_warning('The exception above was caused by: %sIE' % embie.ie_key())
raise exc
@ -2578,10 +2594,6 @@ class GenericIE(InfoExtractor):
if matches:
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
vimeo_urls = VimeoIE._extract_urls(url, webpage)
if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
webpage, 'vid.me embed', default=None)
@ -2778,11 +2790,6 @@ class GenericIE(InfoExtractor):
if myvi_url:
return self.url_result(myvi_url)
# Look for embedded soundcloud player
soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
if soundcloud_urls:
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
if mtvservices_url:
@ -2842,14 +2849,6 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
kaltura_urls = KalturaIE._extract_urls(webpage)
if kaltura_urls:
return self.playlist_from_matches(
kaltura_urls, video_id, video_title,
getter=lambda x: smuggle_url(x, {'source_url': url}),
ie=KalturaIE.ie_key())
# Look for EaglePlatform embeds
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
if eagleplatform_url:

View file

@ -117,7 +117,7 @@ class KalturaIE(InfoExtractor):
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, url=None):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = (
re.finditer(
@ -159,13 +159,15 @@ class KalturaIE(InfoExtractor):
for k, v in embed_info.items():
if v:
embed_info[k] = v.strip()
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
result_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
escaped_pid = re.escape(embed_info['partner_id'])
service_mobj = re.search(
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
webpage)
smug = {'source_url': url}
if service_mobj:
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
smug['service_url'] = service_mobj.group('id')
url = smuggle_url(result_url, smug)
urls.append(url)
return urls

View file

@ -24,6 +24,7 @@ from ..utils import (
mimetype2ext,
str_or_none,
try_get,
unescapeHTML,
unified_timestamp,
update_url_query,
url_or_none,
@ -41,7 +42,7 @@ class SoundcloudEmbedIE(InfoExtractor):
@staticmethod
def _extract_urls(webpage, **kwargs):
return [m.group('url') for m in re.finditer(
return [unescapeHTML(m.group('url')) for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)]

View file

@ -519,7 +519,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
@staticmethod
def _extract_urls(url, webpage):
def _extract_urls(webpage, url=None):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(