Normalize embed searching a bit more: Vimeo, SoundCloud and Kaltura embeds now go through the generic extractor loop
This commit is contained in:
parent
8f86520b7a
commit
1d601522cc
|
@ -2561,12 +2561,28 @@ class GenericIE(InfoExtractor):
|
|||
ExpressenIE,
|
||||
ZypeIE,
|
||||
OnNetworkLoaderIE,
|
||||
VimeoIE,
|
||||
SoundcloudEmbedIE,
|
||||
KalturaIE,
|
||||
):
|
||||
try:
|
||||
embie_urls = embie._extract_urls(webpage,
|
||||
url=url)
|
||||
if embie_urls:
|
||||
return self.playlist_from_matches(embie_urls, video_id, video_title, ie=embie.ie_key())
|
||||
entries = []
|
||||
for embie_url in embie_urls:
|
||||
entries.append({
|
||||
'_type': 'url_transparent',
|
||||
'url': smuggle_url(unescapeHTML(embie_url), {'source_url': embie_url}),
|
||||
'ie_key': embie.ie_key(),
|
||||
})
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
'id': video_id,
|
||||
'title': video_title,
|
||||
'uploader': video_uploader,
|
||||
}
|
||||
except Exception as exc:
|
||||
self.report_warning('The exception above was caused by: %sIE' % embie.ie_key())
|
||||
raise exc
|
||||
|
@ -2578,10 +2594,6 @@ class GenericIE(InfoExtractor):
|
|||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||
|
||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
||||
if vimeo_urls:
|
||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
|
@ -2778,11 +2790,6 @@ class GenericIE(InfoExtractor):
|
|||
if myvi_url:
|
||||
return self.url_result(myvi_url)
|
||||
|
||||
# Look for embedded soundcloud player
|
||||
soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
|
||||
if soundcloud_urls:
|
||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
|
||||
|
||||
# Look for embedded mtvservices player
|
||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||
if mtvservices_url:
|
||||
|
@ -2842,14 +2849,6 @@ class GenericIE(InfoExtractor):
|
|||
if mobj is not None:
|
||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||
|
||||
# Look for Kaltura embeds
|
||||
kaltura_urls = KalturaIE._extract_urls(webpage)
|
||||
if kaltura_urls:
|
||||
return self.playlist_from_matches(
|
||||
kaltura_urls, video_id, video_title,
|
||||
getter=lambda x: smuggle_url(x, {'source_url': url}),
|
||||
ie=KalturaIE.ie_key())
|
||||
|
||||
# Look for EaglePlatform embeds
|
||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||
if eagleplatform_url:
|
||||
|
|
|
@ -117,7 +117,7 @@ class KalturaIE(InfoExtractor):
|
|||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
def _extract_urls(webpage, url=None):
|
||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||
finditer = (
|
||||
re.finditer(
|
||||
|
@ -159,13 +159,15 @@ class KalturaIE(InfoExtractor):
|
|||
for k, v in embed_info.items():
|
||||
if v:
|
||||
embed_info[k] = v.strip()
|
||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
result_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||
escaped_pid = re.escape(embed_info['partner_id'])
|
||||
service_mobj = re.search(
|
||||
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||
webpage)
|
||||
smug = {'source_url': url}
|
||||
if service_mobj:
|
||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
||||
smug['service_url'] = service_mobj.group('id')
|
||||
url = smuggle_url(result_url, smug)
|
||||
urls.append(url)
|
||||
return urls
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ from ..utils import (
|
|||
mimetype2ext,
|
||||
str_or_none,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
|
@ -41,7 +42,7 @@ class SoundcloudEmbedIE(InfoExtractor):
|
|||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage, **kwargs):
|
||||
return [m.group('url') for m in re.finditer(
|
||||
return [unescapeHTML(m.group('url')) for m in re.finditer(
|
||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
|
||||
webpage)]
|
||||
|
||||
|
|
|
@ -519,7 +519,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
|||
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(url, webpage):
|
||||
def _extract_urls(webpage, url=None):
|
||||
urls = []
|
||||
# Look for embedded (iframe) Vimeo player
|
||||
for mobj in re.finditer(
|
||||
|
|
Loading…
Reference in a new issue