a bit more embed searching normalization
This commit is contained in:
parent
8f86520b7a
commit
1d601522cc
|
@ -2561,12 +2561,28 @@ class GenericIE(InfoExtractor):
|
||||||
ExpressenIE,
|
ExpressenIE,
|
||||||
ZypeIE,
|
ZypeIE,
|
||||||
OnNetworkLoaderIE,
|
OnNetworkLoaderIE,
|
||||||
|
VimeoIE,
|
||||||
|
SoundcloudEmbedIE,
|
||||||
|
KalturaIE,
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
embie_urls = embie._extract_urls(webpage,
|
embie_urls = embie._extract_urls(webpage,
|
||||||
url=url)
|
url=url)
|
||||||
if embie_urls:
|
if embie_urls:
|
||||||
return self.playlist_from_matches(embie_urls, video_id, video_title, ie=embie.ie_key())
|
entries = []
|
||||||
|
for embie_url in embie_urls:
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': smuggle_url(unescapeHTML(embie_url), {'source_url': embie_url}),
|
||||||
|
'ie_key': embie.ie_key(),
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'id': video_id,
|
||||||
|
'title': video_title,
|
||||||
|
'uploader': video_uploader,
|
||||||
|
}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.report_warning('The exception above was caused by: %sIE' % embie.ie_key())
|
self.report_warning('The exception above was caused by: %sIE' % embie.ie_key())
|
||||||
raise exc
|
raise exc
|
||||||
|
@ -2578,10 +2594,6 @@ class GenericIE(InfoExtractor):
|
||||||
if matches:
|
if matches:
|
||||||
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||||
|
|
||||||
vimeo_urls = VimeoIE._extract_urls(url, webpage)
|
|
||||||
if vimeo_urls:
|
|
||||||
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
|
|
||||||
|
|
||||||
vid_me_embed_url = self._search_regex(
|
vid_me_embed_url = self._search_regex(
|
||||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||||
webpage, 'vid.me embed', default=None)
|
webpage, 'vid.me embed', default=None)
|
||||||
|
@ -2778,11 +2790,6 @@ class GenericIE(InfoExtractor):
|
||||||
if myvi_url:
|
if myvi_url:
|
||||||
return self.url_result(myvi_url)
|
return self.url_result(myvi_url)
|
||||||
|
|
||||||
# Look for embedded soundcloud player
|
|
||||||
soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
|
|
||||||
if soundcloud_urls:
|
|
||||||
return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
|
|
||||||
|
|
||||||
# Look for embedded mtvservices player
|
# Look for embedded mtvservices player
|
||||||
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
|
||||||
if mtvservices_url:
|
if mtvservices_url:
|
||||||
|
@ -2842,14 +2849,6 @@ class GenericIE(InfoExtractor):
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'Zapiks')
|
return self.url_result(mobj.group('url'), 'Zapiks')
|
||||||
|
|
||||||
# Look for Kaltura embeds
|
|
||||||
kaltura_urls = KalturaIE._extract_urls(webpage)
|
|
||||||
if kaltura_urls:
|
|
||||||
return self.playlist_from_matches(
|
|
||||||
kaltura_urls, video_id, video_title,
|
|
||||||
getter=lambda x: smuggle_url(x, {'source_url': url}),
|
|
||||||
ie=KalturaIE.ie_key())
|
|
||||||
|
|
||||||
# Look for EaglePlatform embeds
|
# Look for EaglePlatform embeds
|
||||||
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
eagleplatform_url = EaglePlatformIE._extract_url(webpage)
|
||||||
if eagleplatform_url:
|
if eagleplatform_url:
|
||||||
|
|
|
@ -117,7 +117,7 @@ class KalturaIE(InfoExtractor):
|
||||||
return urls[0] if urls else None
|
return urls[0] if urls else None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage):
|
def _extract_urls(webpage, url=None):
|
||||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||||
finditer = (
|
finditer = (
|
||||||
re.finditer(
|
re.finditer(
|
||||||
|
@ -159,13 +159,15 @@ class KalturaIE(InfoExtractor):
|
||||||
for k, v in embed_info.items():
|
for k, v in embed_info.items():
|
||||||
if v:
|
if v:
|
||||||
embed_info[k] = v.strip()
|
embed_info[k] = v.strip()
|
||||||
url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
result_url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
|
||||||
escaped_pid = re.escape(embed_info['partner_id'])
|
escaped_pid = re.escape(embed_info['partner_id'])
|
||||||
service_mobj = re.search(
|
service_mobj = re.search(
|
||||||
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
|
||||||
webpage)
|
webpage)
|
||||||
|
smug = {'source_url': url}
|
||||||
if service_mobj:
|
if service_mobj:
|
||||||
url = smuggle_url(url, {'service_url': service_mobj.group('id')})
|
smug['service_url'] = service_mobj.group('id')
|
||||||
|
url = smuggle_url(result_url, smug)
|
||||||
urls.append(url)
|
urls.append(url)
|
||||||
return urls
|
return urls
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
@ -41,7 +42,7 @@ class SoundcloudEmbedIE(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage, **kwargs):
|
def _extract_urls(webpage, **kwargs):
|
||||||
return [m.group('url') for m in re.finditer(
|
return [unescapeHTML(m.group('url')) for m in re.finditer(
|
||||||
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
|
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
|
|
|
@ -519,7 +519,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
|
||||||
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
|
return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(url, webpage):
|
def _extract_urls(webpage, url=None):
|
||||||
urls = []
|
urls = []
|
||||||
# Look for embedded (iframe) Vimeo player
|
# Look for embedded (iframe) Vimeo player
|
||||||
for mobj in re.finditer(
|
for mobj in re.finditer(
|
||||||
|
|
Loading…
Reference in a new issue