[generic] normalizing embeds part 2137
This commit is contained in:
parent
b1c1d64de0
commit
a3816f69be
|
@ -363,6 +363,16 @@ class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
|
|||
}]
|
||||
_OBJECT_TYPE = 'collection'
|
||||
|
||||
@staticmethod
def _extract_urls(webpage, url=None):
    """Return playlist URLs for an embedded Dailymotion jukebox widget.

    Only the first ``<iframe>`` in *webpage* whose ``src`` points at a
    Dailymotion ``/widget/jukebox`` URL is inspected. Each
    ``list[]=/playlist/<id>/`` parameter found in that (HTML-unescaped)
    ``src`` yields one protocol-relative
    ``//dailymotion.com/playlist/<id>`` URL.

    *url* is accepted so the signature matches the other
    ``_extract_urls`` helpers; it is not used here.

    Returns a list of URL strings, empty when no widget or no playlist
    parameter is found.
    """
    # Declared static so it can be called on the class without an
    # instance, like the sibling ``_extract_urls`` helpers.
    mobj = re.search(
        r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
    if not mobj:
        return []
    playlist_ids = re.findall(
        r'list\[\]=/playlist/([^/]+)/', unescapeHTML(mobj.group('url')))
    return ['//dailymotion.com/playlist/%s' % p for p in playlist_ids]
|
||||
|
||||
|
||||
class DailymotionUserIE(DailymotionPlaylistBaseIE):
|
||||
IE_NAME = 'dailymotion:user'
|
||||
|
|
|
@ -121,6 +121,7 @@ from .odnoklassniki import OdnoklassnikiIE
|
|||
from .kinja import KinjaEmbedIE
|
||||
from .onnetwork import OnNetworkLoaderIE
|
||||
from .embetty import EmbettyIE
|
||||
from .rtlnl import RtlNlIE
|
||||
|
||||
|
||||
class GenericIE(InfoExtractor):
|
||||
|
@ -2582,18 +2583,28 @@ class GenericIE(InfoExtractor):
|
|||
VimeoIE,
|
||||
SoundcloudEmbedIE,
|
||||
KalturaIE,
|
||||
RtlNlIE,
|
||||
TeachableIE, # must be before Wistia
|
||||
WistiaIE,
|
||||
SVTIE,
|
||||
):
|
||||
try:
|
||||
ie_key = embie.ie_key()
|
||||
embie_urls = embie._extract_urls(webpage,
|
||||
url=url)
|
||||
if embie_urls:
|
||||
entries = []
for embie_url in embie_urls:
    # One url_transparent entry per embed URL reported by the extractor.
    entry = {
        '_type': 'url_transparent',
        'url': embie_url,
        'ie_key': ie_key,
    }
    if ie_key == "Wistia":
        # Attach the page-level uploader to the entry itself; the
        # previous code subscripted the ``entries`` list with a string,
        # which raises TypeError.
        entry["uploader"] = video_uploader
    if ie_key == "Bandcamp":
        # NOTE(review): clearing ie_key presumably lets the URL be
        # matched against all extractors -- confirm.
        entry["ie_key"] = None
    entries.append(entry)
|
||||
return {
|
||||
'_type': 'playlist',
|
||||
'entries': entries,
|
||||
|
@ -2605,50 +2616,6 @@ class GenericIE(InfoExtractor):
|
|||
self.report_warning('The exception above was caused by: %sIE' % embie.ie_key())
|
||||
raise exc
|
||||
|
||||
# Look for embedded rtl.nl player
|
||||
matches = re.findall(
|
||||
r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
|
||||
webpage)
|
||||
if matches:
|
||||
return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
|
||||
|
||||
vid_me_embed_url = self._search_regex(
|
||||
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
|
||||
webpage, 'vid.me embed', default=None)
|
||||
if vid_me_embed_url is not None:
|
||||
return self.url_result(vid_me_embed_url, 'Vidme')
|
||||
|
||||
# Look for embedded Dailymotion playlist player (#3822)
|
||||
m = re.search(
|
||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
|
||||
if m:
|
||||
playlists = re.findall(
|
||||
r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
|
||||
if playlists:
|
||||
return self.playlist_from_matches(
|
||||
playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
|
||||
|
||||
# Look for Teachable embeds, must be before Wistia
|
||||
teachable_url = TeachableIE._extract_url(webpage, url)
|
||||
if teachable_url:
|
||||
return self.url_result(teachable_url)
|
||||
|
||||
# Look for embedded Wistia player
|
||||
wistia_urls = WistiaIE._extract_urls(webpage)
|
||||
if wistia_urls:
|
||||
playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
|
||||
for entry in playlist['entries']:
|
||||
entry.update({
|
||||
'_type': 'url_transparent',
|
||||
'uploader': video_uploader,
|
||||
})
|
||||
return playlist
|
||||
|
||||
# Look for SVT player
|
||||
svt_url = SVTIE._extract_url(webpage)
|
||||
if svt_url:
|
||||
return self.url_result(svt_url, 'SVT')
|
||||
|
||||
# Look for Bandcamp pages with custom domain
|
||||
mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
|
||||
if mobj is not None:
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
|
@ -98,6 +100,12 @@ class RtlNlIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage, url=None):
    """Collect the ``src`` of every rtl.nl embed ``<iframe>`` in *webpage*.

    *url* is accepted but not used. Returns the matched ``src`` values
    as a list in document order; the list is empty when nothing matches.
    """
    embed_re = r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"'
    return [mobj.group(1) for mobj in re.finditer(embed_re, webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
uuid = self._match_id(url)
|
||||
info = self._download_json(
|
||||
|
|
|
@ -108,11 +108,10 @@ class SVTIE(SVTBaseIE):
|
|||
}
|
||||
|
||||
@staticmethod
def _extract_urls(webpage, url=None):
    """Return every SVT URL referenced by an iframe ``src`` or an ``href``.

    The pattern embeds ``SVTIE._VALID_URL`` and captures it, together
    with any trailing characters up to the closing double quote, as the
    ``url`` group.

    *url* is accepted but not used. Returns a list of the captured URLs
    in document order, empty when nothing matches.
    """
    # re.finditer returns an iterator (always truthy, no .group), so the
    # matches are consumed in the comprehension rather than tested.
    matches = re.finditer(
        r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
    return [match.group('url') for match in matches]
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
|
|
@ -144,11 +144,11 @@ class TeachableIE(TeachableBaseIE):
|
|||
webpage)
|
||||
|
||||
@staticmethod
def _extract_urls(webpage, url=None):
    """Return the prefixed page URL when *webpage* is a Teachable page.

    If ``TeachableIE._is_teachable(webpage)`` is true and *url* has a
    ``/courses`` or ``/p`` path directly after the host, the result is a
    one-element list holding *url* prefixed with
    ``TeachableBaseIE._URL_PREFIX``.

    Always returns a list; it is empty when the page is not Teachable,
    when *url* is missing, or when *url* does not match.
    """
    if not TeachableIE._is_teachable(webpage):
        return []
    # Guard against the default url=None, which re.match would reject
    # with a TypeError.
    if url and re.match(r'https?://[^/]+/(?:courses|p)', url):
        return ['%s%s' % (TeachableBaseIE._URL_PREFIX, url)]
    return []
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import itertools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_HTTPError
|
||||
|
@ -132,6 +133,12 @@ class VidmeIE(InfoExtractor):
|
|||
},
|
||||
}]
|
||||
|
||||
@staticmethod
def _extract_urls(webpage, url=None):
    """Collect every quoted ``src`` value in *webpage* that is a vid.me URL.

    *url* is accepted but not used. Returns the matched URLs as a list
    in document order; the list is empty when nothing matches.
    """
    src_re = r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]'
    return [mobj.group(1) for mobj in re.finditer(src_re, webpage)]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ class WistiaIE(InfoExtractor):
|
|||
return urls[0] if urls else None
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
def _extract_urls(webpage, url=None):
|
||||
urls = []
|
||||
for match in re.finditer(
|
||||
r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
|
||||
|
|
Loading…
Reference in a new issue