[generic] embed searching normalization 2/n

merge-requests/5/head
Laura Liberda 2021-01-01 07:05:16 +01:00
parent c73049bc5b
commit d915fe0b0c
45 changed files with 51 additions and 49 deletions

View File

@ -38,7 +38,7 @@ class APAIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -82,7 +82,7 @@ class Channel9IE(InfoExtractor):
_RSS_URL = 'http://channel9.msdn.com/%s/RSS'
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
webpage)

View File

@ -41,7 +41,7 @@ class CloudflareStreamIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -30,7 +30,7 @@ class DailyMailIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
webpage)

View File

@ -191,7 +191,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
xid'''
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
urls = []
# Look for embedded Dailymotion player
# https://developer.dailymotion.com/player#player-parameters

View File

@ -32,7 +32,7 @@ class DBTVIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [url for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)?//(?:www\.)?dagbladet\.no/video/embed/(?:[0-9A-Za-z_-]{11}|[a-zA-Z0-9]{8}).*?)\1',
webpage)]

View File

@ -36,7 +36,7 @@ class DrTuberIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?drtuber\.com/embed/\d+)',
webpage)

View File

@ -48,7 +48,7 @@ class ExpressenIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',

View File

@ -225,7 +225,7 @@ class FacebookIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
urls = []
for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',

View File

@ -59,7 +59,7 @@ class FoxNewsIE(AMPIE):
]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -2563,7 +2563,8 @@ class GenericIE(InfoExtractor):
OnNetworkLoaderIE,
):
try:
embie_urls = embie._extract_urls(webpage)
embie_urls = embie._extract_urls(webpage,
url=url)
if embie_urls:
return self.playlist_from_matches(embie_urls, video_id, video_title, ie=embie.ie_key())
except Exception as exc:

View File

@ -49,7 +49,7 @@ class IndavideoEmbedIE(InfoExtractor):
# http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
webpage)

View File

@ -41,7 +41,7 @@ class JojIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -31,7 +31,7 @@ class JWPlatformIE(InfoExtractor):
return urls[0] if urls else None
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
webpage)

View File

@ -89,7 +89,7 @@ class LiveLeakIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
webpage)

View File

@ -50,6 +50,6 @@ class MegaphoneIE(InfoExtractor):
}
@classmethod
def _extract_urls(cls, webpage):
def _extract_urls(cls, webpage, **kwargs):
return [m[0] for m in re.findall(
r'<iframe[^>]*?\ssrc=["\'](%s)' % cls._VALID_URL, webpage)]

View File

@ -67,7 +67,7 @@ class MofosexEmbedIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
webpage)

View File

@ -113,7 +113,7 @@ class NexxIE(InfoExtractor):
return mobj.group('id') if mobj else None
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf
@ -436,7 +436,7 @@ class NexxEmbedIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
# Reference:
# 1. https://nx-s.akamaized.net/files/201510/44.pdf

View File

@ -24,7 +24,7 @@ class OnNetworkLoaderIE(InfoExtractor):
_VALID_URL = r'''https?://video\.onnetwork\.tv/embed\.php\?(?:mid=(?P<mid>[^&]+))?(?:&?sid=(?P<sid>[^&\s]+))?(?:&?cId=onn-cid-(?P<cid>\d+))?(?:.+)?'''
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
matches = re.finditer(
r'''<script\s+[^>]*src=["'](%s.*?)["']''' % OnNetworkLoaderIE._VALID_URL,
webpage)

View File

@ -72,11 +72,12 @@ class PeerTubeSHIE(SelfhostedInfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
entries = re.findall(
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//[^/]+/videos/embed/%s)'''
def _extract_urls(webpage, **kwargs):
entries = re.finditer(
r'''(?x)<iframe[^>]+\bsrc=["\'](?:https?:)?//(?P<host>[^/]+)/videos/embed/(?P<video_id>%s)'''
% (PeerTubeSHIE._UUID_RE), webpage)
return entries
return ['peertube:%s:%s' % (mobj.group('host'), mobj.group('video_id'))
for mobj in entries]
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
return self._download_json(

View File

@ -158,7 +158,7 @@ class PornHubIE(PornHubBaseIE):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
webpage)

View File

@ -37,7 +37,7 @@ class RedTubeIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
webpage)

View File

@ -133,7 +133,7 @@ class RutubeIE(RutubeBaseIE):
return False if RutubePlaylistIE.suitable(url) else super(RutubeIE, cls).suitable(url)
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
webpage)]

View File

@ -40,7 +40,7 @@ class SoundcloudEmbedIE(InfoExtractor):
}
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [m.group('url') for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
webpage)]

View File

@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
webpage)

View File

@ -46,7 +46,7 @@ class SportBoxIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src="(https?://(?:news\.sportbox|matchtv)\.ru/vdl/player[^"]+)"',
webpage)

View File

@ -49,7 +49,7 @@ class SpringboardPlatformIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -198,7 +198,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
}]
@classmethod
def _extract_urls(cls, webpage):
def _extract_urls(cls, webpage, **kwargs):
m = re.search(
r'''(?x)
<meta\s+

View File

@ -197,7 +197,7 @@ class TNAFlixNetworkEmbedIE(TNAFlixNetworkBaseIE):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [url for _, url in re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.(?:tna|emp)flix\.com/video/\d+)\1',
webpage)]

View File

@ -32,7 +32,7 @@ class Tube8IE(KeezMoviesIE):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)',
webpage)

View File

@ -12,7 +12,7 @@ class TuneInBaseIE(InfoExtractor):
_API_BASE_URL = 'http://tunein.com/tuner/tune/'
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/[pst]\d+)',
webpage)

View File

@ -48,7 +48,7 @@ class TwentyMinutenIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [m.group('url') for m in re.finditer(
r'<iframe[^>]+src=(["\'])(?P<url>(?:(?:https?:)?//)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1',
webpage)]

View File

@ -107,7 +107,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
webpage)

View File

@ -55,7 +55,7 @@ class VideaIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [url for _, url in re.findall(
r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
webpage)]

View File

@ -39,7 +39,7 @@ class VideoPressIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
webpage)

View File

@ -39,7 +39,7 @@ class ViqeoIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -27,7 +27,7 @@ class VShareIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
webpage)

View File

@ -51,7 +51,7 @@ class VzaarIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
webpage)

View File

@ -29,7 +29,7 @@ class WashingtonPostIE(InfoExtractor):
}
@classmethod
def _extract_urls(cls, webpage):
def _extract_urls(cls, webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\'](%s)' % cls._EMBED_URL, webpage)

View File

@ -81,7 +81,7 @@ class XFileShareIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(

View File

@ -325,7 +325,7 @@ class XHamsterEmbedIE(InfoExtractor):
}
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [url for _, url in re.findall(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
webpage)]

View File

@ -34,7 +34,7 @@ class YapFilesIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
% YapFilesIE._YAPFILES_URL, webpage)]

View File

@ -65,7 +65,7 @@ class YouPornIE(InfoExtractor):
}]
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
webpage)

View File

@ -1252,7 +1252,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'Unable to mark watched', fatal=False)
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
# Embedded YouTube player
entries = [
unescapeHTML(mobj.group('url'))

View File

@ -33,7 +33,7 @@ class ZypeIE(InfoExtractor):
}
@staticmethod
def _extract_urls(webpage):
def _extract_urls(webpage, **kwargs):
return [
mobj.group('url')
for mobj in re.finditer(