selfhosted extractors, peertube extractor reworked (#10)

This commit is contained in:
Laura Liberda 2020-12-09 21:52:30 +01:00
parent 005b3fbedd
commit 889005bab3
8 changed files with 150 additions and 437 deletions

View file

@ -1,4 +1,5 @@
# coding: utf-8
# flake8: noqa
from __future__ import unicode_literals
import re
@ -17,3 +18,17 @@ class LazyLoadExtractor(object):
instance = real_cls.__new__(real_cls)
instance.__init__(*args, **kwargs)
return instance
# suitable() inserts below
{}
# Lazy-loading counterpart of SearchInfoExtractor: generated stubs whose real
# class derives from SearchInfoExtractor use this as their base instead.
# It adds nothing on top of LazyLoadExtractor.
class LazyLoadSearchExtractor(LazyLoadExtractor):
pass
# Lazy-loading counterpart of SelfhostedInfoExtractor: generated stubs whose
# real class derives from SelfhostedInfoExtractor use this as their base.
# The placeholder below is filled in by the generator script with the source
# of SelfhostedInfoExtractor.suitable_selfhosted, so selfhosted detection works
# without importing the real extractor module.
# NOTE: this template is passed through str.format, so comments added here
# must not contain curly braces.
class LazyLoadSelfhostedExtractor(LazyLoadExtractor):
# suitable_selfhosted() inserts below
{}

View file

@ -15,14 +15,15 @@ if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
from haruhi_dl.extractor import _ALL_CLASSES
from haruhi_dl.extractor.common import InfoExtractor, SearchInfoExtractor
from haruhi_dl.extractor.common import InfoExtractor, SearchInfoExtractor, SelfhostedInfoExtractor
with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read()
module_contents = [
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
module_template.format(getsource(InfoExtractor.suitable),
getsource(SelfhostedInfoExtractor.suitable_selfhosted)),
]
ie_template = '''
class {name}({bases}):
@ -30,6 +31,12 @@ class {name}({bases}):
_module = '{module}'
'''
# Extra class attributes appended to the generated stub of every extractor
# whose _SELFHOSTED flag is True. build_lazy_ie fills the three fields from
# the real class; the !r conversion writes each value as a Python literal
# (a quoted string, a tuple of strings, or None).
sh_additions_template = '''
_SH_VALID_URL = {sh_valid_url!r}
_SH_VALID_CONTENT_STRINGS = {sh_valid_content_strings!r}
_SH_VALID_CONTENT_REGEXES = {sh_valid_content_regexes!r}
'''
make_valid_template = '''
@classmethod
def _make_valid_url(cls):
@ -42,6 +49,8 @@ def get_base_name(base):
return 'LazyLoadExtractor'
elif base is SearchInfoExtractor:
return 'LazyLoadSearchExtractor'
elif base is SelfhostedInfoExtractor:
return 'LazyLoadSelfhostedExtractor'
else:
return base.__name__
@ -53,6 +62,13 @@ def build_lazy_ie(ie, name):
bases=', '.join(map(get_base_name, ie.__bases__)),
valid_url=valid_url,
module=ie.__module__)
if ie._SELFHOSTED is True:
s += sh_additions_template.format(
sh_valid_url=ie._SH_VALID_URL,
sh_valid_content_strings=ie._SH_VALID_CONTENT_STRINGS,
sh_valid_content_regexes=ie._SH_VALID_CONTENT_REGEXES)
if ie.suitable_selfhosted.__func__ is not SelfhostedInfoExtractor.suitable_selfhosted.__func__:
s += '\n' + getsource(ie.suitable_selfhosted)
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
s += '\n' + getsource(ie.suitable)
if hasattr(ie, '_make_valid_url'):
@ -84,15 +100,19 @@ while classes:
ordered_cls.append(_ALL_CLASSES[-1])
names = []
sh_names = []
for ie in ordered_cls:
name = ie.__name__
src = build_lazy_ie(ie, name)
module_contents.append(src)
if ie in _ALL_CLASSES:
names.append(name)
if ie._SELFHOSTED is True:
sh_names.append(name)
module_contents.append(
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
module_contents.extend((
'\n_ALL_CLASSES = [{0}]'.format(', '.join(names)),
'\n_SH_CLASSES = [{0}]'.format(', '.join(sh_names))))
module_src = '\n'.join(module_contents) + '\n'

View file

@ -2,7 +2,7 @@ from __future__ import unicode_literals
try:
from .lazy_extractors import *
from .lazy_extractors import _ALL_CLASSES
from .lazy_extractors import _ALL_CLASSES, _SH_CLASSES
_LAZY_LOADER = True
except ImportError:
_LAZY_LOADER = False
@ -14,6 +14,11 @@ except ImportError:
if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE)
_SH_CLASSES = [
klass
for klass in _ALL_CLASSES
if klass._SELFHOSTED is True
]
def gen_extractor_classes():

View file

@ -394,6 +394,7 @@ class InfoExtractor(object):
_GEO_COUNTRIES = None
_GEO_IP_BLOCKS = None
_WORKING = True
_SELFHOSTED = False
def __init__(self, downloader=None):
"""Constructor. Receives an optional downloader."""
@ -3020,3 +3021,73 @@ class SearchInfoExtractor(InfoExtractor):
@property
def SEARCH_KEY(self):
return self._SEARCH_KEY
class SelfhostedInfoExtractor(InfoExtractor):
    """Selfhosted Information Extractor class.

    Selfhosted info extractors are for services that cannot be handled
    by just listing all of their domains.
    Mostly related to free and open source software,
    which everyone is allowed to host on their own servers
    (like PeerTube, Funkwhale, Mastodon, Nextcloud, and lots of others).

    The _VALID_URL value should not match URLs, but it can
    match the extractor-specific ID pointer string
    (e.g. a Mastodon extractor can match "mastodon:donotsta.re:9xN1v6yM7WhzE7aIIC",
    but not "https://donotsta.re/notice/9xN1v6yM7WhzE7aIIC").

    Real URLs are recognised through suitable_selfhosted(), which looks at
    the URL shape (_SH_VALID_URL) and at the downloaded page contents
    (_SH_VALID_CONTENT_STRINGS / _SH_VALID_CONTENT_REGEXES).

    https://git.sakamoto.pl/laudom/haruhi-dl/-/issues/10
    """

    _SELFHOSTED = True

    # Regular expression that matches the actual URLs,
    # or None if the URL should not be checked.
    _SH_VALID_URL = None

    # An iterable of strings, of which *any* should be contained
    # in the webpage contents, or None if should not be checked.
    _SH_VALID_CONTENT_STRINGS = None

    # An iterable of regular expression strings, of which *any* should match
    # the webpage contents, or None if should not be checked.
    _SH_VALID_CONTENT_REGEXES = None

    @property
    def IE_NAME(self):
        # Drops the last four characters of the class name
        # (presumably the "SHIE" suffix, as in PeerTubeSHIE -> PeerTube).
        return compat_str(type(self).__name__[:-4])

    @classmethod
    def suitable_selfhosted(cls, url, webpage):
        """Tell whether this IE can handle a page of a selfhosted service.

        url     -- the URL being extracted
        webpage -- the page contents as a string, or None if not downloaded

        Returns True if suitable for this IE, False otherwise:
        * a configured _SH_VALID_URL that does not match rejects the page;
        * with no webpage, a configured (and matching) _SH_VALID_URL is
          enough, otherwise there is nothing to go by;
        * with a webpage, any of _SH_VALID_CONTENT_STRINGS being contained
          in it, or any of _SH_VALID_CONTENT_REGEXES matching it, accepts.

        NOTE: the source of this method is copied on its own into the lazy
        extractors module, so it must stay self-contained (only `re` and
        class attributes; no helper methods).
        """
        if cls._SH_VALID_URL:
            # Compiled pattern is cached per class; cls.__dict__ (not
            # hasattr) keeps subclasses from reusing a parent's pattern.
            if '_SH_VALID_URL_RE' not in cls.__dict__:
                cls._SH_VALID_URL_RE = re.compile(cls._SH_VALID_URL)
            if cls._SH_VALID_URL_RE.match(url) is None:
                return False

        if webpage is None:
            # if no webpage, assume just matching the URL is fine;
            # without a URL pattern there's nothing more to check
            return bool(cls._SH_VALID_URL)

        if any(p in webpage for p in (cls._SH_VALID_CONTENT_STRINGS or ())):
            return True

        # no strings? check regexes!
        # The cache key must be the same name that gets assigned, and the
        # cached value must be reusable (a tuple, not a one-shot generator).
        # A None regex list means "nothing to check" and must not raise.
        if '_SH_VALID_CONTENT_REGEXES_RES' not in cls.__dict__:
            cls._SH_VALID_CONTENT_REGEXES_RES = tuple(
                re.compile(rgx)
                for rgx in (cls._SH_VALID_CONTENT_REGEXES or ()))
        # NOTE(review): .match() anchors at the start of the page, so the
        # patterns have to account for leading content; kept as-is - confirm
        # whether .search() was intended.
        # NOTE(review): when neither strings nor regexes are configured this
        # returns False even if the URL matched - confirm that is desired.
        return any(
            rgx.match(webpage) is not None
            for rgx in cls._SH_VALID_CONTENT_REGEXES_RES)

    def _real_extract(self, url):
        """Entry point used for regular extraction. Do NOT redefine in subclasses.

        Delegates to _selfhosted_extract() without page contents.
        """
        return self._selfhosted_extract(url)

    def _selfhosted_extract(self, url, webpage=None):
        """Real extraction process. Redefine in subclasses.

        `webpage` is a string (the website contents, as downloaded by GenericIE) or None
        """
        pass

View file

@ -842,7 +842,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
from .pbs import PBSIE
from .pearvideo import PearVideoIE
from .peertube import PeerTubeIE
from .peertube import PeerTubeSHIE
from .people import PeopleIE
from .performgroup import PerformGroupIE
from .periscope import (

View file

@ -109,7 +109,7 @@ from .yapfiles import YapFilesIE
from .vice import ViceIE
from .xfileshare import XFileShareIE
from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE
from .peertube import PeerTubeSHIE
from .teachable import TeachableIE
from .indavideo import IndavideoEmbedIE
from .apa import APAIE
@ -2430,6 +2430,15 @@ class GenericIE(InfoExtractor):
except compat_xml_parse_error:
pass
if not self._downloader.params.get('force_generic_extractor', False):
# Is it a selfhosted web service?
from ..extractor import _SH_CLASSES
for shie in _SH_CLASSES:
if shie.suitable_selfhosted(url, webpage):
shie = self._downloader.get_info_extractor(shie.ie_key())
self.to_screen('%s: This webpage seems to be %s' % (video_id, shie.IE_NAME))
return shie._selfhosted_extract(url, webpage=webpage)
# Is it a Camtasia project?
camtasia_res = self._extract_camtasia(url, video_id, webpage)
if camtasia_res is not None:
@ -3186,10 +3195,10 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
peertube_urls = PeerTubeSHIE._extract_urls(webpage, url)
if peertube_urls:
return self.playlist_from_matches(
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
peertube_urls, video_id, video_title, ie=PeerTubeSHIE.ie_key())
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
if indavideo_urls:

View file

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from .common import SelfhostedInfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
@ -16,414 +16,17 @@ from ..utils import (
)
class PeerTubeIE(InfoExtractor):
_INSTANCES_RE = r'''(?:
# Taken from https://instances.joinpeertube.org/instances
peertube\.rainbowswingers\.net|
tube\.stanisic\.nl|
peer\.suiri\.us|
medias\.libox\.fr|
videomensoif\.ynh\.fr|
peertube\.travelpandas\.eu|
peertube\.rachetjay\.fr|
peertube\.montecsys\.fr|
tube\.eskuero\.me|
peer\.tube|
peertube\.umeahackerspace\.se|
tube\.nx-pod\.de|
video\.monsieurbidouille\.fr|
tube\.openalgeria\.org|
vid\.lelux\.fi|
video\.anormallostpod\.ovh|
tube\.crapaud-fou\.org|
peertube\.stemy\.me|
lostpod\.space|
exode\.me|
peertube\.snargol\.com|
vis\.ion\.ovh|
videosdulib\.re|
v\.mbius\.io|
videos\.judrey\.eu|
peertube\.osureplayviewer\.xyz|
peertube\.mathieufamily\.ovh|
www\.videos-libr\.es|
fightforinfo\.com|
peertube\.fediverse\.ru|
peertube\.oiseauroch\.fr|
video\.nesven\.eu|
v\.bearvideo\.win|
video\.qoto\.org|
justporn\.cc|
video\.vny\.fr|
peervideo\.club|
tube\.taker\.fr|
peertube\.chantierlibre\.org|
tube\.ipfixe\.info|
tube\.kicou\.info|
tube\.dodsorf\.as|
videobit\.cc|
video\.yukari\.moe|
videos\.elbinario\.net|
hkvideo\.live|
pt\.tux\.tf|
www\.hkvideo\.live|
FIGHTFORINFO\.com|
pt\.765racing\.com|
peertube\.gnumeria\.eu\.org|
nordenmedia\.com|
peertube\.co\.uk|
tube\.darfweb\.eu|
tube\.kalah-france\.org|
0ch\.in|
vod\.mochi\.academy|
film\.node9\.org|
peertube\.hatthieves\.es|
video\.fitchfamily\.org|
peertube\.ddns\.net|
video\.ifuncle\.kr|
video\.fdlibre\.eu|
tube\.22decembre\.eu|
peertube\.harmoniescreatives\.com|
tube\.fabrigli\.fr|
video\.thedwyers\.co|
video\.bruitbruit\.com|
peertube\.foxfam\.club|
peer\.philoxweb\.be|
videos\.bugs\.social|
peertube\.malbert\.xyz|
peertube\.bilange\.ca|
libretube\.net|
diytelevision\.com|
peertube\.fedilab\.app|
libre\.video|
video\.mstddntfdn\.online|
us\.tv|
peertube\.sl-network\.fr|
peertube\.dynlinux\.io|
peertube\.david\.durieux\.family|
peertube\.linuxrocks\.online|
peerwatch\.xyz|
v\.kretschmann\.social|
tube\.otter\.sh|
yt\.is\.nota\.live|
tube\.dragonpsi\.xyz|
peertube\.boneheadmedia\.com|
videos\.funkwhale\.audio|
watch\.44con\.com|
peertube\.gcaillaut\.fr|
peertube\.icu|
pony\.tube|
spacepub\.space|
tube\.stbr\.io|
v\.mom-gay\.faith|
tube\.port0\.xyz|
peertube\.simounet\.net|
play\.jergefelt\.se|
peertube\.zeteo\.me|
tube\.danq\.me|
peertube\.kerenon\.com|
tube\.fab-l3\.org|
tube\.calculate\.social|
peertube\.mckillop\.org|
tube\.netzspielplatz\.de|
vod\.ksite\.de|
peertube\.laas\.fr|
tube\.govital\.net|
peertube\.stephenson\.cc|
bistule\.nohost\.me|
peertube\.kajalinifi\.de|
video\.ploud\.jp|
video\.omniatv\.com|
peertube\.ffs2play\.fr|
peertube\.leboulaire\.ovh|
peertube\.tronic-studio\.com|
peertube\.public\.cat|
peertube\.metalbanana\.net|
video\.1000i100\.fr|
peertube\.alter-nativ-voll\.de|
tube\.pasa\.tf|
tube\.worldofhauru\.xyz|
pt\.kamp\.site|
peertube\.teleassist\.fr|
videos\.mleduc\.xyz|
conf\.tube|
media\.privacyinternational\.org|
pt\.forty-two\.nl|
video\.halle-leaks\.de|
video\.grosskopfgames\.de|
peertube\.schaeferit\.de|
peertube\.jackbot\.fr|
tube\.extinctionrebellion\.fr|
peertube\.f-si\.org|
video\.subak\.ovh|
videos\.koweb\.fr|
peertube\.zergy\.net|
peertube\.roflcopter\.fr|
peertube\.floss-marketing-school\.com|
vloggers\.social|
peertube\.iriseden\.eu|
videos\.ubuntu-paris\.org|
peertube\.mastodon\.host|
armstube\.com|
peertube\.s2s\.video|
peertube\.lol|
tube\.open-plug\.eu|
open\.tube|
peertube\.ch|
peertube\.normandie-libre\.fr|
peertube\.slat\.org|
video\.lacaveatonton\.ovh|
peertube\.uno|
peertube\.servebeer\.com|
peertube\.fedi\.quebec|
tube\.h3z\.jp|
tube\.plus200\.com|
peertube\.eric\.ovh|
tube\.metadocs\.cc|
tube\.unmondemeilleur\.eu|
gouttedeau\.space|
video\.antirep\.net|
nrop\.cant\.at|
tube\.ksl-bmx\.de|
tube\.plaf\.fr|
tube\.tchncs\.de|
video\.devinberg\.com|
hitchtube\.fr|
peertube\.kosebamse\.com|
yunopeertube\.myddns\.me|
peertube\.varney\.fr|
peertube\.anon-kenkai\.com|
tube\.maiti\.info|
tubee\.fr|
videos\.dinofly\.com|
toobnix\.org|
videotape\.me|
voca\.tube|
video\.heromuster\.com|
video\.lemediatv\.fr|
video\.up\.edu\.ph|
balafon\.video|
video\.ivel\.fr|
thickrips\.cloud|
pt\.laurentkruger\.fr|
video\.monarch-pass\.net|
peertube\.artica\.center|
video\.alternanet\.fr|
indymotion\.fr|
fanvid\.stopthatimp\.net|
video\.farci\.org|
v\.lesterpig\.com|
video\.okaris\.de|
tube\.pawelko\.net|
peertube\.mablr\.org|
tube\.fede\.re|
pytu\.be|
evertron\.tv|
devtube\.dev-wiki\.de|
raptube\.antipub\.org|
video\.selea\.se|
peertube\.mygaia\.org|
video\.oh14\.de|
peertube\.livingutopia\.org|
peertube\.the-penguin\.de|
tube\.thechangebook\.org|
tube\.anjara\.eu|
pt\.pube\.tk|
video\.samedi\.pm|
mplayer\.demouliere\.eu|
widemus\.de|
peertube\.me|
peertube\.zapashcanon\.fr|
video\.latavernedejohnjohn\.fr|
peertube\.pcservice46\.fr|
peertube\.mazzonetto\.eu|
video\.irem\.univ-paris-diderot\.fr|
video\.livecchi\.cloud|
alttube\.fr|
video\.coop\.tools|
video\.cabane-libre\.org|
peertube\.openstreetmap\.fr|
videos\.alolise\.org|
irrsinn\.video|
video\.antopie\.org|
scitech\.video|
tube2\.nemsia\.org|
video\.amic37\.fr|
peertube\.freeforge\.eu|
video\.arbitrarion\.com|
video\.datsemultimedia\.com|
stoptrackingus\.tv|
peertube\.ricostrongxxx\.com|
docker\.videos\.lecygnenoir\.info|
peertube\.togart\.de|
tube\.postblue\.info|
videos\.domainepublic\.net|
peertube\.cyber-tribal\.com|
video\.gresille\.org|
peertube\.dsmouse\.net|
cinema\.yunohost\.support|
tube\.theocevaer\.fr|
repro\.video|
tube\.4aem\.com|
quaziinc\.com|
peertube\.metawurst\.space|
videos\.wakapo\.com|
video\.ploud\.fr|
video\.freeradical\.zone|
tube\.valinor\.fr|
refuznik\.video|
pt\.kircheneuenburg\.de|
peertube\.asrun\.eu|
peertube\.lagob\.fr|
videos\.side-ways\.net|
91video\.online|
video\.valme\.io|
video\.taboulisme\.com|
videos-libr\.es|
tv\.mooh\.fr|
nuage\.acostey\.fr|
video\.monsieur-a\.fr|
peertube\.librelois\.fr|
videos\.pair2jeux\.tube|
videos\.pueseso\.club|
peer\.mathdacloud\.ovh|
media\.assassinate-you\.net|
vidcommons\.org|
ptube\.rousset\.nom\.fr|
tube\.cyano\.at|
videos\.squat\.net|
video\.iphodase\.fr|
peertube\.makotoworkshop\.org|
peertube\.serveur\.slv-valbonne\.fr|
vault\.mle\.party|
hostyour\.tv|
videos\.hack2g2\.fr|
libre\.tube|
pire\.artisanlogiciel\.net|
videos\.numerique-en-commun\.fr|
video\.netsyms\.com|
video\.die-partei\.social|
video\.writeas\.org|
peertube\.swarm\.solvingmaz\.es|
tube\.pericoloso\.ovh|
watching\.cypherpunk\.observer|
videos\.adhocmusic\.com|
tube\.rfc1149\.net|
peertube\.librelabucm\.org|
videos\.numericoop\.fr|
peertube\.koehn\.com|
peertube\.anarchmusicall\.net|
tube\.kampftoast\.de|
vid\.y-y\.li|
peertube\.xtenz\.xyz|
diode\.zone|
tube\.egf\.mn|
peertube\.nomagic\.uk|
visionon\.tv|
videos\.koumoul\.com|
video\.rastapuls\.com|
video\.mantlepro\.com|
video\.deadsuperhero\.com|
peertube\.musicstudio\.pro|
peertube\.we-keys\.fr|
artitube\.artifaille\.fr|
peertube\.ethernia\.net|
tube\.midov\.pl|
peertube\.fr|
watch\.snoot\.tube|
peertube\.donnadieu\.fr|
argos\.aquilenet\.fr|
tube\.nemsia\.org|
tube\.bruniau\.net|
videos\.darckoune\.moe|
tube\.traydent\.info|
dev\.videos\.lecygnenoir\.info|
peertube\.nayya\.org|
peertube\.live|
peertube\.mofgao\.space|
video\.lequerrec\.eu|
peertube\.amicale\.net|
aperi\.tube|
tube\.ac-lyon\.fr|
video\.lw1\.at|
www\.yiny\.org|
videos\.pofilo\.fr|
tube\.lou\.lt|
choob\.h\.etbus\.ch|
tube\.hoga\.fr|
peertube\.heberge\.fr|
video\.obermui\.de|
videos\.cloudfrancois\.fr|
betamax\.video|
video\.typica\.us|
tube\.piweb\.be|
video\.blender\.org|
peertube\.cat|
tube\.kdy\.ch|
pe\.ertu\.be|
peertube\.social|
videos\.lescommuns\.org|
tv\.datamol\.org|
videonaute\.fr|
dialup\.express|
peertube\.nogafa\.org|
megatube\.lilomoino\.fr|
peertube\.tamanoir\.foucry\.net|
peertube\.devosi\.org|
peertube\.1312\.media|
tube\.bootlicker\.party|
skeptikon\.fr|
video\.blueline\.mg|
tube\.homecomputing\.fr|
tube\.ouahpiti\.info|
video\.tedomum\.net|
video\.g3l\.org|
fontube\.fr|
peertube\.gaialabs\.ch|
tube\.kher\.nl|
peertube\.qtg\.fr|
video\.migennes\.net|
tube\.p2p\.legal|
troll\.tv|
videos\.iut-orsay\.fr|
peertube\.solidev\.net|
videos\.cemea\.org|
video\.passageenseine\.fr|
videos\.festivalparminous\.org|
peertube\.touhoppai\.moe|
sikke\.fi|
peer\.hostux\.social|
share\.tube|
peertube\.walkingmountains\.fr|
videos\.benpro\.fr|
peertube\.parleur\.net|
peertube\.heraut\.eu|
tube\.aquilenet\.fr|
peertube\.gegeweb\.eu|
framatube\.org|
thinkerview\.video|
tube\.conferences-gesticulees\.net|
peertube\.datagueule\.tv|
video\.lqdn\.fr|
tube\.mochi\.academy|
media\.zat\.im|
video\.colibris-outilslibres\.org|
tube\.svnet\.fr|
peertube\.video|
peertube3\.cpy\.re|
peertube2\.cpy\.re|
videos\.tcit\.fr|
peertube\.cpy\.re
)'''
class PeerTubeSHIE(SelfhostedInfoExtractor):
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
_VALID_URL = r'''(?x)
(?:
peertube:(?P<host>[^:]+):|
https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
)
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (_UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % _UUID_RE
_SH_VALID_CONTENT_STRINGS = (
'<title>PeerTube<',
'There will be other non JS-based clients to access PeerTube',
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
)
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '9bed8c0137913e17b86334e5885aacff',
@ -468,26 +71,11 @@ class PeerTubeIE(InfoExtractor):
'only_matching': True,
}]
@staticmethod
def _extract_peertube_url(webpage, source_url):
mobj = re.match(
r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
% PeerTubeIE._UUID_RE, source_url)
if mobj and any(p in webpage for p in (
'<title>PeerTube<',
'There will be other non JS-based clients to access PeerTube',
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
return 'peertube:%s:%s' % mobj.group('host', 'id')
@staticmethod
def _extract_urls(webpage, source_url):
entries = re.findall(
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
% (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
if not entries:
peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
if peertube_url:
entries = [peertube_url]
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//[^/]+/videos/embed/%s)'''
% (PeerTubeSHIE._UUID_RE), webpage)
return entries
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
@ -515,9 +103,11 @@ class PeerTubeIE(InfoExtractor):
})
return subtitles
def _real_extract(self, url):
def _selfhosted_extract(self, url, webpage=None):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') or mobj.group('host_2')
if not mobj:
mobj = re.match(self._SH_VALID_URL, url)
host = mobj.group('host')
video_id = mobj.group('id')
video = self._call_api(

View file

@ -89,7 +89,10 @@ class TestAllURLsMatching(unittest.TestCase):
url = tc['url']
for ie in ies:
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
if ie._SELFHOSTED is True:
self.assertTrue(ie.suitable(url) or ie.suitable_selfhosted(url, None), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
else:
self.assertFalse(
ie.suitable(url),