selfhosted extractors, peertube extractor reworked (#10)
This commit is contained in:
parent
005b3fbedd
commit
889005bab3
|
@ -1,4 +1,5 @@
|
|||
# coding: utf-8
|
||||
# flake8: noqa
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
@ -17,3 +18,17 @@ class LazyLoadExtractor(object):
|
|||
instance = real_cls.__new__(real_cls)
|
||||
instance.__init__(*args, **kwargs)
|
||||
return instance
|
||||
|
||||
|
||||
# suitable() inserts below
|
||||
{}
|
||||
|
||||
|
||||
class LazyLoadSearchExtractor(LazyLoadExtractor):
|
||||
pass
|
||||
|
||||
|
||||
class LazyLoadSelfhostedExtractor(LazyLoadExtractor):
|
||||
|
||||
# suitable_selfhosted() inserts below
|
||||
{}
|
||||
|
|
|
@ -15,14 +15,15 @@ if os.path.exists(lazy_extractors_filename):
|
|||
os.remove(lazy_extractors_filename)
|
||||
|
||||
from haruhi_dl.extractor import _ALL_CLASSES
|
||||
from haruhi_dl.extractor.common import InfoExtractor, SearchInfoExtractor
|
||||
from haruhi_dl.extractor.common import InfoExtractor, SearchInfoExtractor, SelfhostedInfoExtractor
|
||||
|
||||
with open('devscripts/lazy_load_template.py', 'rt') as f:
|
||||
module_template = f.read()
|
||||
|
||||
module_contents = [
|
||||
module_template + '\n' + getsource(InfoExtractor.suitable) + '\n',
|
||||
'class LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n']
|
||||
module_template.format(getsource(InfoExtractor.suitable),
|
||||
getsource(SelfhostedInfoExtractor.suitable_selfhosted)),
|
||||
]
|
||||
|
||||
ie_template = '''
|
||||
class {name}({bases}):
|
||||
|
@ -30,6 +31,12 @@ class {name}({bases}):
|
|||
_module = '{module}'
|
||||
'''
|
||||
|
||||
sh_additions_template = '''
|
||||
_SH_VALID_URL = {sh_valid_url!r}
|
||||
_SH_VALID_CONTENT_STRINGS = {sh_valid_content_strings!r}
|
||||
_SH_VALID_CONTENT_REGEXES = {sh_valid_content_regexes!r}
|
||||
'''
|
||||
|
||||
make_valid_template = '''
|
||||
@classmethod
|
||||
def _make_valid_url(cls):
|
||||
|
@ -42,6 +49,8 @@ def get_base_name(base):
|
|||
return 'LazyLoadExtractor'
|
||||
elif base is SearchInfoExtractor:
|
||||
return 'LazyLoadSearchExtractor'
|
||||
elif base is SelfhostedInfoExtractor:
|
||||
return 'LazyLoadSelfhostedExtractor'
|
||||
else:
|
||||
return base.__name__
|
||||
|
||||
|
@ -53,6 +62,13 @@ def build_lazy_ie(ie, name):
|
|||
bases=', '.join(map(get_base_name, ie.__bases__)),
|
||||
valid_url=valid_url,
|
||||
module=ie.__module__)
|
||||
if ie._SELFHOSTED is True:
|
||||
s += sh_additions_template.format(
|
||||
sh_valid_url=ie._SH_VALID_URL,
|
||||
sh_valid_content_strings=ie._SH_VALID_CONTENT_STRINGS,
|
||||
sh_valid_content_regexes=ie._SH_VALID_CONTENT_REGEXES)
|
||||
if ie.suitable_selfhosted.__func__ is not SelfhostedInfoExtractor.suitable_selfhosted.__func__:
|
||||
s += '\n' + getsource(ie.suitable_selfhosted)
|
||||
if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
|
||||
s += '\n' + getsource(ie.suitable)
|
||||
if hasattr(ie, '_make_valid_url'):
|
||||
|
@ -84,15 +100,19 @@ while classes:
|
|||
ordered_cls.append(_ALL_CLASSES[-1])
|
||||
|
||||
names = []
|
||||
sh_names = []
|
||||
for ie in ordered_cls:
|
||||
name = ie.__name__
|
||||
src = build_lazy_ie(ie, name)
|
||||
module_contents.append(src)
|
||||
if ie in _ALL_CLASSES:
|
||||
names.append(name)
|
||||
if ie._SELFHOSTED is True:
|
||||
sh_names.append(name)
|
||||
|
||||
module_contents.append(
|
||||
'_ALL_CLASSES = [{0}]'.format(', '.join(names)))
|
||||
module_contents.extend((
|
||||
'\n_ALL_CLASSES = [{0}]'.format(', '.join(names)),
|
||||
'\n_SH_CLASSES = [{0}]'.format(', '.join(sh_names))))
|
||||
|
||||
module_src = '\n'.join(module_contents) + '\n'
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ from __future__ import unicode_literals
|
|||
|
||||
try:
|
||||
from .lazy_extractors import *
|
||||
from .lazy_extractors import _ALL_CLASSES
|
||||
from .lazy_extractors import _ALL_CLASSES, _SH_CLASSES
|
||||
_LAZY_LOADER = True
|
||||
except ImportError:
|
||||
_LAZY_LOADER = False
|
||||
|
@ -14,6 +14,11 @@ except ImportError:
|
|||
if name.endswith('IE') and name != 'GenericIE'
|
||||
]
|
||||
_ALL_CLASSES.append(GenericIE)
|
||||
_SH_CLASSES = [
|
||||
klass
|
||||
for klass in _ALL_CLASSES
|
||||
if klass._SELFHOSTED is True
|
||||
]
|
||||
|
||||
|
||||
def gen_extractor_classes():
|
||||
|
|
|
@ -394,6 +394,7 @@ class InfoExtractor(object):
|
|||
_GEO_COUNTRIES = None
|
||||
_GEO_IP_BLOCKS = None
|
||||
_WORKING = True
|
||||
_SELFHOSTED = False
|
||||
|
||||
def __init__(self, downloader=None):
|
||||
"""Constructor. Receives an optional downloader."""
|
||||
|
@ -3020,3 +3021,73 @@ class SearchInfoExtractor(InfoExtractor):
|
|||
@property
|
||||
def SEARCH_KEY(self):
|
||||
return self._SEARCH_KEY
|
||||
|
||||
|
||||
class SelfhostedInfoExtractor(InfoExtractor):
    """Selfhosted Information Extractor class.

    Selfhosted info extractors are for the services,
    that cannot be handled by just listing all of their domains.
    Mostly related to free and open source software,
    which everyone is allowed to host on their own servers
    (like PeerTube, Funkwhale, Mastodon, Nextcloud, and lots of others).

    The _VALID_URL value should not match URLs, but it surely can
    match the extractor-specific ID pointer string
    (f.e. Mastodon extractor can match "mastodon:donotsta.re:9xN1v6yM7WhzE7aIIC",
    but not "https://donotsta.re/notice/9xN1v6yM7WhzE7aIIC").

    https://git.sakamoto.pl/laudom/haruhi-dl/-/issues/10
    """

    _SELFHOSTED = True

    # Regular expression that matches the actual URLs,
    # or None if should not be checked
    _SH_VALID_URL = None

    # An iterable of strings, of which *any* should be contained
    # in the webpage contents, or None if should not be checked
    _SH_VALID_CONTENT_STRINGS = None

    # An iterable of regular expression strings, of which *any* should match
    # the webpage contents, or None if should not be checked
    _SH_VALID_CONTENT_REGEXES = None

    @property
    def IE_NAME(self):
        """Extractor name: the class name without its last four characters
        (e.g. "PeerTubeSHIE" -> "PeerTube")."""
        return compat_str(type(self).__name__[:-4])

    @classmethod
    def suitable_selfhosted(cls, url, webpage):
        """Receives a URL and webpage contents, and returns True if suitable for this IE.

        `webpage` may be None, in which case only _SH_VALID_URL is checked.
        Always returns a bool.
        """
        # NOTE: the source of this method is copied verbatim into the generated
        # lazy extractors module, so it must only rely on `re` and on `cls`.

        if cls._SH_VALID_URL:
            # Compiled pattern is cached per class (cls.__dict__, not getattr),
            # so a subclass never reuses the pattern compiled for its parent.
            if '_SH_VALID_URL_RE' not in cls.__dict__:
                cls._SH_VALID_URL_RE = re.compile(cls._SH_VALID_URL)
            if cls._SH_VALID_URL_RE.match(url) is None:
                return False

        if webpage is None:
            # if no webpage, assume just matching the URL is fine;
            # without a URL pattern there is nothing more to check
            return bool(cls._SH_VALID_URL)

        if any(p in webpage for p in (cls._SH_VALID_CONTENT_STRINGS or ())):
            return True

        # no strings? check regexes!
        # The cache key must be the very attribute name that is assigned, and
        # the cached value must be a tuple: a generator would be exhausted
        # after the first call and silently match nothing afterwards.
        if '_SH_VALID_CONTENT_REGEXES_RES' not in cls.__dict__:
            cls._SH_VALID_CONTENT_REGEXES_RES = tuple(
                re.compile(rgx)
                for rgx in (cls._SH_VALID_CONTENT_REGEXES or ()))
        # Patterns are anchored at the start of the webpage (re.match).
        return any(rgx.match(webpage) is not None
                   for rgx in cls._SH_VALID_CONTENT_REGEXES_RES)

    def _real_extract(self, url):
        """Unreal extraction process. Do NOT redefine in subclasses."""
        return self._selfhosted_extract(url)

    def _selfhosted_extract(self, url, webpage=None):
        """Real extraction process. Redefine in subclasses.

        `webpage` is a string (the website contents, as downloaded by GenericIE) or None"""
        pass
|
||||
|
|
|
@ -842,7 +842,7 @@ from .parliamentliveuk import ParliamentLiveUKIE
|
|||
from .patreon import PatreonIE
|
||||
from .pbs import PBSIE
|
||||
from .pearvideo import PearVideoIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .peertube import PeerTubeSHIE
|
||||
from .people import PeopleIE
|
||||
from .performgroup import PerformGroupIE
|
||||
from .periscope import (
|
||||
|
|
|
@ -109,7 +109,7 @@ from .yapfiles import YapFilesIE
|
|||
from .vice import ViceIE
|
||||
from .xfileshare import XFileShareIE
|
||||
from .cloudflarestream import CloudflareStreamIE
|
||||
from .peertube import PeerTubeIE
|
||||
from .peertube import PeerTubeSHIE
|
||||
from .teachable import TeachableIE
|
||||
from .indavideo import IndavideoEmbedIE
|
||||
from .apa import APAIE
|
||||
|
@ -2430,6 +2430,15 @@ class GenericIE(InfoExtractor):
|
|||
except compat_xml_parse_error:
|
||||
pass
|
||||
|
||||
if not self._downloader.params.get('force_generic_extractor', False):
|
||||
# Is it a selfhosted web service?
|
||||
from ..extractor import _SH_CLASSES
|
||||
for shie in _SH_CLASSES:
|
||||
if shie.suitable_selfhosted(url, webpage):
|
||||
shie = self._downloader.get_info_extractor(shie.ie_key())
|
||||
self.to_screen('%s: This webpage seems to be %s' % (video_id, shie.IE_NAME))
|
||||
return shie._selfhosted_extract(url, webpage=webpage)
|
||||
|
||||
# Is it a Camtasia project?
|
||||
camtasia_res = self._extract_camtasia(url, video_id, webpage)
|
||||
if camtasia_res is not None:
|
||||
|
@ -3186,10 +3195,10 @@ class GenericIE(InfoExtractor):
|
|||
return self.playlist_from_matches(
|
||||
cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
|
||||
|
||||
peertube_urls = PeerTubeIE._extract_urls(webpage, url)
|
||||
peertube_urls = PeerTubeSHIE._extract_urls(webpage, url)
|
||||
if peertube_urls:
|
||||
return self.playlist_from_matches(
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
|
||||
peertube_urls, video_id, video_title, ie=PeerTubeSHIE.ie_key())
|
||||
|
||||
indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
|
||||
if indavideo_urls:
|
||||
|
|
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
|||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .common import SelfhostedInfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
|
@ -16,414 +16,17 @@ from ..utils import (
|
|||
)
|
||||
|
||||
|
||||
class PeerTubeIE(InfoExtractor):
|
||||
_INSTANCES_RE = r'''(?:
|
||||
# Taken from https://instances.joinpeertube.org/instances
|
||||
peertube\.rainbowswingers\.net|
|
||||
tube\.stanisic\.nl|
|
||||
peer\.suiri\.us|
|
||||
medias\.libox\.fr|
|
||||
videomensoif\.ynh\.fr|
|
||||
peertube\.travelpandas\.eu|
|
||||
peertube\.rachetjay\.fr|
|
||||
peertube\.montecsys\.fr|
|
||||
tube\.eskuero\.me|
|
||||
peer\.tube|
|
||||
peertube\.umeahackerspace\.se|
|
||||
tube\.nx-pod\.de|
|
||||
video\.monsieurbidouille\.fr|
|
||||
tube\.openalgeria\.org|
|
||||
vid\.lelux\.fi|
|
||||
video\.anormallostpod\.ovh|
|
||||
tube\.crapaud-fou\.org|
|
||||
peertube\.stemy\.me|
|
||||
lostpod\.space|
|
||||
exode\.me|
|
||||
peertube\.snargol\.com|
|
||||
vis\.ion\.ovh|
|
||||
videosdulib\.re|
|
||||
v\.mbius\.io|
|
||||
videos\.judrey\.eu|
|
||||
peertube\.osureplayviewer\.xyz|
|
||||
peertube\.mathieufamily\.ovh|
|
||||
www\.videos-libr\.es|
|
||||
fightforinfo\.com|
|
||||
peertube\.fediverse\.ru|
|
||||
peertube\.oiseauroch\.fr|
|
||||
video\.nesven\.eu|
|
||||
v\.bearvideo\.win|
|
||||
video\.qoto\.org|
|
||||
justporn\.cc|
|
||||
video\.vny\.fr|
|
||||
peervideo\.club|
|
||||
tube\.taker\.fr|
|
||||
peertube\.chantierlibre\.org|
|
||||
tube\.ipfixe\.info|
|
||||
tube\.kicou\.info|
|
||||
tube\.dodsorf\.as|
|
||||
videobit\.cc|
|
||||
video\.yukari\.moe|
|
||||
videos\.elbinario\.net|
|
||||
hkvideo\.live|
|
||||
pt\.tux\.tf|
|
||||
www\.hkvideo\.live|
|
||||
FIGHTFORINFO\.com|
|
||||
pt\.765racing\.com|
|
||||
peertube\.gnumeria\.eu\.org|
|
||||
nordenmedia\.com|
|
||||
peertube\.co\.uk|
|
||||
tube\.darfweb\.eu|
|
||||
tube\.kalah-france\.org|
|
||||
0ch\.in|
|
||||
vod\.mochi\.academy|
|
||||
film\.node9\.org|
|
||||
peertube\.hatthieves\.es|
|
||||
video\.fitchfamily\.org|
|
||||
peertube\.ddns\.net|
|
||||
video\.ifuncle\.kr|
|
||||
video\.fdlibre\.eu|
|
||||
tube\.22decembre\.eu|
|
||||
peertube\.harmoniescreatives\.com|
|
||||
tube\.fabrigli\.fr|
|
||||
video\.thedwyers\.co|
|
||||
video\.bruitbruit\.com|
|
||||
peertube\.foxfam\.club|
|
||||
peer\.philoxweb\.be|
|
||||
videos\.bugs\.social|
|
||||
peertube\.malbert\.xyz|
|
||||
peertube\.bilange\.ca|
|
||||
libretube\.net|
|
||||
diytelevision\.com|
|
||||
peertube\.fedilab\.app|
|
||||
libre\.video|
|
||||
video\.mstddntfdn\.online|
|
||||
us\.tv|
|
||||
peertube\.sl-network\.fr|
|
||||
peertube\.dynlinux\.io|
|
||||
peertube\.david\.durieux\.family|
|
||||
peertube\.linuxrocks\.online|
|
||||
peerwatch\.xyz|
|
||||
v\.kretschmann\.social|
|
||||
tube\.otter\.sh|
|
||||
yt\.is\.nota\.live|
|
||||
tube\.dragonpsi\.xyz|
|
||||
peertube\.boneheadmedia\.com|
|
||||
videos\.funkwhale\.audio|
|
||||
watch\.44con\.com|
|
||||
peertube\.gcaillaut\.fr|
|
||||
peertube\.icu|
|
||||
pony\.tube|
|
||||
spacepub\.space|
|
||||
tube\.stbr\.io|
|
||||
v\.mom-gay\.faith|
|
||||
tube\.port0\.xyz|
|
||||
peertube\.simounet\.net|
|
||||
play\.jergefelt\.se|
|
||||
peertube\.zeteo\.me|
|
||||
tube\.danq\.me|
|
||||
peertube\.kerenon\.com|
|
||||
tube\.fab-l3\.org|
|
||||
tube\.calculate\.social|
|
||||
peertube\.mckillop\.org|
|
||||
tube\.netzspielplatz\.de|
|
||||
vod\.ksite\.de|
|
||||
peertube\.laas\.fr|
|
||||
tube\.govital\.net|
|
||||
peertube\.stephenson\.cc|
|
||||
bistule\.nohost\.me|
|
||||
peertube\.kajalinifi\.de|
|
||||
video\.ploud\.jp|
|
||||
video\.omniatv\.com|
|
||||
peertube\.ffs2play\.fr|
|
||||
peertube\.leboulaire\.ovh|
|
||||
peertube\.tronic-studio\.com|
|
||||
peertube\.public\.cat|
|
||||
peertube\.metalbanana\.net|
|
||||
video\.1000i100\.fr|
|
||||
peertube\.alter-nativ-voll\.de|
|
||||
tube\.pasa\.tf|
|
||||
tube\.worldofhauru\.xyz|
|
||||
pt\.kamp\.site|
|
||||
peertube\.teleassist\.fr|
|
||||
videos\.mleduc\.xyz|
|
||||
conf\.tube|
|
||||
media\.privacyinternational\.org|
|
||||
pt\.forty-two\.nl|
|
||||
video\.halle-leaks\.de|
|
||||
video\.grosskopfgames\.de|
|
||||
peertube\.schaeferit\.de|
|
||||
peertube\.jackbot\.fr|
|
||||
tube\.extinctionrebellion\.fr|
|
||||
peertube\.f-si\.org|
|
||||
video\.subak\.ovh|
|
||||
videos\.koweb\.fr|
|
||||
peertube\.zergy\.net|
|
||||
peertube\.roflcopter\.fr|
|
||||
peertube\.floss-marketing-school\.com|
|
||||
vloggers\.social|
|
||||
peertube\.iriseden\.eu|
|
||||
videos\.ubuntu-paris\.org|
|
||||
peertube\.mastodon\.host|
|
||||
armstube\.com|
|
||||
peertube\.s2s\.video|
|
||||
peertube\.lol|
|
||||
tube\.open-plug\.eu|
|
||||
open\.tube|
|
||||
peertube\.ch|
|
||||
peertube\.normandie-libre\.fr|
|
||||
peertube\.slat\.org|
|
||||
video\.lacaveatonton\.ovh|
|
||||
peertube\.uno|
|
||||
peertube\.servebeer\.com|
|
||||
peertube\.fedi\.quebec|
|
||||
tube\.h3z\.jp|
|
||||
tube\.plus200\.com|
|
||||
peertube\.eric\.ovh|
|
||||
tube\.metadocs\.cc|
|
||||
tube\.unmondemeilleur\.eu|
|
||||
gouttedeau\.space|
|
||||
video\.antirep\.net|
|
||||
nrop\.cant\.at|
|
||||
tube\.ksl-bmx\.de|
|
||||
tube\.plaf\.fr|
|
||||
tube\.tchncs\.de|
|
||||
video\.devinberg\.com|
|
||||
hitchtube\.fr|
|
||||
peertube\.kosebamse\.com|
|
||||
yunopeertube\.myddns\.me|
|
||||
peertube\.varney\.fr|
|
||||
peertube\.anon-kenkai\.com|
|
||||
tube\.maiti\.info|
|
||||
tubee\.fr|
|
||||
videos\.dinofly\.com|
|
||||
toobnix\.org|
|
||||
videotape\.me|
|
||||
voca\.tube|
|
||||
video\.heromuster\.com|
|
||||
video\.lemediatv\.fr|
|
||||
video\.up\.edu\.ph|
|
||||
balafon\.video|
|
||||
video\.ivel\.fr|
|
||||
thickrips\.cloud|
|
||||
pt\.laurentkruger\.fr|
|
||||
video\.monarch-pass\.net|
|
||||
peertube\.artica\.center|
|
||||
video\.alternanet\.fr|
|
||||
indymotion\.fr|
|
||||
fanvid\.stopthatimp\.net|
|
||||
video\.farci\.org|
|
||||
v\.lesterpig\.com|
|
||||
video\.okaris\.de|
|
||||
tube\.pawelko\.net|
|
||||
peertube\.mablr\.org|
|
||||
tube\.fede\.re|
|
||||
pytu\.be|
|
||||
evertron\.tv|
|
||||
devtube\.dev-wiki\.de|
|
||||
raptube\.antipub\.org|
|
||||
video\.selea\.se|
|
||||
peertube\.mygaia\.org|
|
||||
video\.oh14\.de|
|
||||
peertube\.livingutopia\.org|
|
||||
peertube\.the-penguin\.de|
|
||||
tube\.thechangebook\.org|
|
||||
tube\.anjara\.eu|
|
||||
pt\.pube\.tk|
|
||||
video\.samedi\.pm|
|
||||
mplayer\.demouliere\.eu|
|
||||
widemus\.de|
|
||||
peertube\.me|
|
||||
peertube\.zapashcanon\.fr|
|
||||
video\.latavernedejohnjohn\.fr|
|
||||
peertube\.pcservice46\.fr|
|
||||
peertube\.mazzonetto\.eu|
|
||||
video\.irem\.univ-paris-diderot\.fr|
|
||||
video\.livecchi\.cloud|
|
||||
alttube\.fr|
|
||||
video\.coop\.tools|
|
||||
video\.cabane-libre\.org|
|
||||
peertube\.openstreetmap\.fr|
|
||||
videos\.alolise\.org|
|
||||
irrsinn\.video|
|
||||
video\.antopie\.org|
|
||||
scitech\.video|
|
||||
tube2\.nemsia\.org|
|
||||
video\.amic37\.fr|
|
||||
peertube\.freeforge\.eu|
|
||||
video\.arbitrarion\.com|
|
||||
video\.datsemultimedia\.com|
|
||||
stoptrackingus\.tv|
|
||||
peertube\.ricostrongxxx\.com|
|
||||
docker\.videos\.lecygnenoir\.info|
|
||||
peertube\.togart\.de|
|
||||
tube\.postblue\.info|
|
||||
videos\.domainepublic\.net|
|
||||
peertube\.cyber-tribal\.com|
|
||||
video\.gresille\.org|
|
||||
peertube\.dsmouse\.net|
|
||||
cinema\.yunohost\.support|
|
||||
tube\.theocevaer\.fr|
|
||||
repro\.video|
|
||||
tube\.4aem\.com|
|
||||
quaziinc\.com|
|
||||
peertube\.metawurst\.space|
|
||||
videos\.wakapo\.com|
|
||||
video\.ploud\.fr|
|
||||
video\.freeradical\.zone|
|
||||
tube\.valinor\.fr|
|
||||
refuznik\.video|
|
||||
pt\.kircheneuenburg\.de|
|
||||
peertube\.asrun\.eu|
|
||||
peertube\.lagob\.fr|
|
||||
videos\.side-ways\.net|
|
||||
91video\.online|
|
||||
video\.valme\.io|
|
||||
video\.taboulisme\.com|
|
||||
videos-libr\.es|
|
||||
tv\.mooh\.fr|
|
||||
nuage\.acostey\.fr|
|
||||
video\.monsieur-a\.fr|
|
||||
peertube\.librelois\.fr|
|
||||
videos\.pair2jeux\.tube|
|
||||
videos\.pueseso\.club|
|
||||
peer\.mathdacloud\.ovh|
|
||||
media\.assassinate-you\.net|
|
||||
vidcommons\.org|
|
||||
ptube\.rousset\.nom\.fr|
|
||||
tube\.cyano\.at|
|
||||
videos\.squat\.net|
|
||||
video\.iphodase\.fr|
|
||||
peertube\.makotoworkshop\.org|
|
||||
peertube\.serveur\.slv-valbonne\.fr|
|
||||
vault\.mle\.party|
|
||||
hostyour\.tv|
|
||||
videos\.hack2g2\.fr|
|
||||
libre\.tube|
|
||||
pire\.artisanlogiciel\.net|
|
||||
videos\.numerique-en-commun\.fr|
|
||||
video\.netsyms\.com|
|
||||
video\.die-partei\.social|
|
||||
video\.writeas\.org|
|
||||
peertube\.swarm\.solvingmaz\.es|
|
||||
tube\.pericoloso\.ovh|
|
||||
watching\.cypherpunk\.observer|
|
||||
videos\.adhocmusic\.com|
|
||||
tube\.rfc1149\.net|
|
||||
peertube\.librelabucm\.org|
|
||||
videos\.numericoop\.fr|
|
||||
peertube\.koehn\.com|
|
||||
peertube\.anarchmusicall\.net|
|
||||
tube\.kampftoast\.de|
|
||||
vid\.y-y\.li|
|
||||
peertube\.xtenz\.xyz|
|
||||
diode\.zone|
|
||||
tube\.egf\.mn|
|
||||
peertube\.nomagic\.uk|
|
||||
visionon\.tv|
|
||||
videos\.koumoul\.com|
|
||||
video\.rastapuls\.com|
|
||||
video\.mantlepro\.com|
|
||||
video\.deadsuperhero\.com|
|
||||
peertube\.musicstudio\.pro|
|
||||
peertube\.we-keys\.fr|
|
||||
artitube\.artifaille\.fr|
|
||||
peertube\.ethernia\.net|
|
||||
tube\.midov\.pl|
|
||||
peertube\.fr|
|
||||
watch\.snoot\.tube|
|
||||
peertube\.donnadieu\.fr|
|
||||
argos\.aquilenet\.fr|
|
||||
tube\.nemsia\.org|
|
||||
tube\.bruniau\.net|
|
||||
videos\.darckoune\.moe|
|
||||
tube\.traydent\.info|
|
||||
dev\.videos\.lecygnenoir\.info|
|
||||
peertube\.nayya\.org|
|
||||
peertube\.live|
|
||||
peertube\.mofgao\.space|
|
||||
video\.lequerrec\.eu|
|
||||
peertube\.amicale\.net|
|
||||
aperi\.tube|
|
||||
tube\.ac-lyon\.fr|
|
||||
video\.lw1\.at|
|
||||
www\.yiny\.org|
|
||||
videos\.pofilo\.fr|
|
||||
tube\.lou\.lt|
|
||||
choob\.h\.etbus\.ch|
|
||||
tube\.hoga\.fr|
|
||||
peertube\.heberge\.fr|
|
||||
video\.obermui\.de|
|
||||
videos\.cloudfrancois\.fr|
|
||||
betamax\.video|
|
||||
video\.typica\.us|
|
||||
tube\.piweb\.be|
|
||||
video\.blender\.org|
|
||||
peertube\.cat|
|
||||
tube\.kdy\.ch|
|
||||
pe\.ertu\.be|
|
||||
peertube\.social|
|
||||
videos\.lescommuns\.org|
|
||||
tv\.datamol\.org|
|
||||
videonaute\.fr|
|
||||
dialup\.express|
|
||||
peertube\.nogafa\.org|
|
||||
megatube\.lilomoino\.fr|
|
||||
peertube\.tamanoir\.foucry\.net|
|
||||
peertube\.devosi\.org|
|
||||
peertube\.1312\.media|
|
||||
tube\.bootlicker\.party|
|
||||
skeptikon\.fr|
|
||||
video\.blueline\.mg|
|
||||
tube\.homecomputing\.fr|
|
||||
tube\.ouahpiti\.info|
|
||||
video\.tedomum\.net|
|
||||
video\.g3l\.org|
|
||||
fontube\.fr|
|
||||
peertube\.gaialabs\.ch|
|
||||
tube\.kher\.nl|
|
||||
peertube\.qtg\.fr|
|
||||
video\.migennes\.net|
|
||||
tube\.p2p\.legal|
|
||||
troll\.tv|
|
||||
videos\.iut-orsay\.fr|
|
||||
peertube\.solidev\.net|
|
||||
videos\.cemea\.org|
|
||||
video\.passageenseine\.fr|
|
||||
videos\.festivalparminous\.org|
|
||||
peertube\.touhoppai\.moe|
|
||||
sikke\.fi|
|
||||
peer\.hostux\.social|
|
||||
share\.tube|
|
||||
peertube\.walkingmountains\.fr|
|
||||
videos\.benpro\.fr|
|
||||
peertube\.parleur\.net|
|
||||
peertube\.heraut\.eu|
|
||||
tube\.aquilenet\.fr|
|
||||
peertube\.gegeweb\.eu|
|
||||
framatube\.org|
|
||||
thinkerview\.video|
|
||||
tube\.conferences-gesticulees\.net|
|
||||
peertube\.datagueule\.tv|
|
||||
video\.lqdn\.fr|
|
||||
tube\.mochi\.academy|
|
||||
media\.zat\.im|
|
||||
video\.colibris-outilslibres\.org|
|
||||
tube\.svnet\.fr|
|
||||
peertube\.video|
|
||||
peertube3\.cpy\.re|
|
||||
peertube2\.cpy\.re|
|
||||
videos\.tcit\.fr|
|
||||
peertube\.cpy\.re
|
||||
)'''
|
||||
class PeerTubeSHIE(SelfhostedInfoExtractor):
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_API_BASE = 'https://%s/api/v1/videos/%s/%s'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
peertube:(?P<host>[^:]+):|
|
||||
https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
|
||||
)
|
||||
(?P<id>%s)
|
||||
''' % (_INSTANCES_RE, _UUID_RE)
|
||||
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (_UUID_RE)
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % _UUID_RE
|
||||
_SH_VALID_CONTENT_STRINGS = (
|
||||
'<title>PeerTube<',
|
||||
'There will be other non JS-based clients to access PeerTube',
|
||||
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
|
||||
)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||
'md5': '9bed8c0137913e17b86334e5885aacff',
|
||||
|
@ -468,26 +71,11 @@ class PeerTubeIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@staticmethod
|
||||
def _extract_peertube_url(webpage, source_url):
|
||||
mobj = re.match(
|
||||
r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
|
||||
% PeerTubeIE._UUID_RE, source_url)
|
||||
if mobj and any(p in webpage for p in (
|
||||
'<title>PeerTube<',
|
||||
'There will be other non JS-based clients to access PeerTube',
|
||||
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
|
||||
return 'peertube:%s:%s' % mobj.group('host', 'id')
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage, source_url):
|
||||
entries = re.findall(
|
||||
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
|
||||
% (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
|
||||
if not entries:
|
||||
peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
|
||||
if peertube_url:
|
||||
entries = [peertube_url]
|
||||
r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//[^/]+/videos/embed/%s)'''
|
||||
% (PeerTubeSHIE._UUID_RE), webpage)
|
||||
return entries
|
||||
|
||||
def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
|
||||
|
@ -515,9 +103,11 @@ class PeerTubeIE(InfoExtractor):
|
|||
})
|
||||
return subtitles
|
||||
|
||||
def _real_extract(self, url):
|
||||
def _selfhosted_extract(self, url, webpage=None):
|
||||
mobj = re.match(self._VALID_URL, url)
|
||||
host = mobj.group('host') or mobj.group('host_2')
|
||||
if not mobj:
|
||||
mobj = re.match(self._SH_VALID_URL, url)
|
||||
host = mobj.group('host')
|
||||
video_id = mobj.group('id')
|
||||
|
||||
video = self._call_api(
|
||||
|
|
|
@ -89,7 +89,10 @@ class TestAllURLsMatching(unittest.TestCase):
|
|||
url = tc['url']
|
||||
for ie in ies:
|
||||
if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'):
|
||||
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
||||
if ie._SELFHOSTED is True:
|
||||
self.assertTrue(ie.suitable(url) or ie.suitable_selfhosted(url, None), '%s should match URL %r' % (type(ie).__name__, url))
|
||||
else:
|
||||
self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url))
|
||||
else:
|
||||
self.assertFalse(
|
||||
ie.suitable(url),
|
||||
|
|
Loading…
Reference in a new issue