From adea7807afe5bfb0377bef132ec181bb6b74afcf Mon Sep 17 00:00:00 2001 From: Laura Liberda Date: Thu, 10 Dec 2020 03:23:28 +0100 Subject: [PATCH] mastodon extractor (#11) --- haruhi_dl/extractor/extractors.py | 1 + haruhi_dl/extractor/mastodon.py | 149 ++++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 haruhi_dl/extractor/mastodon.py diff --git a/haruhi_dl/extractor/extractors.py b/haruhi_dl/extractor/extractors.py index c5ce71380..61eda1707 100644 --- a/haruhi_dl/extractor/extractors.py +++ b/haruhi_dl/extractor/extractors.py @@ -610,6 +610,7 @@ from .markiza import ( MarkizaIE, MarkizaPageIE, ) +from .mastodon import MastodonSHIE from .massengeschmacktv import MassengeschmackTVIE from .matchtv import MatchTVIE from .mdr import MDRIE diff --git a/haruhi_dl/extractor/mastodon.py b/haruhi_dl/extractor/mastodon.py new file mode 100644 index 000000000..ff50e35ee --- /dev/null +++ b/haruhi_dl/extractor/mastodon.py @@ -0,0 +1,149 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import SelfhostedInfoExtractor + +from ..utils import ( + clean_html, + str_or_none, + ExtractorError, +) + +import re + + +class MastodonSHIE(SelfhostedInfoExtractor): + """ + This extractor is for services implementing the Mastodon API, not just Mastodon + Supported services (possibly more already work or could): + - Mastodon - https://github.com/tootsuite/mastodon + - Glitch (a fork of Mastodon) - https://github.com/glitch-soc/mastodon + - Pleroma - https://git.pleroma.social/pleroma/pleroma + - Gab Social (a fork of Mastodon) - https://code.gab.com/gab/social/gab-social/ + """ + IE_NAME = 'mastodon' + _VALID_URL = r'mastodon:(?P[^:]+):(?P.+)' + _SH_VALID_URL = r'''(?x) + https?:// + (?P[^/\s]+)/ + (?: + # mastodon + @[a-zA-Z0-9_]+ + # gab social + |[a-zA-Z0-9_]+/posts + # mastodon legacy (?) + |users/[a-zA-Z0-9_]+/statuses + # pleroma + |notice + # pleroma (OStatus standard?) - https://git.pleroma.social/pleroma/pleroma/-/blob/e9859b68fcb9c38b2ec27a45ffe0921e8d78b5e1/lib/pleroma/web/router.ex#L607 + |objects + |activities + )/(?P[0-9a-zA-Z-]+) + ''' + _SH_VALID_CONTENT_STRINGS = ( + ',"settings":{"known_fediverse":', # Mastodon initial-state + '
  • Documentation
  • ', + 'Pleroma', + '', + 'Alternatively, try one of the native apps for Gab Social for your platform.', + ) + _SH_VALID_CONTENT_REGEXES = ( + # double quotes on Mastodon, single quotes on Gab Social + r'