Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
haruhi-dl
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
21
Issues
21
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Laura x Dominika x Patrycja
haruhi-dl
Commits
4221c2ee
Commit
4221c2ee
authored
Dec 12, 2020
by
Lauren Liberda
1
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
funkwhale extractors
parent
b2e1200c
Pipeline
#167
passed with stage
in 21 minutes and 12 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
307 additions
and
0 deletions
+307
-0
haruhi_dl/extractor/extractors.py
haruhi_dl/extractor/extractors.py
+7
-0
haruhi_dl/extractor/funkwhale.py
haruhi_dl/extractor/funkwhale.py
+300
-0
No files found.
haruhi_dl/extractor/extractors.py
View file @
4221c2ee
...
...
@@ -403,6 +403,13 @@ from .frontendmasters import (
)
from
.funimation
import
FunimationIE
from
.funk
import
FunkIE
from
.funkwhale
import
(
FunkwhaleAlbumSHIE
,
FunkwhaleArtistSHIE
,
FunkwhaleChannelSHIE
,
FunkwhalePlaylistSHIE
,
FunkwhaleTrackSHIE
,
)
from
.fusion
import
FusionIE
from
.fxnetworks
import
FXNetworksIE
from
.gaia
import
GaiaIE
...
...
haruhi_dl/extractor/funkwhale.py
0 → 100644
View file @
4221c2ee
# coding: utf-8
from
__future__
import
unicode_literals
from
.common
import
SelfhostedInfoExtractor
from
..utils
import
(
compat_str
,
compat_urllib_parse_urlencode
,
try_get
,
parse_iso8601
,
)
class FunkwhaleBaseExtractor(SelfhostedInfoExtractor):
    """Helpers shared by the Funkwhale extractors.

    Wraps calls to an instance's ``/api/v1/`` endpoints and converts the
    JSON objects they return into info dict fragments.
    """

    # NOTE(review): presumably matched by SelfhostedInfoExtractor against a
    # page body to recognise a Funkwhale instance - confirm in .common.
    _SH_VALID_CONTENT_STRINGS = (
        "<noscript><strong>We're sorry but Funkwhale doesn't work",
    )

    def _call_api(self, host, method, params, vis_id, note='Downloading JSON metadata'):
        """Download ``https://<host>/api/v1/<method>`` and return the JSON.

        ``params`` is urlencoded into the query string when it is a
        non-empty dict; any other value means "no query string".
        ``vis_id`` and ``note`` are passed through to ``_download_json``.
        """
        url = 'https://%s/api/v1/%s' % (host, method)
        # only append the separator when there is something to send
        if isinstance(params, dict) and params:
            url += '?' + compat_urllib_parse_urlencode(params)
        return self._download_json(url, vis_id, note)

    def _cover_to_thumbnails(self, cover_data):
        """Turn a cover object into a list of thumbnail dicts.

        Returns None when there is no cover data. The original image gets
        an explicit high preference; the square crops are added after it
        when the API provides them.
        """
        if not cover_data:
            return None
        urls = cover_data.get('urls') or {}
        thumbnails = []
        if urls.get('original'):
            thumbnails.append({
                'url': urls['original'],
                'filesize': cover_data.get('size'),
                'preference': 500,
            })
        for quality in ('large_square_crop', 'medium_square_crop'):
            if urls.get(quality):
                thumbnails.append({
                    'url': urls[quality],
                })
        return thumbnails

    def _track_data_to_entry(self, track_data, host):
        """Build the info dict of a single track from its API object.

        ``host`` is used to build absolute URLs, because the API returns
        ``listen_url`` as a path.
        """
        uploads = track_data.get('uploads') or ()
        formats = []
        for upload in uploads:
            formats.append({
                'url': 'https://%s%s' % (host, upload['listen_url']),
                'ext': upload.get('extension'),
                'abr': upload.get('bitrate'),
                'filesize': upload.get('size'),
            })

        # 'artist' may be present but null
        channel_data = track_data.get('artist') or {}
        channel_id = channel_data.get('id')
        channel_url = None
        if isinstance(channel_id, int):
            channel_url = 'https://%s/library/artists/%d/' % (host, channel_id)

        # prefer the track's own cover, fall back to the album's
        cover = try_get(
            track_data, (
                lambda x: x['cover'],
                lambda x: x['album']['cover'],
            ), dict)

        info_dict = {
            'id': compat_str(track_data['id']),
            'formats': formats,
            'title': track_data['title'],
            'description': try_get(
                track_data, lambda x: x['description']['text'], compat_str),
            'channel': channel_data.get('name'),
            'channel_url': channel_url,
            'thumbnails': self._cover_to_thumbnails(cover),
            # the duration is only available on the uploads
            'duration': try_get(
                track_data, lambda x: x['uploads'][0]['duration'], int),
            'timestamp': parse_iso8601(track_data.get('creation_date')),
            'view_count': track_data.get('downloads_count'),
            'license': track_data.get('license'),
            'tags': track_data.get('tags'),
        }
        info_dict.update(
            self._uploader_data_to_info_dict(track_data.get('attributed_to')))
        return info_dict

    def _uploader_data_to_info_dict(self, uploader_data):
        """Map an actor object to the ``uploader``/``uploader_url`` fields.

        Returns an empty dict when there is no actor. ``uploader_url`` is
        None unless both the domain and the username are known.
        """
        if not uploader_data:
            return {}
        domain = uploader_data.get('domain')
        username = uploader_data.get('preferred_username')
        uploader_url = None
        if domain and username:
            uploader_url = 'https://%s/@%s' % (domain, username)
        return {
            'uploader': uploader_data.get('name'),
            'uploader_url': uploader_url,
        }
class FunkwhaleTrackSHIE(FunkwhaleBaseExtractor):
    """Extractor for a single track (``/library/tracks/<id>``)."""

    IE_NAME = 'funkwhale:track'

    _VALID_URL = r'funkwhale:track:(?P<host>[^:]+):(?P<id>.+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/library/tracks/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://podcast.midline.pl/library/tracks/10/',
            'info_dict': {
                'id': '10',
                'ext': 'mp3',
                'uploader': 'Internet. Czas działać!',
                'title': '#0 - Podcast "Internet. Czas działać! | Trailer',
                'description': '"Internet. Czas działać!" to podcast, z którego dowiecie się, jak internetowe technologie wpływają na społeczeństwo i jak być ich świadomym konsumentem.',
                'upload_date': '20201207',
                'timestamp': 1607301944,
            },
        },
    ]

    def _selfhosted_extract(self, url, webpage=None):
        """Fetch the track object from the API and convert it to an entry."""
        host, track_id = self._match_id_and_host(url)
        api_path = 'tracks/%s' % track_id
        metadata = self._call_api(host, api_path, None, track_id)
        return self._track_data_to_entry(metadata, host)
class FunkwhaleArtistSHIE(FunkwhaleBaseExtractor):
    """Extractor for all playable tracks of an artist.

    Artists that are backed by a channel are delegated to the channel
    extractor.
    """

    IE_NAME = 'funkwhale:artist'
    _VALID_URL = r'funkwhale:artist:(?P<host>[^:]+):(?P<id>.+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/library/artists/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://open.audio/library/artists/13556/',
        'info_dict': {
            'id': '13556',
            'title': 'Violons_Populaires_en_Nouvelle_Aquitaine',
            'uploader': 'Violons_Populaires_en_Nouvelle_Aquitaine',
        },
        # 77 tracks, but just 38 of them are playable
        'playlist_mincount': 38,
    }]

    def _selfhosted_extract(self, url, webpage=None):
        """Return a playlist with every playable track of the artist."""
        host, vis_id = self._match_id_and_host(url)

        artist_data = self._call_api(host, 'artists/%s' % vis_id, None, vis_id)

        # the same is done on the frontend
        # https://dev.funkwhale.audio/funkwhale/funkwhale/-/blob/89037a76/front/src/components/library/ArtistBase.vue#L189
        if artist_data.get('channel'):
            return self.url_result(
                'funkwhale:channel:%s:%s' % (host, artist_data['channel']['uuid']),
                ie='FunkwhaleChannelSH')

        # one query definition shared by every page request
        query = {
            'artist': vis_id,
            'hidden': '',
            'playable': 'true',
        }
        tracks_data = self._call_api(
            host, 'tracks', query, vis_id, 'Downloading track list')
        tracks = list(tracks_data['results'])

        # follow the pagination for as long as the API announces a next page
        page = 1
        while tracks_data.get('next') is not None:
            page += 1
            tracks_data = self._call_api(
                host, 'tracks', dict(query, page=page), vis_id,
                'Downloading track list (page #%d)' % page)
            tracks.extend(tracks_data['results'])

        entries = [self._track_data_to_entry(track, host) for track in tracks]

        # the attribution may be missing; the uploader helper accepts None
        attributed_to = artist_data.get('attributed_to')

        info_dict = {
            '_type': 'playlist',
            'id': vis_id,
            'entries': entries,
            'title': (attributed_to or {}).get('name'),
        }
        info_dict.update(self._uploader_data_to_info_dict(attributed_to))
        return info_dict
class FunkwhaleChannelSHIE(FunkwhaleBaseExtractor):
    """Extractor for all playable tracks of a channel.

    The id in the URL is passed to the ``channels/<id>`` endpoint as-is
    (the tests use both a name and a UUID); the resulting playlist is
    always identified by the channel UUID returned by the API.
    """

    IE_NAME = 'funkwhale:channel'
    _VALID_URL = r'funkwhale:channel:(?P<host>[^:]+):(?P<id>.+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/channels/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://podcast.midline.pl/channels/Midline/',
        'info_dict': {
            'id': 'd98ae7a5-5bd5-48c8-a178-a9a12e84cfc7',
        },
        'playlist_mincount': 9,
    }, {
        'url': 'https://podcast.midline.pl/channels/d98ae7a5-5bd5-48c8-a178-a9a12e84cfc7/',
        'only_matching': True,
    }]

    def _selfhosted_extract(self, url, webpage=None):
        """Return a playlist with every playable track of the channel."""
        host, vis_id = self._match_id_and_host(url)

        channel_data = self._call_api(host, 'channels/%s' % vis_id, None, vis_id)
        channel_uuid = channel_data['uuid']

        # one query definition shared by every page request
        query = {
            'channel': channel_uuid,
            'include_channels': 'true',
            'playable': 'true',
        }
        tracks_data = self._call_api(
            host, 'tracks', query, channel_uuid, 'Downloading track list')
        tracks = list(tracks_data['results'])

        # follow the pagination for as long as the API announces a next
        # page, reporting progress under the same id as the first request
        page = 1
        while tracks_data.get('next') is not None:
            page += 1
            tracks_data = self._call_api(
                host, 'tracks', dict(query, page=page), channel_uuid,
                'Downloading track list (page #%d)' % page)
            tracks.extend(tracks_data['results'])

        entries = [self._track_data_to_entry(track, host) for track in tracks]

        # the attribution may be missing; the uploader helper accepts None
        attributed_to = channel_data.get('attributed_to')

        info_dict = {
            '_type': 'playlist',
            'id': channel_uuid,
            'title': (attributed_to or {}).get('name'),
            'entries': entries,
        }
        info_dict.update(self._uploader_data_to_info_dict(attributed_to))
        return info_dict
class FunkwhalePlaylistSHIE(FunkwhaleBaseExtractor):
    """Extractor for user playlists (``/library/playlists/<id>``)."""

    IE_NAME = 'funkwhale:playlist'

    _VALID_URL = r'funkwhale:playlist:(?P<host>[^:]+):(?P<id>.+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/library/playlists/(?P<id>\d+)'

    _TESTS = [
        {
            'url': 'https://open.audio/library/playlists/268',
            'info_dict': {
                'id': '268',
                'title': 'Cleaning',
                'uploader': 'trash',
            },
            'playlist_mincount': 180,
        },
    ]

    def _selfhosted_extract(self, url, webpage=None):
        """Fetch the playlist and its playable tracks; return a playlist dict."""
        host, playlist_id = self._match_id_and_host(url)

        playlist = self._call_api(
            host, 'playlists/%s' % playlist_id, None, playlist_id)

        only_playable = {
            'playable': 'true',
        }
        listing = self._call_api(
            host, 'playlists/%s/tracks' % playlist_id, only_playable,
            playlist_id, 'Downloading track list')

        # each result wraps the actual track object under the 'track' key
        entries = []
        for item in listing['results']:
            entries.append(self._track_data_to_entry(item.get('track'), host))

        result = {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist['name'],
            'entries': entries,
        }
        owner = playlist.get('actor')
        result.update(self._uploader_data_to_info_dict(owner))
        return result
class FunkwhaleAlbumSHIE(FunkwhaleBaseExtractor):
    """Extractor for all playable tracks of an album, in album order."""

    IE_NAME = 'funkwhale:album'
    _VALID_URL = r'funkwhale:album:(?P<host>[^:]+):(?P<id>.+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/library/albums/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://open.audio/library/albums/5623/',
        'info_dict': {
            'id': '5623',
            'title': 'Volume 5',
        },
        'playlist_mincount': 115,
    }]

    def _selfhosted_extract(self, url, webpage=None):
        """Return a playlist with every playable track of the album."""
        host, vis_id = self._match_id_and_host(url)

        album_data = self._call_api(host, 'albums/%s' % vis_id, None, vis_id)

        # one query definition shared by every page request
        query = {
            'ordering': 'disc_number,position',
            'album': vis_id,
            'include_channels': 'true',
            'playable': 'true',
        }
        tracks_data = self._call_api(
            host, 'tracks', query, vis_id, 'Downloading track list')
        tracks = list(tracks_data['results'])

        # follow the pagination for as long as the API announces a next page
        page = 1
        while tracks_data.get('next') is not None:
            page += 1
            tracks_data = self._call_api(
                host, 'tracks', dict(query, page=page), vis_id,
                'Downloading track list (page #%d)' % page)
            tracks.extend(tracks_data['results'])

        entries = [self._track_data_to_entry(track, host) for track in tracks]

        return {
            '_type': 'playlist',
            'id': vis_id,
            'title': album_data['title'],
            'entries': entries,
            'thumbnails': self._cover_to_thumbnails(album_data.get('cover')),
        }
Lauren Liberda
@selfisekai
mentioned in issue
#17 (closed)
·
Dec 12, 2020
mentioned in issue
#17 (closed)
mentioned in issue #17
Toggle commit list
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment