Compare commits
8 commits
Author | SHA1 | Date | |
---|---|---|---|
2f375d447c | |||
d464b29113 | |||
19602fb3f5 | |||
a550e21b8c | |||
1ae67712e8 | |||
Dominika Liberda | a96bf110da | ||
973652cf4d | |||
d81137a604 |
|
@ -1,5 +1,6 @@
|
|||
default:
|
||||
before_script:
|
||||
- sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
|
||||
- apk add bash
|
||||
- pip install nose
|
||||
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
version 2021.08.01
|
||||
Extractor
|
||||
* [youtube] fixed agegate
|
||||
* [niconico] dmc downloader from youtube-dlp
|
||||
* [peertube] new URL schemes
|
||||
|
||||
version 2021.06.20
|
||||
Core
|
||||
* [playwright] fixed headlessness
|
||||
|
|
|
@ -1513,6 +1513,7 @@ from .videomore import (
|
|||
)
|
||||
from .videopress import VideoPressIE
|
||||
from .videotarget import VideoTargetIE
|
||||
from .vider import ViderIE
|
||||
from .vidio import VidioIE
|
||||
from .vidlii import VidLiiIE
|
||||
from .vidme import (
|
||||
|
|
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
|||
from ..utils import (
|
||||
int_or_none,
|
||||
url_or_none,
|
||||
ExtractorError,
|
||||
)
|
||||
|
||||
|
||||
|
@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
|
|||
'Content-type': 'application/json'
|
||||
}
|
||||
|
||||
res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
||||
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
||||
if not res.get('result'):
|
||||
if res['error']['code'] == 13404:
|
||||
raise ExtractorError('Video requires DRM protection', expected=True)
|
||||
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||
return res['result']['mediaItem']
|
||||
|
||||
def get_url(self, media_id, source_id):
|
||||
|
@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
|
|||
}
|
||||
|
||||
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
|
||||
if not res.get('result'):
|
||||
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||
return res['result']['url']
|
||||
|
|
|
@ -21,7 +21,7 @@ from ..utils import (
|
|||
|
||||
|
||||
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
|
||||
_SH_VALID_CONTENT_STRINGS = (
|
||||
'<title>PeerTube<',
|
||||
|
@ -180,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
|||
|
||||
class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||
'md5': '9bed8c0137913e17b86334e5885aacff',
|
||||
'md5': '8563064d245a4be5705bddb22bb00a28',
|
||||
'info_dict': {
|
||||
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||
'ext': 'mp4',
|
||||
'title': 'What is PeerTube?',
|
||||
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
|
||||
'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
|
||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||
'timestamp': 1538391166,
|
||||
'upload_date': '20181001',
|
||||
|
@ -220,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
|||
'upload_date': '20200420',
|
||||
'uploader': 'Drew DeVault',
|
||||
}
|
||||
}, {
|
||||
# new url scheme since PeerTube 3.3
|
||||
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
|
||||
'info_dict': {
|
||||
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||
'ext': 'mp4',
|
||||
'title': 'E2E tests',
|
||||
'uploader_id': '37855',
|
||||
'timestamp': 1589276219,
|
||||
'upload_date': '20200512',
|
||||
'uploader': 'chocobozzz',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||
'only_matching': True,
|
||||
|
@ -289,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
|||
|
||||
description = None
|
||||
if webpage:
|
||||
description = self._og_search_description(webpage)
|
||||
description = self._og_search_description(webpage, default=None)
|
||||
if not description:
|
||||
full_description = self._call_api(
|
||||
host, 'videos', video_id, 'description', note='Downloading description JSON',
|
||||
|
@ -305,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
|||
|
||||
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
|
||||
|
@ -316,6 +337,9 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
|||
'uploader': 'Internet. Czas działać!',
|
||||
},
|
||||
'playlist_mincount': 14,
|
||||
}, {
|
||||
'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _selfhosted_extract(self, url, webpage=None):
|
||||
|
@ -352,18 +376,21 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
|||
|
||||
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
||||
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
|
||||
'info_dict': {
|
||||
'id': '2',
|
||||
'title': 'internet_czas_dzialac',
|
||||
'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
|
||||
'title': 'Internet. Czas działać!',
|
||||
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||
'uploader_id': 3,
|
||||
'uploader': 'Internet. Czas działać!',
|
||||
},
|
||||
'playlist_mincount': 14,
|
||||
}, {
|
||||
'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _selfhosted_extract(self, url, webpage=None):
|
||||
|
@ -401,18 +428,21 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
|||
|
||||
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
|
||||
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
|
||||
'info_dict': {
|
||||
'id': '3',
|
||||
'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
|
||||
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||
'uploader': 'Internet. Czas działać!',
|
||||
'title': 'Internet. Czas działać!',
|
||||
'uploader_id': 3,
|
||||
},
|
||||
'playlist_mincount': 14,
|
||||
}, {
|
||||
'url': 'https://video.internet-czas-dzialac.pl/a/icd',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _selfhosted_extract(self, url, webpage=None):
|
||||
|
|
|
@ -91,6 +91,14 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
|
|||
'upload_date': '20201116',
|
||||
},
|
||||
}]
|
||||
}, {
|
||||
# PR4 audition - other frontend
|
||||
'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
|
||||
'info_dict': {
|
||||
'id': '2610977',
|
||||
'ext': 'mp3',
|
||||
'title': 'Pogłos 29 października godz. 23:01',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
||||
'only_matching': True,
|
||||
|
@ -113,24 +121,34 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
|
|||
|
||||
content = self._search_regex(
|
||||
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||
webpage, 'content')
|
||||
webpage, 'content', default=None)
|
||||
|
||||
timestamp = unified_timestamp(self._html_search_regex(
|
||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||
webpage, 'timestamp', fatal=False))
|
||||
webpage, 'timestamp', default=None))
|
||||
|
||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
||||
thumbnail_url = self._og_search_thumbnail(webpage, default=None)
|
||||
|
||||
title = self._og_search_title(webpage).strip()
|
||||
|
||||
description = strip_or_none(self._og_search_description(webpage, default=None))
|
||||
|
||||
if not content:
|
||||
return {
|
||||
'id': playlist_id,
|
||||
'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
|
||||
'title': title,
|
||||
'description': description,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail_url,
|
||||
}
|
||||
|
||||
entries = self._extract_webpage_player_entries(content, playlist_id, {
|
||||
'title': title,
|
||||
'timestamp': timestamp,
|
||||
'thumbnail': thumbnail_url,
|
||||
})
|
||||
|
||||
description = strip_or_none(self._og_search_description(webpage))
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
||||
|
||||
|
||||
|
|
37
haruhi_dl/extractor/vider.py
Normal file
37
haruhi_dl/extractor/vider.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class ViderIE(InfoExtractor):
    """Extractor for videos hosted on vider.pl / vider.info.

    Matches both the ``/vid/+f<id>`` page URLs and the ``/embed/video/<id>``
    URLs. Metadata is taken from the JSON-LD block embedded in the video
    page, and the media URL from the ``file`` query parameter of the
    JSON-LD ``embedUrl`` field.
    """

    _VALID_URL = r'https?://vider\.(?:pl|info)/(?:vid/\+f|embed/video/)(?P<id>[a-z\d]+)'
    _TESTS = [{
        'url': 'https://vider.info/vid/+fsx51se',
        'info_dict': {
            'id': 'sx51se',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'upload_date': '20210906',
            'timestamp': 1630927351,
        },
    }]

    def _real_extract(self, url):
        """Build and return the info dict for the video referenced by *url*."""
        video_id = self._match_id(url)

        # Whichever URL form matched, the vider.info video page is the one
        # that gets downloaded.
        page = self._download_webpage(f'https://vider.info/vid/+f{video_id}', video_id)

        raw_json_ld = self._search_regex(
            r'(?s)<script type="application/ld\+json">(.+?)</script>',
            page, 'JSON-LD')
        json_ld = self._parse_json(raw_json_ld, video_id)

        info_dict = self._json_ld(json_ld, video_id)

        # The media URL is carried in the "file" query parameter of embedUrl.
        media_url = self._search_regex(r'\?file=(.+)', json_ld['embedUrl'], 'video url')

        info_dict.update({
            # The JSON-LD description is generated SEO junk, so it is dropped.
            'description': None,
            'id': video_id,
            'formats': [{
                'url': media_url,
                'http_headers': {
                    'Referer': 'https://vider.info/',
                },
            }],
        })
        return info_dict
|
|
@ -1441,29 +1441,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
|
||||
or re.search(r'player-age-gate-content">', video_webpage) is not None):
|
||||
age_gate = True
|
||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
||||
# this can be viewed without login into Youtube
|
||||
data = compat_urllib_parse_urlencode({
|
||||
'video_id': video_id,
|
||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
||||
'html5': 1,
|
||||
'c': 'TVHTML5',
|
||||
'cver': '6.20180913',
|
||||
})
|
||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
||||
try:
|
||||
video_info_webpage = self._download_webpage(
|
||||
video_info_url, video_id,
|
||||
note='Downloading age-gated video info',
|
||||
yti1_player = self._download_webpage(
|
||||
proto + '://www.youtube.com/youtubei/v1/player', video_id,
|
||||
headers={
|
||||
'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.0 Safari/537.36',
|
||||
'Content-Type': 'application/json',
|
||||
'X-Goog-Api-Key': self._YOUTUBE_API_KEY,
|
||||
},
|
||||
data=bytes(json.dumps({
|
||||
'context': {
|
||||
'client': {
|
||||
'clientName': 'WEB',
|
||||
'clientVersion': '2.20210721.00.00',
|
||||
'clientScreen': 'EMBED',
|
||||
},
|
||||
},
|
||||
'videoId': video_id,
|
||||
}).encode('utf-8')),
|
||||
note='Downloading age-gated player info',
|
||||
errnote='unable to download video info')
|
||||
except ExtractorError:
|
||||
video_info_webpage = None
|
||||
if video_info_webpage:
|
||||
video_info = compat_parse_qs(video_info_webpage)
|
||||
pl_response = video_info.get('player_response', [None])[0]
|
||||
player_response = extract_player_response(pl_response, video_id)
|
||||
yti1_player = None
|
||||
if yti1_player:
|
||||
player_response = extract_player_response(yti1_player, video_id)
|
||||
add_dash_mpd(video_info)
|
||||
view_count = extract_view_count(video_info)
|
||||
view_count = extract_view_count(video_id)
|
||||
else:
|
||||
age_gate = False
|
||||
# Try looking directly into the video webpage
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
__version__ = '2021.06.24.1'
|
||||
__version__ = '2021.08.01'
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(__version__)
|
||||
|
|
Loading…
Reference in a new issue