Compare commits

...

8 commits

Author SHA1 Message Date
Lauren Liberda 2f375d447c fix/speedup ci 2021-09-09 12:38:11 +02:00
Lauren Liberda d464b29113 vider support 2021-09-06 22:34:06 +02:00
Lauren Liberda 19602fb3f5 [polskieradio] fix PR4 audition shit 2021-08-31 20:25:12 +02:00
Lauren Liberda a550e21b8c [ipla] state the DRM requirement clearly 2021-08-07 02:23:28 +02:00
Lauren Liberda 1ae67712e8 [ipla] error handling 2021-08-07 01:08:07 +02:00
Dominika Liberda a96bf110da * version 2021.08.01 2021-08-01 17:44:07 +02:00
Lauren Liberda 973652cf4d [youtube] fix age gate for *some* videos 2021-08-01 17:39:30 +02:00
Lauren Liberda d81137a604 [peertube] pt 3.3+ url scheme support, fix tests, minor fixes 2021-07-30 20:40:19 +02:00
9 changed files with 140 additions and 37 deletions

View file

@ -1,5 +1,6 @@
default:
before_script:
- sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
- apk add bash
- pip install nose

View file

@ -1,3 +1,9 @@
version 2021.08.01
Extractor
* [youtube] fixed agegate
* [niconico] dmc downloader from youtube-dlp
* [peertube] new URL schemas
version 2021.06.20
Core
* [playwright] fixed headlessness

View file

@ -1513,6 +1513,7 @@ from .videomore import (
)
from .videopress import VideoPressIE
from .videotarget import VideoTargetIE
from .vider import ViderIE
from .vidio import VidioIE
from .vidlii import VidLiiIE
from .vidme import (

View file

@ -8,6 +8,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
url_or_none,
ExtractorError,
)
@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
'Content-type': 'application/json'
}
res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
if not res.get('result'):
if res['error']['code'] == 13404:
raise ExtractorError('Video requires DRM protection', expected=True)
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
return res['result']['mediaItem']
def get_url(self, media_id, source_id):
@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
}
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
if not res.get('result'):
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
return res['result']['url']

View file

@ -21,7 +21,7 @@ from ..utils import (
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
_SH_VALID_CONTENT_STRINGS = (
'<title>PeerTube<',
@ -180,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
class PeerTubeSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '9bed8c0137913e17b86334e5885aacff',
'md5': '8563064d245a4be5705bddb22bb00a28',
'info_dict': {
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4',
'title': 'What is PeerTube?',
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
'timestamp': 1538391166,
'upload_date': '20181001',
@ -220,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
'upload_date': '20200420',
'uploader': 'Drew DeVault',
}
}, {
# new url scheme since PeerTube 3.3
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
'info_dict': {
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
'ext': 'mp4',
'title': 'E2E tests',
'uploader_id': '37855',
'timestamp': 1589276219,
'upload_date': '20200512',
'uploader': 'chocobozzz',
},
}, {
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
'only_matching': True,
}, {
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
'only_matching': True,
}, {
'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
'only_matching': True,
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
@ -289,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
description = None
if webpage:
description = self._og_search_description(webpage)
description = self._og_search_description(webpage, default=None)
if not description:
full_description = self._call_api(
host, 'videos', video_id, 'description', note='Downloading description JSON',
@ -305,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
@ -316,6 +337,9 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
'uploader': 'Internet. Czas działać!',
},
'playlist_mincount': 14,
}, {
'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):
@ -352,18 +376,21 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
'info_dict': {
'id': '2',
'title': 'internet_czas_dzialac',
'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
'title': 'Internet. Czas działać!',
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
'uploader_id': 3,
'uploader': 'Internet. Czas działać!',
},
'playlist_mincount': 14,
}, {
'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):
@ -401,18 +428,21 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
'info_dict': {
'id': '3',
'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
'uploader': 'Internet. Czas działać!',
'title': 'Internet. Czas działać!',
'uploader_id': 3,
},
'playlist_mincount': 14,
}, {
'url': 'https://video.internet-czas-dzialac.pl/a/icd',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):

View file

@ -91,6 +91,14 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
'upload_date': '20201116',
},
}]
}, {
# PR4 audition - other frontend
'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
'info_dict': {
'id': '2610977',
'ext': 'mp3',
'title': 'Pogłos 29 października godz. 23:01',
},
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
@ -113,24 +121,34 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
content = self._search_regex(
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
webpage, 'content')
webpage, 'content', default=None)
timestamp = unified_timestamp(self._html_search_regex(
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
webpage, 'timestamp', fatal=False))
webpage, 'timestamp', default=None))
thumbnail_url = self._og_search_thumbnail(webpage)
thumbnail_url = self._og_search_thumbnail(webpage, default=None)
title = self._og_search_title(webpage).strip()
description = strip_or_none(self._og_search_description(webpage, default=None))
if not content:
return {
'id': playlist_id,
'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
'title': title,
'description': description,
'timestamp': timestamp,
'thumbnail': thumbnail_url,
}
entries = self._extract_webpage_player_entries(content, playlist_id, {
'title': title,
'timestamp': timestamp,
'thumbnail': thumbnail_url,
})
description = strip_or_none(self._og_search_description(webpage))
return self.playlist_result(entries, playlist_id, title, description)

View file

@ -0,0 +1,37 @@
from .common import InfoExtractor
class ViderIE(InfoExtractor):
_VALID_URL = r'https?://vider\.(?:pl|info)/(?:vid/\+f|embed/video/)(?P<id>[a-z\d]+)'
_TESTS = [{
'url': 'https://vider.info/vid/+fsx51se',
'info_dict': {
'id': 'sx51se',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'upload_date': '20210906',
'timestamp': 1630927351,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(f'https://vider.info/vid/+f{video_id}', video_id)
json_ld = self._parse_json(
self._search_regex(
r'(?s)<script type="application/ld\+json">(.+?)</script>',
webpage, 'JSON-LD'), video_id)
info_dict = self._json_ld(json_ld, video_id)
# generated SEO junk
info_dict['description'] = None
info_dict['id'] = video_id
info_dict['formats'] = [{
'url': self._search_regex(r'\?file=(.+)', json_ld['embedUrl'], 'video url'),
'http_headers': {
'Referer': 'https://vider.info/',
},
}]
return info_dict

View file

@ -1441,29 +1441,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
or re.search(r'player-age-gate-content">', video_webpage) is not None):
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
data = compat_urllib_parse_urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
'html5': 1,
'c': 'TVHTML5',
'cver': '6.20180913',
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
try:
video_info_webpage = self._download_webpage(
video_info_url, video_id,
note='Downloading age-gated video info',
yti1_player = self._download_webpage(
proto + '://www.youtube.com/youtubei/v1/player', video_id,
headers={
'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.0 Safari/537.36',
'Content-Type': 'application/json',
'X-Goog-Api-Key': self._YOUTUBE_API_KEY,
},
data=bytes(json.dumps({
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20210721.00.00',
'clientScreen': 'EMBED',
},
},
'videoId': video_id,
}).encode('utf-8')),
note='Downloading age-gated player info',
errnote='unable to download video info')
except ExtractorError:
video_info_webpage = None
if video_info_webpage:
video_info = compat_parse_qs(video_info_webpage)
pl_response = video_info.get('player_response', [None])[0]
player_response = extract_player_response(pl_response, video_id)
yti1_player = None
if yti1_player:
player_response = extract_player_response(yti1_player, video_id)
add_dash_mpd(video_info)
view_count = extract_view_count(video_info)
view_count = extract_view_count(video_id)
else:
age_gate = False
# Try looking directly into the video webpage

View file

@ -1,6 +1,6 @@
from __future__ import unicode_literals
__version__ = '2021.06.24.1'
__version__ = '2021.08.01'
if __name__ == '__main__':
print(__version__)