haruhi-dl/haruhi_dl/extractor/videotarget.py
2021-10-23 01:06:53 +02:00

60 lines
2 KiB
Python

# coding: utf-8
import base64
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
js_to_json,
)
class VideoTargetIE(InfoExtractor):
    """Extractor for videotarget.pl player pages.

    The last path component of a player URL is a URL-safe base64 encoded JSON
    object; the value stored under its ``context`` key is used as the video id.
    """

    _VALID_URL = r'https?://videotarget\.pl/player/v1/content/(?P<id>[a-zA-Z\d_-]+={0,3})'
    _TESTS = [{
        'url': 'https://videotarget.pl/player/v1/content/eyJzaXRlIjoxMDMzLCJwbGFjZW1lbnQiOjEwNzksInRlbXBsYXRlIjoyLCJjb250ZXh0IjoxNjA2NiwidHlwZSI6ImNvbnRlbnQifQ==?type=content',
        'info_dict': {
            'id': '16066',
            'ext': 'mp4',
            'title': 'Inflacja straszy rynki finansowe, niepokoją zwłaszcza rosnące ceny namu mieszkań',
        },
    }]

    @staticmethod
    def _extract_urls(webpage, **kw):
        """Return the videotarget player URLs embedded in *webpage*.

        Two embed forms are recognised:

        * ``<iframe src="...">`` pointing at a player URL (optionally with a
          query string and/or fragment), returned as written in the page;
        * ``<div data-vt="...">`` carrying only the base64 identifier, which
          is turned into a full player URL.

        Extra keyword arguments are accepted and ignored.
        """
        return [mobj.group('url')
                for mobj in re.finditer(
                    # The query part must stop at the closing quote
                    # (``(?!\1)``); a bare ``[^#]+`` is greedy and would
                    # swallow the rest of the page up to the last matching
                    # quote character.
                    r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>%s(?:\?(?:(?!\1)[^#])*)?(?:\#.+?)?)\1' % VideoTargetIE._VALID_URL,
                    webpage)
                ] + ['https://videotarget.pl/player/v1/content/' + mobj.group('vtid')
                     for mobj in re.finditer(
                         r'<div\b[^>]+?data-vt=(["\'])(?P<vtid>[a-zA-Z\d_-]+={0,3})\1',
                         webpage)]

    def _real_extract(self, url):
        """Extract id, title and formats for the player page at *url*.

        The video id comes from the ``context`` key of the JSON object encoded
        in the URL. The title and the list of qualities are scraped from the
        downloaded player page: every ``videoQualities.push({...})`` call
        yields one format.
        """
        b64_json_ident = self._match_id(url)
        # _VALID_URL accepts identifiers with 0-3 '=' characters, but
        # urlsafe_b64decode rejects input whose padding is missing, so
        # normalise the padding before decoding.
        unpadded = b64_json_ident.rstrip('=')
        padded = unpadded + '=' * (-len(unpadded) % 4)
        ident = self._parse_json(
            base64.urlsafe_b64decode(padded).decode('utf-8'), b64_json_ident)
        video_id = str(ident['context'])

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(r'<title>(.+?) - videotarget</title>', webpage, 'video title')

        formats = []
        for qual in re.finditer(r'(?s)videoQualities\.push\(({.+?})\);', webpage):
            qual = self._parse_json(qual.group(1), video_id, js_to_json)
            formats.append({
                # The label's last character is dropped before the integer
                # conversion (presumably a unit suffix such as "720p").
                'height': int_or_none(qual['label'][:-1]),
                # The source URL carries an '{ext}' placeholder; mp4 is
                # substituted for it.
                'url': qual['src'].replace('{ext}', 'mp4'),
            })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }