[nova:embed] Add extractor (closes #17282)

This commit is contained in:
Sergey M․ 2018-08-22 23:18:07 +07:00
parent 4c86163b60
commit c707d2067d
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 101 additions and 42 deletions

View file

@ -741,7 +741,10 @@ from .nonktube import NonkTubeIE
from .noovo import NoovoIE from .noovo import NoovoIE
from .normalboots import NormalbootsIE from .normalboots import NormalbootsIE
from .nosvideo import NosVideoIE from .nosvideo import NosVideoIE
from .nova import NovaIE from .nova import (
NovaEmbedIE,
NovaIE,
)
from .novamov import ( from .novamov import (
AuroraVidIE, AuroraVidIE,
CloudTimeIE, CloudTimeIE,

View file

@ -6,30 +6,90 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
clean_html, clean_html,
int_or_none,
js_to_json, js_to_json,
qualities,
unified_strdate, unified_strdate,
url_or_none, url_or_none,
) )
class NovaEmbedIE(InfoExtractor):
_VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
_TEST = {
'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
'md5': 'b3834f6de5401baabf31ed57456463f7',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
bitrates = self._parse_json(
self._search_regex(
r'(?s)bitrates\s*=\s*({.+?})\s*;', webpage, 'formats'),
video_id, transform_source=js_to_json)
QUALITIES = ('lq', 'mq', 'hq', 'hd')
quality_key = qualities(QUALITIES)
formats = []
for format_id, format_list in bitrates.items():
if not isinstance(format_list, list):
continue
for format_url in format_list:
format_url = url_or_none(format_url)
if not format_url:
continue
f = {
'url': format_url,
}
f_id = format_id
for quality in QUALITIES:
if '%s.mp4' % quality in format_url:
f_id += '-%s' % quality
f.update({
'quality': quality_key(quality),
'format_note': quality.upper(),
})
break
f['format_id'] = f_id
formats.append(f)
self._sort_formats(formats)
title = self._og_search_title(
webpage, default=None) or self._search_regex(
(r'<value>(?P<title>[^<]+)',
r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'title', group='value')
thumbnail = self._og_search_thumbnail(
webpage, default=None) or self._search_regex(
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
'thumbnail', fatal=False, group='value')
duration = int_or_none(self._search_regex(
r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
return {
'id': video_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'formats': formats,
}
class NovaIE(InfoExtractor): class NovaIE(InfoExtractor):
IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz' IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
_VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)' _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
_TESTS = [{ _TESTS = [{
'url': 'http://tvnoviny.nova.cz/clanek/novinky/co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou.html?utm_source=tvnoviny&utm_medium=cpfooter&utm_campaign=novaplus',
'info_dict': {
'id': '1608920',
'display_id': 'co-na-sebe-sportaci-praskli-vime-jestli-pujde-hrdlicka-na-materskou',
'ext': 'flv',
'title': 'Duel: Michal Hrdlička a Petr Suchoň',
'description': 'md5:d0cc509858eee1b1374111c588c6f5d5',
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260', 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
'md5': '1dd7b9d5ea27bc361f110cd855a19bd3', 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
'info_dict': { 'info_dict': {
@ -40,33 +100,6 @@ class NovaIE(InfoExtractor):
'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53', 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
'thumbnail': r're:^https?://.*\.(?:jpg)', 'thumbnail': r're:^https?://.*\.(?:jpg)',
} }
}, {
'url': 'http://novaplus.nova.cz/porad/policie-modrava/video/5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'info_dict': {
'id': '1756825',
'display_id': '5591-policie-modrava-15-dil-blondynka-na-hrbitove',
'ext': 'flv',
'title': 'Policie Modrava - 15. díl - Blondýnka na hřbitově',
'description': 'md5:dc24e50be5908df83348e50d1431295e', # Make sure this description is clean of html tags
'thumbnail': r're:^https?://.*\.(?:jpg)',
},
'params': {
# rtmp download
'skip_download': True,
}
}, {
'url': 'http://novaplus.nova.cz/porad/televizni-noviny/video/5585-televizni-noviny-30-5-2015/',
'info_dict': {
'id': '1756858',
'ext': 'flv',
'title': 'Televizní noviny - 30. 5. 2015',
'thumbnail': r're:^https?://.*\.(?:jpg)',
'upload_date': '20150530',
},
'params': {
# rtmp download
'skip_download': True,
}
}, { }, {
'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html', 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
'info_dict': { 'info_dict': {
@ -81,6 +114,20 @@ class NovaIE(InfoExtractor):
# rtmp download # rtmp download
'skip_download': True, 'skip_download': True,
} }
}, {
# media.cms.nova.cz embed
'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
'info_dict': {
'id': '8o0n0r',
'ext': 'mp4',
'title': '2180. díl',
'thumbnail': r're:^https?://.*\.jpg',
'duration': 2578,
},
'params': {
'skip_download': True,
},
'add_ie': [NovaEmbedIE.ie_key()],
}, { }, {
'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html', 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
'only_matching': True, 'only_matching': True,
@ -105,6 +152,15 @@ class NovaIE(InfoExtractor):
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
# novaplus
embed_id = self._search_regex(
r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
webpage, 'embed url', default=None)
if embed_id:
return self.url_result(
'https://media.cms.nova.cz/embed/%s' % embed_id,
ie=NovaEmbedIE.ie_key(), video_id=embed_id)
video_id = self._search_regex( video_id = self._search_regex(
[r"(?:media|video_id)\s*:\s*'(\d+)'", [r"(?:media|video_id)\s*:\s*'(\d+)'",
r'media=(\d+)', r'media=(\d+)',