[dvtv] Add new extractor

This commit is contained in:
Petr Kutalek 2014-12-17 15:52:54 +01:00
parent 7642c08763
commit 5f627b4448
2 changed files with 64 additions and 0 deletions

View file

@ -88,6 +88,7 @@ from .dotsub import DotsubIE
from .dreisat import DreiSatIE
from .drtuber import DrTuberIE
from .drtv import DRTVIE
from .dvtv import DVTVIE
from .dump import DumpIE
from .defense import DefenseGouvFrIE
from .discovery import DiscoveryIE

View file

@ -0,0 +1,63 @@
# coding: utf-8
from __future__ import unicode_literals
import re
import json
from .common import InfoExtractor
from ..utils import (
ExtractorError,
js_to_json,
unescapeHTML
)
class DVTVIE(InfoExtractor):
IE_NAME = 'dvtv'
IE_DESC = 'http://video.aktualne.cz/dvtv/'
_VALID_URL = r'http://video\.aktualne\.cz/dvtv/(?P<id>[a-z0-9-]+/r~[0-9a-f]{32})/?'
_TESTS = [{
'url': 'http://video.aktualne.cz/dvtv/vondra-o-ceskem-stoleti-pri-pohledu-na-havla-mi-bylo-trapne/r~e5efe9ca855511e4833a0025900fea04/',
'md5': '75800f964fa0f82939a2914563301f72',
'info_dict': {
'id': 'e5efe9ca855511e4833a0025900fea04',
'ext': 'webm',
'title': 'Vondra o Českém století: Při pohledu na Havla mi bylo trapně'
}
}, {
'url': 'http://video.aktualne.cz/dvtv/stropnicky-policie-vrbetice-preventivne-nekontrolovala/r~82ed4322849211e4a10c0025900fea04/',
'md5': 'd50455195a67a94c57f931360cc68a1b',
'info_dict': {
'id': '82ed4322849211e4a10c0025900fea04',
'ext': 'webm',
'title': 'Stropnický: Policie Vrbětice preventivně nekontrolovala'
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
code = self._search_regex(r'embedData[0-9a-f]{32}\[\'asset\'\] = (\{.+?\});', webpage, 'video JSON', flags=re.DOTALL)
payload = self._parse_json(code, video_id, transform_source=js_to_json)
formats = []
for source in payload['sources']:
formats.append({
'url': source['file'],
'ext': source['type'][6:],
'format': '%s %s' % (source['type'][6:], source['label']),
'format_id': '%s-%s' % (source['type'][6:], source['label']),
'resolution': source['label'],
'fps': 25,
'preference': -1 if source['type'][6:] == 'mp4' and source['label'] == '720p' else -2
})
return {
'id': video_id[-32:],
'display_id': video_id[:-35],
'title': unescapeHTML(payload['title']),
'thumbnail': 'http:%s' % payload['image'],
'formats': formats
}