[fktv] fix info extraction

This commit is contained in:
remitamine 2015-09-09 10:42:47 +01:00
parent 47004d9579
commit 7b4137c351
2 changed files with 20 additions and 63 deletions

View file

@ -170,10 +170,7 @@ from .firstpost import FirstpostIE
from .firsttv import FirstTVIE from .firsttv import FirstTVIE
from .fivemin import FiveMinIE from .fivemin import FiveMinIE
from .fivetv import FiveTVIE from .fivetv import FiveTVIE
from .fktv import ( from .fktv import FKTVIE
FKTVIE,
FKTVPosteckeIE,
)
from .flickr import FlickrIE from .flickr import FlickrIE
from .folketinget import FolketingetIE from .folketinget import FolketingetIE
from .footyroom import FootyRoomIE from .footyroom import FootyRoomIE

View file

@ -1,13 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re import re
import random
import json
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_id,
clean_html, clean_html,
determine_ext,
) )
@ -17,66 +15,28 @@ class FKTVIE(InfoExtractor):
_TEST = { _TEST = {
'url': 'http://fernsehkritik.tv/folge-1', 'url': 'http://fernsehkritik.tv/folge-1',
'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
'info_dict': { 'info_dict': {
'id': '00011', 'id': '1',
'ext': 'flv', 'ext': 'mp4',
'title': 'Folge 1 vom 10. April 2007', 'title': 'Folge 1 vom 10. April 2007',
'description': 'md5:fb4818139c7cfe6907d4b83412a6864f',
}, },
} }
def _real_extract(self, url): def _real_extract(self, url):
episode = int(self._match_id(url)) episode = self._match_id(url)
video_thumbnail = 'http://fernsehkritik.tv/images/magazin/folge%s.jpg' % episode webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/play' % episode, episode)
start_webpage = self._download_webpage('http://fernsehkritik.tv/folge-%s/Start' % episode, title = clean_html(self._html_search_regex('<h3>([^<]+?)</h3>', webpage, 'title'))
episode) matchs = re.search(r'(?s)<video[^>]*poster="([^"]+)"[^>]*>(.*?)</video>', webpage)
playlist = self._search_regex(r'playlist = (\[.*?\]);', start_webpage, if matchs:
'playlist', flags=re.DOTALL) poster, sources = matchs.groups()
files = json.loads(re.sub('{[^{}]*?}', '{}', playlist)) urls = re.findall(r'(?s)<source[^>]*src="([^"]+)"[^>]*>', sources)
if sources:
videos = [] formats = [{'url': url, 'format_id': determine_ext(url)} for url in urls]
for i, _ in enumerate(files, 1): return {
video_id = '%04d%d' % (episode, i) 'id': episode,
video_url = 'http://fernsehkritik.tv/js/directme.php?file=%s%s.flv' % (episode, '' if i == 1 else '-%d' % i) 'title': title,
videos.append({ 'formats': formats,
'ext': 'flv', 'thumbnail': poster,
'id': video_id, }
'url': video_url,
'title': clean_html(get_element_by_id('eptitle', start_webpage)),
'description': clean_html(get_element_by_id('contentlist', start_webpage)),
'thumbnail': video_thumbnail
})
return {
'_type': 'multi_video',
'entries': videos,
'id': 'folge-%s' % episode,
}
class FKTVPosteckeIE(InfoExtractor):
IE_NAME = 'fernsehkritik.tv:postecke'
_VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/inline-video/postecke\.php\?(.*&)?ep=(?P<ep>[0-9]+)(&|$)'
_TEST = {
'url': 'http://fernsehkritik.tv/inline-video/postecke.php?iframe=true&width=625&height=440&ep=120',
'md5': '262f0adbac80317412f7e57b4808e5c4',
'info_dict': {
'id': '0120',
'ext': 'flv',
'title': 'Postecke 120',
}
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
episode = int(mobj.group('ep'))
server = random.randint(2, 4)
video_id = '%04d' % episode
video_url = 'http://dl%d.fernsehkritik.tv/postecke/postecke%d.flv' % (server, episode)
video_title = 'Postecke %d' % episode
return {
'id': video_id,
'url': video_url,
'title': video_title,
}