Merge remote-tracking branch 'fstirlitz/master'

This commit is contained in:
Philipp Hagemeister 2014-12-11 17:11:25 +01:00
commit 69f491f14e
2 changed files with 93 additions and 17 deletions

View file

@ -51,7 +51,6 @@ from .cbsnews import CBSNewsIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE
from .channel9 import Channel9IE from .channel9 import Channel9IE
from .chilloutzone import ChilloutzoneIE from .chilloutzone import ChilloutzoneIE
from .cinemassacre import CinemassacreIE
from .clipfish import ClipfishIE from .clipfish import ClipfishIE
from .cliphunter import CliphunterIE from .cliphunter import CliphunterIE
from .clipsyndicate import ClipsyndicateIE from .clipsyndicate import ClipsyndicateIE
@ -336,6 +335,7 @@ from .savefrom import SaveFromIE
from .sbs import SBSIE from .sbs import SBSIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
from .screenwavemedia import ScreenwaveMediaIE
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sexu import SexuIE from .sexu import SexuIE
from .sexykarma import SexyKarmaIE from .sexykarma import SexyKarmaIE

View file

@ -6,18 +6,23 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
month_by_name,
int_or_none, int_or_none,
) )
class ScreenwaveMediaIE(InfoExtractor):
_VALID_URL = r'(?:http://)?(?' \
r':(?P<generic>player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<video_id>.+))' \
r'|(?P<cinemassacre>(?:www\.)?cinemassacre\.com/(?P<cm_date_Y>[0-9]{4})/(?P<cm_date_m>[0-9]{2})/(?P<cm_date_d>[0-9]{2})/(?P<cm_display_id>[^?#/]+))' \
r'|(?P<teamfourstar>(?:www\.)?teamfourstar\.com/video/(?P<tfs_display_id>[a-z0-9\-]+)/?)' \
r')'
class CinemassacreIE(InfoExtractor):
_VALID_URL = r'http://(?:www\.)?cinemassacre\.com/(?P<date_Y>[0-9]{4})/(?P<date_m>[0-9]{2})/(?P<date_d>[0-9]{2})/(?P<display_id>[^?#/]+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/', 'url': 'http://cinemassacre.com/2012/11/10/avgn-the-movie-trailer/',
'md5': 'fde81fbafaee331785f58cd6c0d46190', 'md5': 'fde81fbafaee331785f58cd6c0d46190',
'info_dict': { 'info_dict': {
'id': '19911', 'id': 'Cinemasssacre-19911',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20121110', 'upload_date': '20121110',
'title': '“Angry Video Game Nerd: The Movie” Trailer', 'title': '“Angry Video Game Nerd: The Movie” Trailer',
@ -28,7 +33,7 @@ class CinemassacreIE(InfoExtractor):
'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940', 'url': 'http://cinemassacre.com/2013/10/02/the-mummys-hand-1940',
'md5': 'd72f10cd39eac4215048f62ab477a511', 'md5': 'd72f10cd39eac4215048f62ab477a511',
'info_dict': { 'info_dict': {
'id': '521be8ef82b16', 'id': 'Cinemasssacre-521be8ef82b16',
'ext': 'mp4', 'ext': 'mp4',
'upload_date': '20131002', 'upload_date': '20131002',
'title': 'The Mummys Hand (1940)', 'title': 'The Mummys Hand (1940)',
@ -36,18 +41,16 @@ class CinemassacreIE(InfoExtractor):
} }
] ]
def _real_extract(self, url): def _cinemassacre_get_info(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id') display_id = mobj.group('cm_display_id')
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') video_date = mobj.group('cm_date_Y') + mobj.group('cm_date_m') + mobj.group('cm_date_d')
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj: if not mobj:
raise ExtractorError('Can\'t extract embed url and video id') raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url') playerdata_url = mobj.group('embed_url')
video_id = mobj.group('video_id')
full_video_id = mobj.group('full_video_id')
video_title = self._html_search_regex( video_title = self._html_search_regex(
r'<title>(?P<title>.+?)\|', webpage, 'title') r'<title>(?P<title>.+?)\|', webpage, 'title')
@ -56,10 +59,56 @@ class CinemassacreIE(InfoExtractor):
webpage, 'description', flags=re.DOTALL, fatal=False) webpage, 'description', flags=re.DOTALL, fatal=False)
video_thumbnail = self._og_search_thumbnail(webpage) video_thumbnail = self._og_search_thumbnail(webpage)
playerdata = self._download_webpage(playerdata_url, video_id, 'Downloading player webpage') return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _teamfourstar_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('tfs_display_id')
webpage = self._download_webpage(url, display_id)
mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=.+?)"', webpage)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
playerdata_url = mobj.group('embed_url')
video_title = self._html_search_regex(
r'<div class="heroheadingtitle">(?P<title>.+?)</div>', webpage, 'title')
video_date = self._html_search_regex(
r'<div class="heroheadingdate">(?P<date>.+?)</div>', webpage, 'date')
mobj = re.match('(?P<month>[A-Z][a-z]+) (?P<day>\d+), (?P<year>\d+)', video_date)
video_date = '%04u%02u%02u' % (int(mobj.group('year')), month_by_name(mobj.group('month')), int(mobj.group('day')))
video_description = self._html_search_regex(
r'<div class="postcontent">(?P<description>.+?)</div>', webpage, 'description', flags=re.DOTALL)
video_thumbnail = self._og_search_thumbnail(webpage)
return {
'title': video_title,
'description': video_description,
'upload_date': video_date,
'thumbnail': video_thumbnail,
'_embed_url': playerdata_url,
}
def _screenwavemedia_get_info(self, url):
mobj = re.match(self._VALID_URL, url)
if not mobj:
raise ExtractorError('Can\'t extract embed url and video id')
video_id = mobj.group('video_id')
playerdata = self._download_webpage(url, video_id, 'Downloading player webpage')
vidtitle = self._search_regex(
r'\'vidtitle\'\s*:\s*"([^\']+)"', playerdata, 'vidtitle').replace('\\/', '/')
vidurl = self._search_regex( vidurl = self._search_regex(
r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/')
pageurl = self._search_regex(
r'\'pageurl\'\s*:\s*"([^\']+)"', playerdata, 'pageurl', fatal=False).replace('\\/', '/')
videolist_url = None videolist_url = None
@ -67,7 +116,7 @@ class CinemassacreIE(InfoExtractor):
if mobj: if mobj:
videoserver = mobj.group('videoserver') videoserver = mobj.group('videoserver')
mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata)
vidid = mobj.group('vidid') if mobj else full_video_id vidid = mobj.group('vidid') if mobj else video_id
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
else: else:
mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata)
@ -110,9 +159,36 @@ class CinemassacreIE(InfoExtractor):
return { return {
'id': video_id, 'id': video_id,
'title': video_title, 'title': vidtitle,
'formats': formats, 'formats': formats,
'description': video_description, '_episode_page': pageurl,
'upload_date': video_date,
'thumbnail': video_thumbnail,
} }
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
swm_info = None
site_info = None
if mobj.group('generic'):
swm_info = self._screenwavemedia_get_info(url)
url = swm_info['_episode_page']
mobj = re.match(self._VALID_URL, url)
if mobj:
if mobj.group('cinemassacre'):
site_info = self._cinemassacre_get_info(url)
elif mobj.group('teamfourstar'):
site_info = self._teamfourstar_get_info(url)
if not swm_info:
if site_info:
swm_info = self._screenwavemedia_get_info(site_info['_embed_url'])
if not swm_info:
raise ExtractorError("Failed to extract metadata for this URL")
if site_info:
swm_info.update(site_info)
return swm_info