[ign]: support some country versions and add an extractor for 1up.com

1up.com uses the IGN video system; the extractor is a subclass of IGNIE that just replaces the video id.
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-07-12 11:39:40 +02:00
parent 2ef648d3d3
commit a95967f8b7
2 changed files with 58 additions and 12 deletions

View file

@ -27,7 +27,7 @@ from .googlesearch import GoogleSearchIE
from .hotnewhiphop import HotNewHipHopIE from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE from .ign import IGNIE, OneUPIE
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE

View file

@ -6,10 +6,21 @@ from ..utils import (
determine_ext, determine_ext,
) )
class IGNIE(InfoExtractor): class IGNIE(InfoExtractor):
_VALID_URL = r'http://www.ign.com/videos/.+/(?P<name>.+)' """
Extractor for some of the IGN sites, like www.ign.com, es.ign.com and de.ign.com.
Some videos from it.ign.com are also supported.
"""
_VALID_URL = r'https?://.+?\.ign\.com/(?:videos|show_videos)(/.+)?/(?P<name_or_id>.+)'
IE_NAME = u'ign.com' IE_NAME = u'ign.com'
_CONFIG_URL_TEMPLATE = 'http://www.ign.com/videos/configs/id/%s.config'
_DESCRIPTION_RE = [r'<span class="page-object-description">(.+?)</span>',
r'id="my_show_video">.*?<p>(.*?)</p>',
]
_TEST = { _TEST = {
u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review', u'url': u'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
u'file': u'8f862beef863986b2785559b9e1aa599.mp4', u'file': u'8f862beef863986b2785559b9e1aa599.mp4',
@ -20,16 +31,29 @@ class IGNIE(InfoExtractor):
} }
} }
def _find_video_id(self, webpage):
    """Search *webpage* for the video id and return it.

    The candidate patterns are handed to ``self._search_regex`` as a list,
    together with the label 'video id'; how that helper walks the list and
    reports a miss is defined outside this block.
    """
    id_patterns = [
        r'data-video-id="(.+?)"',
        r'<object id="vid_(.+?)"',
        r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
    ]
    return self._search_regex(id_patterns, webpage, 'video id')
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
name = mobj.group('name') name_or_id = mobj.group('name_or_id')
config_url = url + '.config' webpage = self._download_webpage(url, name_or_id)
webpage = self._download_webpage(url, name) video_id = self._find_video_id(webpage)
config = json.loads(self._download_webpage(config_url, name, u'Downloading video info')) result = self._get_video_info(video_id)
description = self._html_search_regex(self._DESCRIPTION_RE,
webpage, 'video description',
flags=re.DOTALL)
result['description'] = description
return result
self.report_extraction(name) def _get_video_info(self, video_id):
description = self._html_search_regex(r'<span class="page-object-description">(.+?)</span>', config_url = self._CONFIG_URL_TEMPLATE % video_id
webpage, 'video description', flags=re.DOTALL) config = json.loads(self._download_webpage(config_url, video_id,
u'Downloading video info'))
media = config['playlist']['media'] media = config['playlist']['media']
video_url = media['url'] video_url = media['url']
@ -37,9 +61,31 @@ class IGNIE(InfoExtractor):
'url': video_url, 'url': video_url,
'ext': determine_ext(video_url), 'ext': determine_ext(video_url),
'title': media['metadata']['title'], 'title': media['metadata']['title'],
'description': description, 'thumbnail': media['poster'][0]['url'].replace('{size}', 'grande'),
'thumbnail': media['poster'][0]['url'].replace('{size}', 'small'),
} }
class OneUPIE(IGNIE):
    """Extractor for 1up.com, which uses the IGN video system.

    Extraction is delegated to IGNIE._real_extract; this class only supplies
    its own URL pattern and description pattern and replaces the 'id' of the
    returned result with the identifier taken from the URL.
    """

    # Dots in the host name are escaped so that they only match a literal '.'.
    _VALID_URL = r'https?://gamevideos\.1up\.com/video/id/(?P<name_or_id>.+)'
    IE_NAME = '1up.com'

    _DESCRIPTION_RE = r'<div id="vid_summary">(.+?)</div>'

    _TEST = {
        u'url': u'http://gamevideos.1up.com/video/id/34976',
        u'file': u'34976.mp4',
        u'md5': u'68a54ce4ebc772e4b71e3123d413163d',
        u'info_dict': {
            u'title': u'Sniper Elite V2 - Trailer',
            u'description': u'md5:5d289b722f5a6d940ca3136e9dae89cf',
        }
    }

    def _real_extract(self, url):
        """Extract the video info via IGNIE and set 'id' from the URL.

        url -- a URL matching _VALID_URL.
        Returns the result built by IGNIE._real_extract with its 'id' entry
        set to the 'name_or_id' group of the URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('name_or_id')
        result = super(OneUPIE, self)._real_extract(url)
        # The _TEST entry expects the file to be named after the id in the
        # URL, so that id is what gets reported.
        result['id'] = video_id
        return result