Merge branch 'remitamine-brightcove_in_page_embed'

This commit is contained in:
Sergey M․ 2015-11-14 06:11:49 +06:00
commit 967e0955f0
8 changed files with 200 additions and 24 deletions

View file

@ -60,7 +60,10 @@ from .bloomberg import BloombergIE
from .bpb import BpbIE from .bpb import BpbIE
from .br import BRIE from .br import BRIE
from .breakcom import BreakIE from .breakcom import BreakIE
from .brightcove import BrightcoveIE from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .buzzfeed import BuzzFeedIE from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE from .byutv import BYUtvIE
from .c56 import C56IE from .c56 import C56IE

View file

@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):
'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',
'uploader': 'Al Jazeera English', 'uploader': 'Al Jazeera English',
}, },
'add_ie': ['Brightcove'], 'add_ie': ['BrightcoveLegacy'],
'skip': 'Not accessible from Travis CI server', 'skip': 'Not accessible from Travis CI server',
} }
@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):
'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc' 'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'
'&%40videoPlayer={0}'.format(brightcove_id) '&%40videoPlayer={0}'.format(brightcove_id)
), ),
'ie_key': 'Brightcove', 'ie_key': 'BrightcoveLegacy',
} }

View file

@ -20,12 +20,17 @@ from ..utils import (
ExtractorError, ExtractorError,
find_xpath_attr, find_xpath_attr,
fix_xml_ampersands, fix_xml_ampersands,
float_or_none,
js_to_json,
int_or_none,
parse_iso8601,
unescapeHTML, unescapeHTML,
unsmuggle_url, unsmuggle_url,
) )
class BrightcoveIE(InfoExtractor): class BrightcoveLegacyIE(InfoExtractor):
IE_NAME = 'brightcove:legacy'
_VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
@ -346,3 +351,152 @@ class BrightcoveIE(InfoExtractor):
if 'url' not in info and not info.get('formats'): if 'url' not in info and not info.get('formats'):
raise ExtractorError('Unable to extract video url for %s' % info['id']) raise ExtractorError('Unable to extract video url for %s' % info['id'])
return info return info
class BrightcoveNewIE(InfoExtractor):
IE_NAME = 'brightcove:new'
_VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
_TEST = {
'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
'md5': 'c8100925723840d4b0d243f7025703be',
'info_dict': {
'id': '4463358922001',
'ext': 'mp4',
'title': 'Meet the man behind Popcorn Time',
'description': 'md5:eac376a4fe366edc70279bfb681aea16',
'timestamp': 1441391203,
'upload_date': '20150904',
'duration': 165.768,
'uploader_id': '929656772001',
}
}
@staticmethod
def _extract_urls(webpage):
# Reference:
# 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
# 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript)
# 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html
entries = []
# Look for iframe embeds [1]
for _, url in re.findall(
r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
entries.append(url)
# Look for embed_in_page embeds [2]
# According to examples from [3] it's unclear whether video id may be optional
# and what to do when it is
for video_id, account_id, player_id, embed in re.findall(
r'''(?sx)
<video[^>]+
data-video-id=["\'](\d+)["\'][^>]*>.*?
</video>.*?
<script[^>]+
src=["\'](?:https?:)?//players\.brightcove\.net/
(\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
''', webpage):
entries.append(
'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
% (account_id, player_id, embed, video_id))
return entries
def _real_extract(self, url):
account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(
'http://players.brightcove.net/%s/%s_%s/index.min.js'
% (account_id, player_id, embed), video_id)
policy_key = None
catalog = self._search_regex(
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
if catalog:
catalog = self._parse_json(
js_to_json(catalog), video_id, fatal=False)
if catalog:
policy_key = catalog.get('policyKey')
if not policy_key:
policy_key = self._search_regex(
r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
webpage, 'policy key', group='pk')
req = compat_urllib_request.Request(
'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
% (account_id, video_id),
headers={'Accept': 'application/json;pk=%s' % policy_key})
json_data = self._download_json(req, video_id)
title = json_data['name']
formats = []
for source in json_data.get('sources', []):
source_type = source.get('type')
src = source.get('src')
if source_type == 'application/x-mpegURL':
if not src:
continue
m3u8_formats = self._extract_m3u8_formats(
src, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
else:
streaming_src = source.get('streaming_src')
stream_name, app_name = source.get('stream_name'), source.get('app_name')
if not src and not streaming_src and (not stream_name or not app_name):
continue
tbr = float_or_none(source.get('avg_bitrate'), 1000)
height = int_or_none(source.get('height'))
f = {
'tbr': tbr,
'width': int_or_none(source.get('width')),
'height': height,
'filesize': int_or_none(source.get('size')),
'container': source.get('container'),
'vcodec': source.get('codec'),
'ext': source.get('container').lower(),
}
def build_format_id(kind):
format_id = kind
if tbr:
format_id += '-%dk' % int(tbr)
if height:
format_id += '-%dp' % height
return format_id
if src or streaming_src:
f.update({
'url': src or streaming_src,
'format_id': build_format_id('http' if src else 'http-streaming'),
'preference': 2 if src else 1,
})
else:
f.update({
'url': app_name,
'play_path': stream_name,
'format_id': build_format_id('rtmp'),
})
formats.append(f)
self._sort_formats(formats)
description = json_data.get('description')
thumbnail = json_data.get('thumbnail')
timestamp = parse_iso8601(json_data.get('published_at'))
duration = float_or_none(json_data.get('duration'), 1000)
tags = json_data.get('tags', [])
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'duration': duration,
'timestamp': timestamp,
'uploader_id': account_id,
'formats': formats,
'tags': tags,
}

View file

@ -30,7 +30,10 @@ from ..utils import (
url_basename, url_basename,
xpath_text, xpath_text,
) )
from .brightcove import BrightcoveIE from .brightcove import (
BrightcoveLegacyIE,
BrightcoveNewIE,
)
from .nbc import NBCSportsVPlayerIE from .nbc import NBCSportsVPlayerIE
from .ooyala import OoyalaIE from .ooyala import OoyalaIE
from .rutv import RUTVIE from .rutv import RUTVIE
@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):
# it also tests brightcove videos that need to set the 'Referer' in the # it also tests brightcove videos that need to set the 'Referer' in the
# http requests # http requests
{ {
'add_ie': ['Brightcove'], 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/', 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
'info_dict': { 'info_dict': {
'id': '2765128793001', 'id': '2765128793001',
@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):
'uploader': 'thestar.com', 'uploader': 'thestar.com',
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.', 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
}, },
'add_ie': ['Brightcove'], 'add_ie': ['BrightcoveLegacy'],
}, },
{ {
'url': 'http://www.championat.com/video/football/v/87/87499.html', 'url': 'http://www.championat.com/video/football/v/87/87499.html',
@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):
}, },
{ {
# https://github.com/rg3/youtube-dl/issues/3541 # https://github.com/rg3/youtube-dl/issues/3541
'add_ie': ['Brightcove'], 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
'info_dict': { 'info_dict': {
'id': '3866516442001', 'id': '3866516442001',
@ -1031,6 +1034,17 @@ class GenericIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'cinemasnob', 'title': 'cinemasnob',
}, },
},
# BrightcoveInPageEmbed embed
{
'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
'info_dict': {
'id': '4238694884001',
'ext': 'flv',
'title': 'Tabletop: Dread, Last Thoughts',
'description': 'Tabletop: Dread, Last Thoughts',
'duration': 51690,
},
} }
] ]
@ -1290,14 +1304,14 @@ class GenericIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
urlrs, playlist_id=video_id, playlist_title=video_title) urlrs, playlist_id=video_id, playlist_title=video_title)
# Look for BrightCove: # Look for Brightcove Legacy Studio embeds
bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
if bc_urls: if bc_urls:
self.to_screen('Brightcove video detected.') self.to_screen('Brightcove video detected.')
entries = [{ entries = [{
'_type': 'url', '_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}), 'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove' 'ie_key': 'BrightcoveLegacy'
} for bc_url in bc_urls] } for bc_url in bc_urls]
return { return {
@ -1307,6 +1321,11 @@ class GenericIE(InfoExtractor):
'entries': entries, 'entries': entries,
} }
# Look for Brightcove New Studio embeds
bc_urls = BrightcoveNewIE._extract_urls(webpage)
if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
# Look for embedded rtl.nl player # Look for embedded rtl.nl player
matches = re.findall( matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',

View file

@ -1,7 +1,7 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .brightcove import BrightcoveIE from .brightcove import BrightcoveLegacyIE
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ExtractorError from ..utils import ExtractorError
from ..compat import ( from ..compat import (
@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):
'http://www.nowness.com/iframe?id=%s' % video_id, video_id, 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
note='Downloading player JavaScript', note='Downloading player JavaScript',
errnote='Unable to download player JavaScript') errnote='Unable to download player JavaScript')
bc_url = BrightcoveIE._extract_brightcove_url(player_code) bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
if bc_url is None: if bc_url is None:
raise ExtractorError('Could not find player definition') raise ExtractorError('Could not find player definition')
return self.url_result(bc_url, 'Brightcove') return self.url_result(bc_url, 'BrightcoveLegacy')
elif source == 'vimeo': elif source == 'vimeo':
return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
elif source == 'youtube': elif source == 'youtube':

View file

@ -4,7 +4,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveIE from .brightcove import BrightcoveLegacyIE
from ..compat import ( from ..compat import (
compat_urllib_parse, compat_urllib_parse,
@ -112,11 +112,11 @@ class SafariIE(SafariBaseIE):
'%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part), '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),
part) part)
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if not bc_url: if not bc_url:
raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True)
return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove') return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')
class SafariCourseIE(SafariBaseIE): class SafariCourseIE(SafariBaseIE):

View file

@ -3,14 +3,14 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveIE from .brightcove import BrightcoveLegacyIE
from ..utils import RegexNotFoundError, ExtractorError from ..utils import RegexNotFoundError, ExtractorError
class SpaceIE(InfoExtractor): class SpaceIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html' _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'
_TEST = { _TEST = {
'add_ie': ['Brightcove'], 'add_ie': ['BrightcoveLegacy'],
'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html', 'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',
'info_dict': { 'info_dict': {
'id': '2780937028001', 'id': '2780937028001',
@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):
brightcove_url = self._og_search_video_url(webpage) brightcove_url = self._og_search_video_url(webpage)
except RegexNotFoundError: except RegexNotFoundError:
# Other videos works fine with the info from the object # Other videos works fine with the info from the object
brightcove_url = BrightcoveIE._extract_brightcove_url(webpage) brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
if brightcove_url is None: if brightcove_url is None:
raise ExtractorError( raise ExtractorError(
'The webpage does not contain a video', expected=True) 'The webpage does not contain a video', expected=True)
return self.url_result(brightcove_url, BrightcoveIE.ie_key()) return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key())

View file

@ -3,7 +3,7 @@ from __future__ import unicode_literals
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveIE from .brightcove import BrightcoveLegacyIE
from .discovery import DiscoveryIE from .discovery import DiscoveryIE
from ..compat import compat_urlparse from ..compat import compat_urlparse
@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):
return { return {
'_type': 'url', '_type': 'url',
'url': BrightcoveIE._extract_brightcove_url(iframe), 'url': BrightcoveLegacyIE._extract_brightcove_url(iframe),
'ie': BrightcoveIE.ie_key(), 'ie': BrightcoveLegacyIE.ie_key(),
} }