[livestream:original] Add support for folder urls (closes #2631)

The webpage only contains shortened links for the videos, since the server
doesn't support HEAD requests, we use an specific extractor for them.
This commit is contained in:
Jaime Marquínez Ferrándiz 2014-06-26 16:34:36 +02:00
parent f5172a3084
commit 78338f71ca
4 changed files with 64 additions and 6 deletions

View file

@ -30,6 +30,7 @@ from youtube_dl.extractor import (
SoundcloudPlaylistIE, SoundcloudPlaylistIE,
TeacherTubeClassroomIE, TeacherTubeClassroomIE,
LivestreamIE, LivestreamIE,
LivestreamOriginalIE,
NHLVideocenterIE, NHLVideocenterIE,
BambuserChannelIE, BambuserChannelIE,
BandcampAlbumIE, BandcampAlbumIE,
@ -155,6 +156,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'TEDCity2.0 (English)') self.assertEqual(result['title'], 'TEDCity2.0 (English)')
self.assertTrue(len(result['entries']) >= 4) self.assertTrue(len(result['entries']) >= 4)
def test_livestreamoriginal_folder(self):
dl = FakeYDL()
ie = LivestreamOriginalIE(dl)
result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertIsPlaylist(result)
self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3')
self.assertTrue(len(result['entries']) >= 28)
def test_nhl_videocenter(self): def test_nhl_videocenter(self):
dl = FakeYDL() dl = FakeYDL()
ie = NHLVideocenterIE(dl) ie = NHLVideocenterIE(dl)

View file

@ -147,7 +147,11 @@ from .ku6 import Ku6IE
from .la7 import LA7IE from .la7 import LA7IE
from .lifenews import LifeNewsIE from .lifenews import LifeNewsIE
from .liveleak import LiveLeakIE from .liveleak import LiveLeakIE
from .livestream import LivestreamIE, LivestreamOriginalIE from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
LivestreamShortenerIE,
)
from .lynda import ( from .lynda import (
LyndaIE, LyndaIE,
LyndaCourseIE LyndaCourseIE

View file

@ -459,6 +459,9 @@ class InfoExtractor(object):
if secure: regexes = self._og_regexes('video:secure_url') + regexes if secure: regexes = self._og_regexes('video:secure_url') + regexes
return self._html_search_regex(regexes, html, name, **kargs) return self._html_search_regex(regexes, html, name, **kargs)
def _og_search_url(self, html, **kargs):
return self._og_search_property('url', html, **kargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False): def _html_search_meta(self, name, html, display_name=None, fatal=False):
if display_name is None: if display_name is None:
display_name = name display_name = name

View file

@ -9,6 +9,7 @@ from ..utils import (
compat_urlparse, compat_urlparse,
xpath_with_ns, xpath_with_ns,
compat_str, compat_str,
orderedSet,
) )
@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor):
# The original version of Livestream uses a different system # The original version of Livestream uses a different system
class LivestreamOriginalIE(InfoExtractor): class LivestreamOriginalIE(InfoExtractor):
IE_NAME = 'livestream:original' IE_NAME = 'livestream:original'
_VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' _VALID_URL = r'''(?x)https?://www\.livestream\.com/
(?P<user>[^/]+)/(?P<type>video|folder)
(?:\?.*?Id=|/)(?P<id>.*?)(&|$)
'''
_TEST = { _TEST = {
'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', 'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb',
'info_dict': { 'info_dict': {
@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor):
}, },
} }
def _real_extract(self, url): def _extract_video(self, user, video_id):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
user = mobj.group('user')
api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
info = self._download_xml(api_url, video_id) info = self._download_xml(api_url, video_id)
@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor):
'ext': 'flv', 'ext': 'flv',
'thumbnail': thumbnail_url, 'thumbnail': thumbnail_url,
} }
def _extract_folder(self, url, folder_id):
webpage = self._download_webpage(url, folder_id)
urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage))
return {
'_type': 'playlist',
'id': folder_id,
'entries': [{
'_type': 'url',
'url': video_url,
} for video_url in urls],
}
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
id = mobj.group('id')
user = mobj.group('user')
url_type = mobj.group('type')
if url_type == 'folder':
return self._extract_folder(url, id)
else:
return self._extract_video(user, id)
# The server doesn't support HEAD request, the generic extractor can't detect
# the redirection
class LivestreamShortenerIE(InfoExtractor):
IE_NAME = 'livestream:shortener'
IE_DESC = False # Do not list
_VALID_URL = r'https?://livestre\.am/(?P<id>.+)'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
id = mobj.group('id')
webpage = self._download_webpage(url, id)
return {
'_type': 'url',
'url': self._og_search_url(webpage),
}