From b509a4b17643422b750e5258f538894105c58d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Sep 2014 15:43:09 +0200 Subject: [PATCH 1/5] [downloader/f4m] If is in the manifest, add it to the fragments urls query (fixes #3176) It's used in some akamai videos (for example for theplatform.com). --- youtube_dl/downloader/f4m.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 71353f607..b3be16ff1 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -16,6 +16,7 @@ from ..utils import ( format_bytes, encodeFilename, sanitize_open, + xpath_text, ) @@ -251,6 +252,8 @@ class F4mFD(FileDownloader): # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) tmpfilename = self.temp_name(filename) (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') @@ -290,6 +293,8 @@ class F4mFD(FileDownloader): for (seg_i, frag_i) in fragments_list: name = 'Seg%d-Frag%d' % (seg_i, frag_i) url = base_url + name + if akamai_pv: + url += '?' + akamai_pv.strip(';') frag_filename = '%s-%s' % (tmpfilename, name) success = http_dl.download(frag_filename, {'url': url}) if not success: From dd41e8c82bb14bda0c407f9f0865cfb112e8fc30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Sep 2014 15:47:58 +0200 Subject: [PATCH 2/5] [theplatform] Extract all formats for f4m videos --- youtube_dl/extractor/theplatform.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index b6b2dba9c..031a958fa 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -62,10 +62,7 @@ class ThePlatformIE(InfoExtractor): # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' - formats = [{ - 'ext': 'flv', - 'url': f4m_url, - }] + formats = self._extract_f4m_formats(f4m_url, video_id) else: base_url = head.find(_x('smil:meta')).attrib['base'] switch = body.find(_x('smil:switch')) From 224ce0d87299cf54469baccb9922e78f9594d029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Sep 2014 15:49:04 +0200 Subject: [PATCH 3/5] [nbc] Update test --- youtube_dl/extractor/nbc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index d2e4acbad..e75ab7c39 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -16,9 +16,9 @@ class NBCIE(InfoExtractor): _TEST = { 'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', - 'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', + # md5 checksum is not stable 'info_dict': { - 'id': 'u1RInQZRN7QJ', + 'id': 'bTmnLCvIbaaH', 'ext': 'flv', 'title': 'I Am a Firefighter', 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', From e35cb78c4099263c26f717669463a3c025c30d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Sep 2014 16:08:38 +0200 Subject: [PATCH 4/5] [theplatform] Correctly extract videos that don't use f4m or rtmp (reported in #3176) --- youtube_dl/extractor/sbs.py | 2 +- youtube_dl/extractor/theplatform.py | 48 ++++++++++++++++++----------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/sbs.py b/youtube_dl/extractor/sbs.py index 34058fd4b..214990e7a 100644 --- a/youtube_dl/extractor/sbs.py +++ b/youtube_dl/extractor/sbs.py @@ -21,7 +21,7 @@ class SBSIE(InfoExtractor): 'md5': '3150cf278965eeabb5b4cea1c963fe0a', 'info_dict': { 'id': '320403011771', - 'ext': 'flv', + 'ext': 'mp4', 'title': 'Dingo Conservation', 'description': 'Dingoes are on the brink of extinction; most of the animals we think are dingoes are in fact crossbred with wild dogs. This family run a dingo conservation park to prevent their extinction', 'thumbnail': 're:http://.*\.jpg', diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 031a958fa..0be793b1c 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -5,6 +5,7 @@ import json from .common import InfoExtractor from ..utils import ( + compat_str, ExtractorError, xpath_with_ns, ) @@ -55,7 +56,7 @@ class ThePlatformIE(InfoExtractor): body = meta.find(_x('smil:body')) f4m_node = body.find(_x('smil:seq//smil:video')) - if f4m_node is not None: + if f4m_node is not None and '.f4m' in f4m_node.attrib['src']: f4m_url = f4m_node.attrib['src'] if 'manifest.f4m?' not in f4m_url: f4m_url += '?' @@ -64,24 +65,35 @@ class ThePlatformIE(InfoExtractor): f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3' formats = self._extract_f4m_formats(f4m_url, video_id) else: - base_url = head.find(_x('smil:meta')).attrib['base'] - switch = body.find(_x('smil:switch')) formats = [] - for f in switch.findall(_x('smil:video')): - attr = f.attrib - width = int(attr['width']) - height = int(attr['height']) - vbr = int(attr['system-bitrate']) // 1000 - format_id = '%dx%d_%dk' % (width, height, vbr) - formats.append({ - 'format_id': format_id, - 'url': base_url, - 'play_path': 'mp4:' + attr['src'], - 'ext': 'flv', - 'width': width, - 'height': height, - 'vbr': vbr, - }) + switch = body.find(_x('smil:switch')) + if switch is not None: + base_url = head.find(_x('smil:meta')).attrib['base'] + for f in switch.findall(_x('smil:video')): + attr = f.attrib + width = int(attr['width']) + height = int(attr['height']) + vbr = int(attr['system-bitrate']) // 1000 + format_id = '%dx%d_%dk' % (width, height, vbr) + formats.append({ + 'format_id': format_id, + 'url': base_url, + 'play_path': 'mp4:' + attr['src'], + 'ext': 'flv', + 'width': width, + 'height': height, + 'vbr': vbr, + }) + else: + switch = body.find(_x('smil:seq//smil:switch')) + for f in switch.findall(_x('smil:video')): + attr = f.attrib + vbr = int(attr['system-bitrate']) // 1000 + formats.append({ + 'format_id': compat_str(vbr), + 'url': attr['src'], + 'vbr': vbr, + }) self._sort_formats(formats) return { From d0df92928bc099775e18f6413e387713839012ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 21 Sep 2014 16:53:00 +0200 Subject: [PATCH 5/5] [npo] Add extractor for tegenlicht.vpro.nl (closes #3778) --- youtube_dl/extractor/__init__.py | 5 ++++- youtube_dl/extractor/npo.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1a6033320..bca34ae73 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -249,7 +249,10 @@ from .nosvideo import NosVideoIE from .novamov import NovaMovIE from .nowness import NownessIE from .nowvideo import NowVideoIE -from .npo import NPOIE +from .npo import ( + NPOIE, + TegenlichtVproIE, +) from .nrk import ( NRKIE, NRKTVIE, diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 7a154e94a..f36d446d2 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -7,6 +7,7 @@ from ..utils import ( unified_strdate, parse_duration, qualities, + url_basename, ) @@ -55,7 +56,9 @@ class NPOIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + return self._get_info(video_id) + def _get_info(self, video_id): metadata = self._download_json( 'http://e.omroep.nl/metadata/aflevering/%s' % video_id, video_id, @@ -106,3 +109,30 @@ class NPOIE(InfoExtractor): 'duration': parse_duration(metadata.get('tijdsduur')), 'formats': formats, } + + +class TegenlichtVproIE(NPOIE): + IE_NAME = 'tegenlicht.vpro.nl' + _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' + + _TESTS = [ + { + 'url': 'http://tegenlicht.vpro.nl/afleveringen/2012-2013/de-toekomst-komt-uit-afrika.html', + 'md5': 'f8065e4e5a7824068ed3c7e783178f2c', + 'info_dict': { + 'id': 'VPWON_1169289', + 'ext': 'm4v', + 'title': 'Tegenlicht', + 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'upload_date': '20130225', + }, + }, + ] + + def _real_extract(self, url): + name = url_basename(url) + webpage = self._download_webpage(url, name) + urn = self._html_search_meta('mediaurn', webpage) + info_page = self._download_json( + 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) + return self._get_info(info_page['mid'])