From 774e208f94574ef9c62e82ac5ad0c57f245a8752 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 10 Dec 2014 13:21:24 +0100 Subject: [PATCH] [youtube] Handle missing DASH manifest (Fixes #4421, fixes #4420) --- youtube_dl/extractor/youtube.py | 113 +++++++++++++++++++------------- 1 file changed, 68 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8b6e591a4..329690742 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -417,6 +417,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'upload_date': '20140605', }, }, + # video_info is None (https://github.com/rg3/youtube-dl/issues/4421) + { + 'url': '__2ABJjxzNo', + 'info_dict': { + 'id': '__2ABJjxzNo', + 'ext': 'mp4', + 'upload_date': '20100430', + 'uploader_id': 'deadmau5', + 'description': 'md5:12c56784b8032162bb936a5f76d55360', + 'uploader': 'deadmau5', + 'title': 'Deadmau5 - Some Chords (HD)', + }, + 'expected_warnings': [ + 'DASH manifest missing', + ] + } ] def __init__(self, *args, **kwargs): @@ -666,6 +682,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') + def _parse_dash_manifest(self, video_id, dash_manifest_url): + def decrypt_sig(mobj): + s = mobj.group(1) + dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) + return '/signature/%s' % dec_s + dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) + dash_doc = self._download_xml( + dash_manifest_url, video_id, + note='Downloading DASH manifest', + errnote='Could not download DASH manifest') + + formats = [] + for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): + url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') + if url_el is None: + continue + format_id = r.attrib['id'] + video_url = url_el.text + filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) + f = { + 'format_id': format_id, + 'url': video_url, + 'width': int_or_none(r.attrib.get('width')), + 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), + 'asr': int_or_none(r.attrib.get('audioSamplingRate')), + 'filesize': filesize, + 'fps': int_or_none(r.attrib.get('frameRate')), + } + try: + existing_format = next( + fo for fo in formats + if fo['format_id'] == format_id) + except StopIteration: + f.update(self._formats.get(format_id, {})) + formats.append(f) + else: + existing_format.update(f) + return formats + def _real_extract(self, url): proto = ( 'http' if self._downloader.params.get('prefer_insecure', False) @@ -943,51 +998,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): # Look for the DASH manifest if self._downloader.params.get('youtube_include_dash_manifest', True): - try: - # The DASH manifest used needs to be the one from the original video_webpage. - # The one found in get_video_info seems to be using different signatures. - # However, in the case of an age restriction there won't be any embedded dashmpd in the video_webpage. - # Luckily, it seems, this case uses some kind of default signature (len == 86), so the - # combination of get_video_info and the _static_decrypt_signature() decryption fallback will work here. - dash_manifest_url = video_info.get('dashmpd')[0] - - def decrypt_sig(mobj): - s = mobj.group(1) - dec_s = self._decrypt_signature(s, video_id, player_url, age_gate) - return '/signature/%s' % dec_s - dash_manifest_url = re.sub(r'/s/([\w\.]+)', decrypt_sig, dash_manifest_url) - dash_doc = self._download_xml( - dash_manifest_url, video_id, - note='Downloading DASH manifest', - errnote='Could not download DASH manifest') - for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'): - url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL') - if url_el is None: - continue - format_id = r.attrib['id'] - video_url = url_el.text - filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength')) - f = { - 'format_id': format_id, - 'url': video_url, - 'width': int_or_none(r.attrib.get('width')), - 'tbr': int_or_none(r.attrib.get('bandwidth'), 1000), - 'asr': int_or_none(r.attrib.get('audioSamplingRate')), - 'filesize': filesize, - 'fps': int_or_none(r.attrib.get('frameRate')), - } - try: - existing_format = next( - fo for fo in formats - if fo['format_id'] == format_id) - except StopIteration: - f.update(self._formats.get(format_id, {})) - formats.append(f) - else: - existing_format.update(f) - - except (ExtractorError, KeyError) as e: - self.report_warning('Skipping DASH manifest: %r' % e, video_id) + dash_mpd = video_info.get('dashmpd') + if not dash_mpd: + self.report_warning('%s: DASH manifest missing' % video_id) + else: + dash_manifest_url = dash_mpd[0] + try: + dash_formats = self._parse_dash_manifest( + video_id, dash_manifest_url) + except (ExtractorError, KeyError) as e: + self.report_warning( + 'Skipping DASH manifest: %r' % e, video_id) + else: + formats.extend(dash_formats) self._sort_formats(formats)