support for vtt subtitles in m3u8 manifests
This commit is contained in:
parent
d506825a5c
commit
84412f41fa
|
@ -1531,6 +1531,19 @@ class HaruhiDL(object):
|
||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
|
# Some fragmented media manifests like m3u8 allow embedding subtitles
|
||||||
|
# This is a hack to expose these subtitles to users without a major refactor of the extractors
|
||||||
|
if 'formats' in info_dict:
|
||||||
|
formats_subtitles = list(filter(lambda x: x.get('_subtitle'), info_dict['formats']))
|
||||||
|
if formats_subtitles:
|
||||||
|
info_dict.setdefault('subtitles', {})
|
||||||
|
for sub in formats_subtitles:
|
||||||
|
if sub['_key'] not in info_dict['subtitles']:
|
||||||
|
info_dict['subtitles'][sub['_key']] = []
|
||||||
|
info_dict['subtitles'][sub['_key']].append(sub['_subtitle'])
|
||||||
|
# remove these subtitles from formats now
|
||||||
|
info_dict['formats'] = list(filter(lambda x: '_subtitle' not in x, info_dict['formats']))
|
||||||
|
|
||||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
cc = info_dict.get(cc_kind)
|
cc = info_dict.get(cc_kind)
|
||||||
if cc:
|
if cc:
|
||||||
|
@ -1538,6 +1551,12 @@ class HaruhiDL(object):
|
||||||
for subtitle_format in subtitle:
|
for subtitle_format in subtitle:
|
||||||
if subtitle_format.get('url'):
|
if subtitle_format.get('url'):
|
||||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||||
|
if subtitle_format.get('protocol') is None:
|
||||||
|
subtitle_format['protocol'] = determine_protocol(subtitle_format['url'])
|
||||||
|
if subtitle_format.get('http_headers') is None:
|
||||||
|
full_info = info_dict.copy()
|
||||||
|
full_info.update(subtitle_format)
|
||||||
|
subtitle_format['http_headers'] = self._calc_headers(full_info)
|
||||||
if subtitle_format.get('ext') is None:
|
if subtitle_format.get('ext') is None:
|
||||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||||
|
|
||||||
|
@ -1854,7 +1873,6 @@ class HaruhiDL(object):
|
||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['requested_subtitles']
|
subtitles = info_dict['requested_subtitles']
|
||||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
|
||||||
for sub_lang, sub_info in subtitles.items():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub_format = sub_info['ext']
|
sub_format = sub_info['ext']
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||||
|
@ -1873,10 +1891,8 @@ class HaruhiDL(object):
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
subd = get_suitable_downloader(sub_info, self.params)(self, self.params)
|
||||||
sub_info['url'], info_dict['id'], note=False).read()
|
subd.download(sub_filename, sub_info)
|
||||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
|
||||||
subfile.write(sub_data)
|
|
||||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
(sub_lang, error_to_compat_str(err)))
|
(sub_lang, error_to_compat_str(err)))
|
||||||
|
|
|
@ -318,7 +318,9 @@ class FFmpegFD(ExternalFD):
|
||||||
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||||
|
|
||||||
if protocol in ('m3u8', 'm3u8_native'):
|
if protocol in ('m3u8', 'm3u8_native'):
|
||||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
if info_dict['ext'] == 'vtt':
|
||||||
|
args += ['-f', 'webvtt']
|
||||||
|
elif self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||||
args += ['-f', 'mpegts']
|
args += ['-f', 'mpegts']
|
||||||
else:
|
else:
|
||||||
args += ['-f', 'mp4']
|
args += ['-f', 'mp4']
|
||||||
|
|
|
@ -1391,6 +1391,10 @@ class InfoExtractor(object):
|
||||||
f['tbr'] = f['abr'] + f['vbr']
|
f['tbr'] = f['abr'] + f['vbr']
|
||||||
|
|
||||||
def _formats_key(f):
|
def _formats_key(f):
|
||||||
|
# manifest subtitle workaround
|
||||||
|
if '_subtitle' in f:
|
||||||
|
return (-1,)
|
||||||
|
|
||||||
# TODO remove the following workaround
|
# TODO remove the following workaround
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
if not f.get('ext') and 'url' in f:
|
if not f.get('ext') and 'url' in f:
|
||||||
|
@ -1726,7 +1730,7 @@ class InfoExtractor(object):
|
||||||
if not (media_type and group_id and name):
|
if not (media_type and group_id and name):
|
||||||
return
|
return
|
||||||
groups.setdefault(group_id, []).append(media)
|
groups.setdefault(group_id, []).append(media)
|
||||||
if media_type not in ('VIDEO', 'AUDIO'):
|
if media_type not in ('VIDEO', 'AUDIO', 'SUBTITLES'):
|
||||||
return
|
return
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
if media_url:
|
if media_url:
|
||||||
|
@ -1734,6 +1738,16 @@ class InfoExtractor(object):
|
||||||
for v in (m3u8_id, group_id, name):
|
for v in (m3u8_id, group_id, name):
|
||||||
if v:
|
if v:
|
||||||
format_id.append(v)
|
format_id.append(v)
|
||||||
|
if media_type == 'SUBTITLES':
|
||||||
|
f = {
|
||||||
|
'_subtitle': {
|
||||||
|
'url': format_url(media_url),
|
||||||
|
'ext': 'vtt',
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
},
|
||||||
|
'_key': media.get('LANGUAGE'),
|
||||||
|
}
|
||||||
|
else:
|
||||||
f = {
|
f = {
|
||||||
'format_id': '-'.join(format_id),
|
'format_id': '-'.join(format_id),
|
||||||
'url': format_url(media_url),
|
'url': format_url(media_url),
|
||||||
|
|
Loading…
Reference in a new issue