Merge branch 'ytdl-backports' into 'master'
youtube-dl backports. Closes #26, #23, and #1. See merge request laudompat/haruhi-dl!3.
This commit is contained in:
commit
7b16bb6509
|
@ -4,16 +4,18 @@
|
||||||
|
|
||||||
module.exports = function patchHook(patchContent) {
|
module.exports = function patchHook(patchContent) {
|
||||||
[
|
[
|
||||||
|
[/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
|
||||||
[/youtube_dl/g, 'haruhi_dl'],
|
[/youtube_dl/g, 'haruhi_dl'],
|
||||||
[/youtube-dl/g, 'haruhi-dl'],
|
[/youtube-dl/g, 'haruhi-dl'],
|
||||||
[/youtubedl/g, 'haruhidl'],
|
[/youtubedl/g, 'haruhidl'],
|
||||||
[/YoutubeDL/g, 'HaruhiDL'],
|
[/YoutubeDL/g, 'HaruhiDL'],
|
||||||
[/ytdl/g, 'hdl'],
|
[/ytdl/g, 'hdl'],
|
||||||
[/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
|
|
||||||
[/yt-dl/g, 'h-dl'],
|
[/yt-dl/g, 'h-dl'],
|
||||||
|
[/ydl/g, 'hdl'],
|
||||||
|
|
||||||
// prevent from linking to non-existent repository
|
// prevent from linking to non-existent repository
|
||||||
[/github\.com\/ytdl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
[/github\.com\/ytdl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
||||||
|
[/github\.com\/rg3\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
||||||
// prevent changing the smuggle URLs (for compatibility with ytdl)
|
// prevent changing the smuggle URLs (for compatibility with ytdl)
|
||||||
[/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
|
[/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
|
||||||
].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));
|
].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));
|
||||||
|
|
|
@ -77,7 +77,7 @@ def build_lazy_ie(ie, name):
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
# find the correct sorting and add the required base classes so that sublcasses
|
# find the correct sorting and add the required base classes so that subclasses
|
||||||
# can be correctly created
|
# can be correctly created
|
||||||
classes = _ALL_CLASSES[:-1]
|
classes = _ALL_CLASSES[:-1]
|
||||||
ordered_cls = []
|
ordered_cls = []
|
||||||
|
|
|
@ -163,6 +163,7 @@ class HaruhiDL(object):
|
||||||
simulate: Do not download the video files.
|
simulate: Do not download the video files.
|
||||||
format: Video format code. See options.py for more information.
|
format: Video format code. See options.py for more information.
|
||||||
outtmpl: Template for output names.
|
outtmpl: Template for output names.
|
||||||
|
outtmpl_na_placeholder: Placeholder for unavailable meta fields.
|
||||||
restrictfilenames: Do not allow "&" and spaces in file names
|
restrictfilenames: Do not allow "&" and spaces in file names
|
||||||
ignoreerrors: Do not stop on download errors.
|
ignoreerrors: Do not stop on download errors.
|
||||||
force_generic_extractor: Force downloader to use the generic extractor
|
force_generic_extractor: Force downloader to use the generic extractor
|
||||||
|
@ -338,6 +339,8 @@ class HaruhiDL(object):
|
||||||
_pps = []
|
_pps = []
|
||||||
_download_retcode = None
|
_download_retcode = None
|
||||||
_num_downloads = None
|
_num_downloads = None
|
||||||
|
_playlist_level = 0
|
||||||
|
_playlist_urls = set()
|
||||||
_screen_file = None
|
_screen_file = None
|
||||||
|
|
||||||
def __init__(self, params=None, auto_init=True):
|
def __init__(self, params=None, auto_init=True):
|
||||||
|
@ -660,7 +663,7 @@ class HaruhiDL(object):
|
||||||
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
|
||||||
for k, v in template_dict.items()
|
for k, v in template_dict.items()
|
||||||
if v is not None and not isinstance(v, (list, tuple, dict)))
|
if v is not None and not isinstance(v, (list, tuple, dict)))
|
||||||
template_dict = collections.defaultdict(lambda: 'NA', template_dict)
|
template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
|
||||||
|
|
||||||
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
|
||||||
|
|
||||||
|
@ -680,8 +683,8 @@ class HaruhiDL(object):
|
||||||
|
|
||||||
# Missing numeric fields used together with integer presentation types
|
# Missing numeric fields used together with integer presentation types
|
||||||
# in format specification will break the argument substitution since
|
# in format specification will break the argument substitution since
|
||||||
# string 'NA' is returned for missing fields. We will patch output
|
# string NA placeholder is returned for missing fields. We will patch
|
||||||
# template for missing fields to meet string presentation type.
|
# output template for missing fields to meet string presentation type.
|
||||||
for numeric_field in self._NUMERIC_FIELDS:
|
for numeric_field in self._NUMERIC_FIELDS:
|
||||||
if numeric_field not in template_dict:
|
if numeric_field not in template_dict:
|
||||||
# As of [1] format syntax is:
|
# As of [1] format syntax is:
|
||||||
|
@ -797,10 +800,37 @@ class HaruhiDL(object):
|
||||||
self.report_warning('The program functionality for this site has been marked as broken, '
|
self.report_warning('The program functionality for this site has been marked as broken, '
|
||||||
'and will probably not work.')
|
'and will probably not work.')
|
||||||
|
|
||||||
|
return self.__extract_info(url, ie, download, extra_info, process)
|
||||||
|
else:
|
||||||
|
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
||||||
|
|
||||||
|
def __handle_extraction_exceptions(func):
|
||||||
|
def wrapper(self, *args, **kwargs):
|
||||||
try:
|
try:
|
||||||
|
return func(self, *args, **kwargs)
|
||||||
|
except GeoRestrictedError as e:
|
||||||
|
msg = e.msg
|
||||||
|
if e.countries:
|
||||||
|
msg += '\nThis video is available in %s.' % ', '.join(
|
||||||
|
map(ISO3166Utils.short2full, e.countries))
|
||||||
|
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
||||||
|
self.report_error(msg)
|
||||||
|
except ExtractorError as e: # An error we somewhat expected
|
||||||
|
self.report_error(compat_str(e), e.format_traceback())
|
||||||
|
except MaxDownloadsReached:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
if self.params.get('ignoreerrors', False):
|
||||||
|
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
||||||
|
else:
|
||||||
|
raise
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
@__handle_extraction_exceptions
|
||||||
|
def __extract_info(self, url, ie, download, extra_info, process):
|
||||||
ie_result = ie.extract(url)
|
ie_result = ie.extract(url)
|
||||||
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
|
||||||
break
|
return
|
||||||
if isinstance(ie_result, list):
|
if isinstance(ie_result, list):
|
||||||
# Backwards compatibility: old IE result format
|
# Backwards compatibility: old IE result format
|
||||||
ie_result = {
|
ie_result = {
|
||||||
|
@ -812,27 +842,6 @@ class HaruhiDL(object):
|
||||||
return self.process_ie_result(ie_result, download, extra_info)
|
return self.process_ie_result(ie_result, download, extra_info)
|
||||||
else:
|
else:
|
||||||
return ie_result
|
return ie_result
|
||||||
except GeoRestrictedError as e:
|
|
||||||
msg = e.msg
|
|
||||||
if e.countries:
|
|
||||||
msg += '\nThis video is available in %s.' % ', '.join(
|
|
||||||
map(ISO3166Utils.short2full, e.countries))
|
|
||||||
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
|
|
||||||
self.report_error(msg)
|
|
||||||
break
|
|
||||||
except ExtractorError as e: # An error we somewhat expected
|
|
||||||
self.report_error(compat_str(e), e.format_traceback())
|
|
||||||
break
|
|
||||||
except MaxDownloadsReached:
|
|
||||||
raise
|
|
||||||
except Exception as e:
|
|
||||||
if self.params.get('ignoreerrors', False):
|
|
||||||
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
else:
|
|
||||||
self.report_error('no suitable InfoExtractor for URL %s' % url)
|
|
||||||
|
|
||||||
def add_default_extra_info(self, ie_result, ie, url):
|
def add_default_extra_info(self, ie_result, ie, url):
|
||||||
self.add_extra_info(ie_result, {
|
self.add_extra_info(ie_result, {
|
||||||
|
@ -904,8 +913,51 @@ class HaruhiDL(object):
|
||||||
return self.process_ie_result(
|
return self.process_ie_result(
|
||||||
new_result, download=download, extra_info=extra_info)
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type in ('playlist', 'multi_video'):
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
|
# Protect from infinite recursion due to recursively nested playlists
|
||||||
|
# (see https://github.com/hdl-org/haruhi-dl/issues/27833)
|
||||||
|
webpage_url = ie_result['webpage_url']
|
||||||
|
if webpage_url in self._playlist_urls:
|
||||||
|
self.to_screen(
|
||||||
|
'[download] Skipping already downloaded playlist: %s'
|
||||||
|
% ie_result.get('title') or ie_result.get('id'))
|
||||||
|
return
|
||||||
|
|
||||||
|
self._playlist_level += 1
|
||||||
|
self._playlist_urls.add(webpage_url)
|
||||||
|
try:
|
||||||
|
return self.__process_playlist(ie_result, download)
|
||||||
|
finally:
|
||||||
|
self._playlist_level -= 1
|
||||||
|
if not self._playlist_level:
|
||||||
|
self._playlist_urls.clear()
|
||||||
|
elif result_type == 'compat_list':
|
||||||
|
self.report_warning(
|
||||||
|
'Extractor %s returned a compat_list result. '
|
||||||
|
'It needs to be updated.' % ie_result.get('extractor'))
|
||||||
|
|
||||||
|
def _fixup(r):
|
||||||
|
self.add_extra_info(
|
||||||
|
r,
|
||||||
|
{
|
||||||
|
'extractor': ie_result['extractor'],
|
||||||
|
'webpage_url': ie_result['webpage_url'],
|
||||||
|
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
||||||
|
'extractor_key': ie_result['extractor_key'],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return r
|
||||||
|
ie_result['entries'] = [
|
||||||
|
self.process_ie_result(_fixup(r), download, extra_info)
|
||||||
|
for r in ie_result['entries']
|
||||||
|
]
|
||||||
|
return ie_result
|
||||||
|
else:
|
||||||
|
raise Exception('Invalid result type: %s' % result_type)
|
||||||
|
|
||||||
|
def __process_playlist(self, ie_result, download):
|
||||||
# We process each entry in the playlist
|
# We process each entry in the playlist
|
||||||
playlist = ie_result.get('title') or ie_result.get('id')
|
playlist = ie_result.get('title') or ie_result.get('id')
|
||||||
|
|
||||||
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
self.to_screen('[download] Downloading playlist: %s' % playlist)
|
||||||
|
|
||||||
playlist_results = []
|
playlist_results = []
|
||||||
|
@ -1007,36 +1059,17 @@ class HaruhiDL(object):
|
||||||
self.to_screen('[download] ' + reason)
|
self.to_screen('[download] ' + reason)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
entry_result = self.process_ie_result(entry,
|
entry_result = self.__process_iterable_entry(entry, download, extra)
|
||||||
download=download,
|
# TODO: skip failed (empty) entries?
|
||||||
extra_info=extra)
|
|
||||||
playlist_results.append(entry_result)
|
playlist_results.append(entry_result)
|
||||||
ie_result['entries'] = playlist_results
|
ie_result['entries'] = playlist_results
|
||||||
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
self.to_screen('[download] Finished downloading playlist: %s' % playlist)
|
||||||
return ie_result
|
return ie_result
|
||||||
elif result_type == 'compat_list':
|
|
||||||
self.report_warning(
|
|
||||||
'Extractor %s returned a compat_list result. '
|
|
||||||
'It needs to be updated.' % ie_result.get('extractor'))
|
|
||||||
|
|
||||||
def _fixup(r):
|
@__handle_extraction_exceptions
|
||||||
self.add_extra_info(
|
def __process_iterable_entry(self, entry, download, extra_info):
|
||||||
r,
|
return self.process_ie_result(
|
||||||
{
|
entry, download=download, extra_info=extra_info)
|
||||||
'extractor': ie_result['extractor'],
|
|
||||||
'webpage_url': ie_result['webpage_url'],
|
|
||||||
'webpage_url_basename': url_basename(ie_result['webpage_url']),
|
|
||||||
'extractor_key': ie_result['extractor_key'],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
return r
|
|
||||||
ie_result['entries'] = [
|
|
||||||
self.process_ie_result(_fixup(r), download, extra_info)
|
|
||||||
for r in ie_result['entries']
|
|
||||||
]
|
|
||||||
return ie_result
|
|
||||||
else:
|
|
||||||
raise Exception('Invalid result type: %s' % result_type)
|
|
||||||
|
|
||||||
def _build_format_filter(self, filter_spec):
|
def _build_format_filter(self, filter_spec):
|
||||||
" Returns a function to filter the formats according to the filter_spec "
|
" Returns a function to filter the formats according to the filter_spec "
|
||||||
|
@ -1077,7 +1110,7 @@ class HaruhiDL(object):
|
||||||
'*=': lambda attr, value: value in attr,
|
'*=': lambda attr, value: value in attr,
|
||||||
}
|
}
|
||||||
str_operator_rex = re.compile(r'''(?x)
|
str_operator_rex = re.compile(r'''(?x)
|
||||||
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
|
\s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
|
||||||
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
|
||||||
\s*(?P<value>[a-zA-Z0-9._-]+)
|
\s*(?P<value>[a-zA-Z0-9._-]+)
|
||||||
\s*$
|
\s*$
|
||||||
|
@ -1220,6 +1253,8 @@ class HaruhiDL(object):
|
||||||
group = _parse_format_selection(tokens, inside_group=True)
|
group = _parse_format_selection(tokens, inside_group=True)
|
||||||
current_selector = FormatSelector(GROUP, group, [])
|
current_selector = FormatSelector(GROUP, group, [])
|
||||||
elif string == '+':
|
elif string == '+':
|
||||||
|
if inside_merge:
|
||||||
|
raise syntax_error('Unexpected "+"', start)
|
||||||
video_selector = current_selector
|
video_selector = current_selector
|
||||||
audio_selector = _parse_format_selection(tokens, inside_merge=True)
|
audio_selector = _parse_format_selection(tokens, inside_merge=True)
|
||||||
if not video_selector or not audio_selector:
|
if not video_selector or not audio_selector:
|
||||||
|
@ -1604,7 +1639,7 @@ class HaruhiDL(object):
|
||||||
if req_format is None:
|
if req_format is None:
|
||||||
req_format = self._default_format_spec(info_dict, download=download)
|
req_format = self._default_format_spec(info_dict, download=download)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_stdout('[debug] Default format spec: %s' % req_format)
|
self._write_string('[debug] Default format spec: %s\n' % req_format)
|
||||||
|
|
||||||
format_selector = self.build_format_selector(req_format)
|
format_selector = self.build_format_selector(req_format)
|
||||||
|
|
||||||
|
@ -1771,6 +1806,8 @@ class HaruhiDL(object):
|
||||||
os.makedirs(dn)
|
os.makedirs(dn)
|
||||||
return True
|
return True
|
||||||
except (OSError, IOError) as err:
|
except (OSError, IOError) as err:
|
||||||
|
if isinstance(err, OSError) and err.errno == errno.EEXIST:
|
||||||
|
return True
|
||||||
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
self.report_error('unable to create directory ' + error_to_compat_str(err))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@ -1865,7 +1902,7 @@ class HaruhiDL(object):
|
||||||
for ph in self._progress_hooks:
|
for ph in self._progress_hooks:
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
return fd.download(name, info)
|
return fd.download(name, info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
@ -2404,7 +2441,7 @@ class HaruhiDL(object):
|
||||||
thumb_ext = determine_ext(t['url'], 'jpg')
|
thumb_ext = determine_ext(t['url'], 'jpg')
|
||||||
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
|
||||||
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
|
||||||
t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
|
t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
|
||||||
|
|
||||||
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
|
||||||
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
self.to_screen('[%s] %s: Thumbnail %sis already present' %
|
||||||
|
|
|
@ -340,6 +340,7 @@ def _real_main(argv=None):
|
||||||
'format': opts.format,
|
'format': opts.format,
|
||||||
'listformats': opts.listformats,
|
'listformats': opts.listformats,
|
||||||
'outtmpl': outtmpl,
|
'outtmpl': outtmpl,
|
||||||
|
'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
|
||||||
'autonumber_size': opts.autonumber_size,
|
'autonumber_size': opts.autonumber_size,
|
||||||
'autonumber_start': opts.autonumber_start,
|
'autonumber_start': opts.autonumber_start,
|
||||||
'restrictfilenames': opts.restrictfilenames,
|
'restrictfilenames': opts.restrictfilenames,
|
||||||
|
|
|
@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
|
||||||
|
|
||||||
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
|
||||||
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
|
||||||
success = ctx['dl'].download(fragment_filename, {
|
fragment_info_dict = {
|
||||||
'url': frag_url,
|
'url': frag_url,
|
||||||
'http_headers': headers or info_dict.get('http_headers'),
|
'http_headers': headers or info_dict.get('http_headers'),
|
||||||
})
|
}
|
||||||
|
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
|
||||||
if not success:
|
if not success:
|
||||||
return False, None
|
return False, None
|
||||||
|
if fragment_info_dict.get('filetime'):
|
||||||
|
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
|
||||||
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
|
||||||
ctx['fragment_filename_sanitized'] = frag_sanitized
|
ctx['fragment_filename_sanitized'] = frag_sanitized
|
||||||
frag_content = down.read()
|
frag_content = down.read()
|
||||||
|
@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
|
||||||
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
downloaded_bytes = ctx['complete_frags_downloaded_bytes']
|
||||||
else:
|
else:
|
||||||
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
self.try_rename(ctx['tmpfilename'], ctx['filename'])
|
||||||
|
if self.params.get('updatetime', True):
|
||||||
|
filetime = ctx.get('fragment_filetime')
|
||||||
|
if filetime:
|
||||||
|
try:
|
||||||
|
os.utime(ctx['filename'], (time.time(), filetime))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
|
||||||
|
|
||||||
self._hook_progress({
|
self._hook_progress({
|
||||||
|
|
|
@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
|
||||||
# no segments will definitely be appended to the end of the playlist.
|
# no segments will definitely be appended to the end of the playlist.
|
||||||
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
|
||||||
# # event media playlists [4]
|
# # event media playlists [4]
|
||||||
|
r'#EXT-X-MAP:', # media initialization [5]
|
||||||
|
|
||||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
|
||||||
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
|
||||||
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
|
||||||
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
|
||||||
|
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
|
||||||
)
|
)
|
||||||
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
|
||||||
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
|
||||||
|
@ -170,6 +172,10 @@ class HlsFD(FragmentFD):
|
||||||
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
|
||||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.hdl.urlopen(
|
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.hdl.urlopen(
|
||||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||||
|
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||||
|
# size (see https://github.com/hdl-org/haruhi-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||||
|
# not what it decrypts to.
|
||||||
|
if not test:
|
||||||
frag_content = AES.new(
|
frag_content = AES.new(
|
||||||
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
|
||||||
self._append_fragment(ctx, frag_content)
|
self._append_fragment(ctx, frag_content)
|
||||||
|
|
|
@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
|
||||||
try:
|
try:
|
||||||
ctx.data = self.hdl.urlopen(request)
|
ctx.data = self.hdl.urlopen(request)
|
||||||
except (compat_urllib_error.URLError, ) as err:
|
except (compat_urllib_error.URLError, ) as err:
|
||||||
if isinstance(err.reason, socket.timeout):
|
# reason may not be available, e.g. for urllib2.HTTPError on python 2.6
|
||||||
|
reason = getattr(err, 'reason', None)
|
||||||
|
if isinstance(reason, socket.timeout):
|
||||||
raise RetryDownload(err)
|
raise RetryDownload(err)
|
||||||
raise err
|
raise err
|
||||||
# When trying to resume, Content-Range HTTP header of response has to be checked
|
# When trying to resume, Content-Range HTTP header of response has to be checked
|
||||||
|
|
|
@ -1,14 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import calendar
|
|
||||||
import re
|
import re
|
||||||
import time
|
|
||||||
|
|
||||||
from .amp import AMPIE
|
from .amp import AMPIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from ..utils import (
|
||||||
from ..compat import compat_urlparse
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AbcNewsVideoIE(AMPIE):
|
class AbcNewsVideoIE(AMPIE):
|
||||||
|
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
(?:
|
(?:
|
||||||
abcnews\.go\.com/
|
abcnews\.go\.com/
|
||||||
(?:
|
(?:
|
||||||
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
|
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
|
||||||
video/embed\?.*?\bid=
|
video/(?:embed|itemfeed)\?.*?\bid=
|
||||||
)|
|
)|
|
||||||
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
|
||||||
)
|
)
|
||||||
|
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
|
||||||
'duration': 180,
|
'duration': 180,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'timestamp': 1380454200,
|
||||||
|
'upload_date': '20130929',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
|
# Youtube Embeds
|
||||||
|
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '10505354',
|
'id': '51286501',
|
||||||
'ext': 'flv',
|
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
|
||||||
'display_id': 'dramatic-video-rare-death-job-america',
|
'description': 'Billingsley went from a child actor to Hollywood power player.',
|
||||||
'title': 'Occupational Hazards',
|
|
||||||
'description': 'Nightline investigates the dangers that lurk at various jobs.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'upload_date': '20100428',
|
|
||||||
'timestamp': 1272412800,
|
|
||||||
},
|
},
|
||||||
'add_ie': ['AbcNewsVideo'],
|
'playlist_count': 5,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38897857',
|
'id': '38897857',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
|
|
||||||
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
'title': 'Justin Timberlake Drops Hints For Secret Single',
|
||||||
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
|
||||||
'upload_date': '20160515',
|
'upload_date': '20160505',
|
||||||
'timestamp': 1463329500,
|
'timestamp': 1462442280,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# inline.type == 'video'
|
||||||
|
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
story_id = self._match_id(url)
|
||||||
display_id = mobj.group('display_id')
|
webpage = self._download_webpage(url, story_id)
|
||||||
video_id = mobj.group('id')
|
story = self._parse_json(self._search_regex(
|
||||||
|
r"window\['__abcnews__'\]\s*=\s*({.+?});",
|
||||||
|
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
|
||||||
|
article_contents = story.get('articleContents') or {}
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
def entries():
|
||||||
video_url = self._search_regex(
|
featured_video = story.get('featuredVideo') or {}
|
||||||
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
|
feed = try_get(featured_video, lambda x: x['video']['feed'])
|
||||||
full_video_url = compat_urlparse.urljoin(url, video_url)
|
if feed:
|
||||||
|
yield {
|
||||||
youtube_url = YoutubeIE._extract_url(webpage)
|
'_type': 'url',
|
||||||
|
'id': featured_video.get('id'),
|
||||||
timestamp = None
|
'title': featured_video.get('name'),
|
||||||
date_str = self._html_search_regex(
|
'url': feed,
|
||||||
r'<span[^>]+class="timestamp">([^<]+)</span>',
|
'thumbnail': featured_video.get('images'),
|
||||||
webpage, 'timestamp', fatal=False)
|
'description': featured_video.get('description'),
|
||||||
if date_str:
|
'timestamp': parse_iso8601(featured_video.get('uploadDate')),
|
||||||
tz_offset = 0
|
'duration': parse_duration(featured_video.get('duration')),
|
||||||
if date_str.endswith(' ET'): # Eastern Time
|
|
||||||
tz_offset = -5
|
|
||||||
date_str = date_str[:-3]
|
|
||||||
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
|
|
||||||
for date_format in date_formats:
|
|
||||||
try:
|
|
||||||
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
|
|
||||||
except ValueError:
|
|
||||||
continue
|
|
||||||
if timestamp is not None:
|
|
||||||
timestamp -= tz_offset * 3600
|
|
||||||
|
|
||||||
entry = {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': AbcNewsVideoIE.ie_key(),
|
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||||
'url': full_video_url,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'timestamp': timestamp,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if youtube_url:
|
for inline in (article_contents.get('inlines') or []):
|
||||||
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
|
inline_type = inline.get('type')
|
||||||
return self.playlist_result(entries)
|
if inline_type == 'iframe':
|
||||||
|
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
|
||||||
|
if iframe_url:
|
||||||
|
yield self.url_result(iframe_url)
|
||||||
|
elif inline_type == 'video':
|
||||||
|
video_id = inline.get('id')
|
||||||
|
if video_id:
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
|
||||||
|
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
|
||||||
|
'description': inline.get('description'),
|
||||||
|
'duration': parse_duration(inline.get('duration')),
|
||||||
|
'ie_key': AbcNewsVideoIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
return entry
|
return self.playlist_result(
|
||||||
|
entries(), story_id, article_contents.get('headline'),
|
||||||
|
article_contents.get('subHead'))
|
||||||
|
|
|
@ -2,21 +2,48 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import functools
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
float_or_none,
|
clean_podcast_url,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
try_get,
|
parse_iso8601,
|
||||||
unified_timestamp,
|
|
||||||
OnDemandPagedList,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ACastIE(InfoExtractor):
|
class ACastBaseIE(InfoExtractor):
|
||||||
|
def _extract_episode(self, episode, show_info):
|
||||||
|
title = episode['title']
|
||||||
|
info = {
|
||||||
|
'id': episode['id'],
|
||||||
|
'display_id': episode.get('episodeUrl'),
|
||||||
|
'url': clean_podcast_url(episode['url']),
|
||||||
|
'title': title,
|
||||||
|
'description': clean_html(episode.get('description') or episode.get('summary')),
|
||||||
|
'thumbnail': episode.get('image'),
|
||||||
|
'timestamp': parse_iso8601(episode.get('publishDate')),
|
||||||
|
'duration': int_or_none(episode.get('duration')),
|
||||||
|
'filesize': int_or_none(episode.get('contentLength')),
|
||||||
|
'season_number': int_or_none(episode.get('season')),
|
||||||
|
'episode': title,
|
||||||
|
'episode_number': int_or_none(episode.get('episode')),
|
||||||
|
}
|
||||||
|
info.update(show_info)
|
||||||
|
return info
|
||||||
|
|
||||||
|
def _extract_show_info(self, show):
|
||||||
|
return {
|
||||||
|
'creator': show.get('author'),
|
||||||
|
'series': show.get('title'),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, query=None):
|
||||||
|
return self._download_json(
|
||||||
|
'https://feeder.acast.com/api/v1/shows/' + path, video_id, query=query)
|
||||||
|
|
||||||
|
|
||||||
|
class ACastIE(ACastBaseIE):
|
||||||
IE_NAME = 'acast'
|
IE_NAME = 'acast'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
|
@ -28,15 +55,15 @@ class ACastIE(InfoExtractor):
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
|
||||||
'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
|
'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': '2. Raggarmordet - Röster ur det förflutna',
|
'title': '2. Raggarmordet - Röster ur det förflutna',
|
||||||
'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
|
'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
|
||||||
'timestamp': 1477346700,
|
'timestamp': 1477346700,
|
||||||
'upload_date': '20161024',
|
'upload_date': '20161024',
|
||||||
'duration': 2766.602563,
|
'duration': 2766,
|
||||||
'creator': 'Anton Berg & Martin Johnson',
|
'creator': 'Anton Berg & Martin Johnson',
|
||||||
'series': 'Spår',
|
'series': 'Spår',
|
||||||
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
'episode': '2. Raggarmordet - Röster ur det förflutna',
|
||||||
|
@ -45,7 +72,7 @@ class ACastIE(InfoExtractor):
|
||||||
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
|
'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
|
||||||
|
@ -54,40 +81,14 @@ class ACastIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel, display_id = re.match(self._VALID_URL, url).groups()
|
channel, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
s = self._download_json(
|
episode = self._call_api(
|
||||||
'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
|
'%s/episodes/%s' % (channel, display_id),
|
||||||
display_id)
|
display_id, {'showInfo': 'true'})
|
||||||
media_url = s['url']
|
return self._extract_episode(
|
||||||
if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
|
episode, self._extract_show_info(episode.get('show') or {}))
|
||||||
episode_url = s.get('episodeUrl')
|
|
||||||
if episode_url:
|
|
||||||
display_id = episode_url
|
|
||||||
else:
|
|
||||||
channel, display_id = re.match(self._VALID_URL, s['link']).groups()
|
|
||||||
cast_data = self._download_json(
|
|
||||||
'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
|
|
||||||
display_id)['result']
|
|
||||||
e = cast_data['episode']
|
|
||||||
title = e.get('name') or s['title']
|
|
||||||
return {
|
|
||||||
'id': compat_str(e['id']),
|
|
||||||
'display_id': display_id,
|
|
||||||
'url': media_url,
|
|
||||||
'title': title,
|
|
||||||
'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
|
|
||||||
'thumbnail': e.get('image'),
|
|
||||||
'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
|
|
||||||
'duration': float_or_none(e.get('duration') or s.get('duration')),
|
|
||||||
'filesize': int_or_none(e.get('contentLength')),
|
|
||||||
'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
|
|
||||||
'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
|
|
||||||
'season_number': int_or_none(e.get('seasonNumber')),
|
|
||||||
'episode': title,
|
|
||||||
'episode_number': int_or_none(e.get('episodeNumber')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class ACastChannelIE(InfoExtractor):
|
class ACastChannelIE(ACastBaseIE):
|
||||||
IE_NAME = 'acast:channel'
|
IE_NAME = 'acast:channel'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
|
@ -102,34 +103,24 @@ class ACastChannelIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
'id': '4efc5294-5385-4847-98bd-519799ce5786',
|
||||||
'title': 'Today in Focus',
|
'title': 'Today in Focus',
|
||||||
'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
|
'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 35,
|
'playlist_mincount': 200,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
'url': 'http://play.acast.com/s/ft-banking-weekly',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_API_BASE_URL = 'https://play.acast.com/api/'
|
|
||||||
_PAGE_SIZE = 10
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
|
||||||
|
|
||||||
def _fetch_page(self, channel_slug, page):
|
|
||||||
casts = self._download_json(
|
|
||||||
self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
|
|
||||||
channel_slug, note='Download page %d of channel data' % page)
|
|
||||||
for cast in casts:
|
|
||||||
yield self.url_result(
|
|
||||||
'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
|
|
||||||
'ACast', cast['id'])
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
channel_slug = self._match_id(url)
|
show_slug = self._match_id(url)
|
||||||
channel_data = self._download_json(
|
show = self._call_api(show_slug, show_slug)
|
||||||
self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
|
show_info = self._extract_show_info(show)
|
||||||
entries = OnDemandPagedList(functools.partial(
|
entries = []
|
||||||
self._fetch_page, channel_slug), self._PAGE_SIZE)
|
for episode in (show.get('episodes') or []):
|
||||||
return self.playlist_result(entries, compat_str(
|
entries.append(self._extract_episode(episode, show_info))
|
||||||
channel_data['id']), channel_data['name'], channel_data.get('description'))
|
return self.playlist_result(
|
||||||
|
entries, show.get('id'), show.get('title'), show.get('description'))
|
||||||
|
|
|
@ -10,6 +10,7 @@ import random
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..aes import aes_cbc_decrypt
|
from ..aes import aes_cbc_decrypt
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_HTTPError,
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_ord,
|
compat_ord,
|
||||||
)
|
)
|
||||||
|
@ -18,11 +19,14 @@ from ..utils import (
|
||||||
bytes_to_long,
|
bytes_to_long,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
intlist_to_bytes,
|
intlist_to_bytes,
|
||||||
long_to_bytes,
|
long_to_bytes,
|
||||||
pkcs1pad,
|
pkcs1pad,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
urljoin,
|
try_get,
|
||||||
|
unified_strdate,
|
||||||
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,16 +35,30 @@ class ADNIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
|
||||||
'md5': 'e497370d847fd79d9d4c74be55575c7a',
|
'md5': '0319c99885ff5547565cacb4f3f9348d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '7778',
|
'id': '7778',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
|
'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
|
||||||
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
|
||||||
|
'series': 'Blue Exorcist - Kyôto Saga',
|
||||||
|
'duration': 1467,
|
||||||
|
'release_date': '20170106',
|
||||||
|
'comment_count': int,
|
||||||
|
'average_rating': float,
|
||||||
|
'season_number': 2,
|
||||||
|
'episode': 'Début des hostilités',
|
||||||
|
'episode_number': 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_NETRC_MACHINE = 'animedigitalnetwork'
|
||||||
_BASE_URL = 'http://animedigitalnetwork.fr'
|
_BASE_URL = 'http://animedigitalnetwork.fr'
|
||||||
_RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
|
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
|
||||||
|
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
|
||||||
|
_HEADERS = {}
|
||||||
|
_LOGIN_ERR_MESSAGE = 'Unable to log in'
|
||||||
|
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
|
||||||
_POS_ALIGN_MAP = {
|
_POS_ALIGN_MAP = {
|
||||||
'start': 1,
|
'start': 1,
|
||||||
'end': 3,
|
'end': 3,
|
||||||
|
@ -54,26 +72,24 @@ class ADNIE(InfoExtractor):
|
||||||
def _ass_subtitles_timecode(seconds):
|
def _ass_subtitles_timecode(seconds):
|
||||||
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
|
||||||
|
|
||||||
def _get_subtitles(self, sub_path, video_id):
|
def _get_subtitles(self, sub_url, video_id):
|
||||||
if not sub_path:
|
if not sub_url:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, sub_path),
|
sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
||||||
video_id, 'Downloading subtitles location', fatal=False) or '{}'
|
|
||||||
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
|
||||||
if subtitle_location:
|
if subtitle_location:
|
||||||
enc_subtitles = self._download_webpage(
|
enc_subtitles = self._download_webpage(
|
||||||
urljoin(self._BASE_URL, subtitle_location),
|
subtitle_location, video_id, 'Downloading subtitles data',
|
||||||
video_id, 'Downloading subtitles data', fatal=False,
|
fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
|
||||||
headers={'Origin': 'https://animedigitalnetwork.fr'})
|
|
||||||
if not enc_subtitles:
|
if not enc_subtitles:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
# http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
|
||||||
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
|
||||||
bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
|
bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
|
||||||
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
|
||||||
))
|
))
|
||||||
subtitles_json = self._parse_json(
|
subtitles_json = self._parse_json(
|
||||||
|
@ -117,61 +133,100 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||||
}])
|
}])
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if not username:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
access_token = (self._download_json(
|
||||||
|
self._API_BASE_URL + 'authentication/login', None,
|
||||||
|
'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
|
||||||
|
data=urlencode_postdata({
|
||||||
|
'password': password,
|
||||||
|
'rememberMe': False,
|
||||||
|
'source': 'Web',
|
||||||
|
'username': username,
|
||||||
|
})) or {}).get('accessToken')
|
||||||
|
if access_token:
|
||||||
|
self._HEADERS = {'authorization': 'Bearer ' + access_token}
|
||||||
|
except ExtractorError as e:
|
||||||
|
message = None
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
||||||
|
resp = self._parse_json(
|
||||||
|
e.cause.read().decode(), None, fatal=False) or {}
|
||||||
|
message = resp.get('message') or resp.get('code')
|
||||||
|
self.report_warning(message or self._LOGIN_ERR_MESSAGE)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
|
||||||
player_config = self._parse_json(self._search_regex(
|
player = self._download_json(
|
||||||
r'playerConfig\s*=\s*({.+});', webpage,
|
video_base_url + 'configuration', video_id,
|
||||||
'player config', default='{}'), video_id, fatal=False)
|
'Downloading player config JSON metadata',
|
||||||
if not player_config:
|
headers=self._HEADERS)['player']
|
||||||
config_url = urljoin(self._BASE_URL, self._search_regex(
|
options = player['options']
|
||||||
r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
|
|
||||||
webpage, 'config url'))
|
|
||||||
player_config = self._download_json(
|
|
||||||
config_url, video_id,
|
|
||||||
'Downloading player config JSON metadata')['player']
|
|
||||||
|
|
||||||
video_info = {}
|
user = options['user']
|
||||||
video_info_str = self._search_regex(
|
if not user.get('hasAccess'):
|
||||||
r'videoInfo\s*=\s*({.+});', webpage,
|
self.raise_login_required()
|
||||||
'video info', fatal=False)
|
|
||||||
if video_info_str:
|
|
||||||
video_info = self._parse_json(
|
|
||||||
video_info_str, video_id, fatal=False) or {}
|
|
||||||
|
|
||||||
options = player_config.get('options') or {}
|
token = self._download_json(
|
||||||
metas = options.get('metas') or {}
|
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
|
||||||
links = player_config.get('links') or {}
|
video_id, 'Downloading access token', headers={
|
||||||
sub_path = player_config.get('subtitles')
|
'x-player-refresh-token': user['refreshToken']
|
||||||
error = None
|
}, data=b'')['token']
|
||||||
if not links:
|
|
||||||
links_url = player_config.get('linksurl') or options['videoUrl']
|
links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
|
||||||
token = options['token']
|
|
||||||
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
|
||||||
message = bytes_to_intlist(json.dumps({
|
message = bytes_to_intlist(json.dumps({
|
||||||
'k': self._K,
|
'k': self._K,
|
||||||
'e': 60,
|
|
||||||
't': token,
|
't': token,
|
||||||
}))
|
}))
|
||||||
|
|
||||||
|
# Sometimes authentication fails for no good reason, retry with
|
||||||
|
# a different random padding
|
||||||
|
links_data = None
|
||||||
|
for _ in range(3):
|
||||||
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
padded_message = intlist_to_bytes(pkcs1pad(message, 128))
|
||||||
n, e = self._RSA_KEY
|
n, e = self._RSA_KEY
|
||||||
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
|
||||||
authorization = base64.b64encode(encrypted_message).decode()
|
authorization = base64.b64encode(encrypted_message).decode()
|
||||||
|
|
||||||
|
try:
|
||||||
links_data = self._download_json(
|
links_data = self._download_json(
|
||||||
urljoin(self._BASE_URL, links_url), video_id,
|
links_url, video_id, 'Downloading links JSON metadata', headers={
|
||||||
'Downloading links JSON metadata', headers={
|
'X-Player-Token': authorization
|
||||||
'Authorization': 'Bearer ' + authorization,
|
}, query={
|
||||||
|
'freeWithAds': 'true',
|
||||||
|
'adaptive': 'false',
|
||||||
|
'withMetadata': 'true',
|
||||||
|
'source': 'Web'
|
||||||
})
|
})
|
||||||
|
break
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, compat_HTTPError):
|
||||||
|
raise e
|
||||||
|
|
||||||
|
if e.cause.code == 401:
|
||||||
|
# This usually goes away with a different random pkcs1pad, so retry
|
||||||
|
continue
|
||||||
|
|
||||||
|
error = self._parse_json(e.cause.read(), video_id)
|
||||||
|
message = error.get('message')
|
||||||
|
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
|
||||||
|
self.raise_geo_restricted(msg=message)
|
||||||
|
raise ExtractorError(message)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Giving up retrying')
|
||||||
|
|
||||||
links = links_data.get('links') or {}
|
links = links_data.get('links') or {}
|
||||||
metas = metas or links_data.get('meta') or {}
|
metas = links_data.get('metadata') or {}
|
||||||
sub_path = sub_path or links_data.get('subtitles') or \
|
sub_url = (links.get('subtitles') or {}).get('all')
|
||||||
'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
|
video_info = links_data.get('video') or {}
|
||||||
sub_path += '&token=' + token
|
title = metas['title']
|
||||||
error = links_data.get('error')
|
|
||||||
title = metas.get('title') or video_info['title']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, qualities in links.items():
|
for format_id, qualities in (links.get('streaming') or {}).items():
|
||||||
if not isinstance(qualities, dict):
|
if not isinstance(qualities, dict):
|
||||||
continue
|
continue
|
||||||
for quality, load_balancer_url in qualities.items():
|
for quality, load_balancer_url in qualities.items():
|
||||||
|
@ -189,19 +244,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
|
||||||
for f in m3u8_formats:
|
for f in m3u8_formats:
|
||||||
f['language'] = 'fr'
|
f['language'] = 'fr'
|
||||||
formats.extend(m3u8_formats)
|
formats.extend(m3u8_formats)
|
||||||
if not error:
|
|
||||||
error = options.get('error')
|
|
||||||
if not formats and error:
|
|
||||||
raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
video = (self._download_json(
|
||||||
|
self._API_BASE_URL + 'video/%s' % video_id, video_id,
|
||||||
|
'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
|
||||||
|
show = video.get('show') or {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
|
'description': strip_or_none(metas.get('summary') or video.get('summary')),
|
||||||
'thumbnail': video_info.get('image'),
|
'thumbnail': video_info.get('image') or player.get('image'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': self.extract_subtitles(sub_path, video_id),
|
'subtitles': self.extract_subtitles(sub_url, video_id),
|
||||||
'episode': metas.get('subtitle') or video_info.get('videoTitle'),
|
'episode': metas.get('subtitle') or video.get('name'),
|
||||||
'series': video_info.get('playlistTitle'),
|
'episode_number': int_or_none(video.get('shortNumber')),
|
||||||
|
'series': show.get('title'),
|
||||||
|
'season_number': int_or_none(video.get('season')),
|
||||||
|
'duration': int_or_none(video_info.get('duration') or video.get('duration')),
|
||||||
|
'release_date': unified_strdate(video.get('releaseDate')),
|
||||||
|
'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
|
||||||
|
'comment_count': int_or_none(video.get('commentsCount')),
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,20 +5,32 @@ import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
extract_attributes,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
GeoRestrictedError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
urlencode_postdata,
|
||||||
from ..compat import (
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AENetworksBaseIE(ThePlatformIE):
|
class AENetworksBaseIE(ThePlatformIE):
|
||||||
|
_BASE_URL_REGEX = r'''(?x)https?://
|
||||||
|
(?:(?:www|play|watch)\.)?
|
||||||
|
(?P<domain>
|
||||||
|
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
||||||
|
fyi\.tv
|
||||||
|
)/'''
|
||||||
_THEPLATFORM_KEY = 'crazyjava'
|
_THEPLATFORM_KEY = 'crazyjava'
|
||||||
_THEPLATFORM_SECRET = 's3cr3t'
|
_THEPLATFORM_SECRET = 's3cr3t'
|
||||||
|
_DOMAIN_MAP = {
|
||||||
|
'history.com': ('HISTORY', 'history'),
|
||||||
|
'aetv.com': ('AETV', 'aetv'),
|
||||||
|
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||||
|
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||||
|
'fyi.tv': ('FYI', 'fyi'),
|
||||||
|
'historyvault.com': (None, 'historyvault'),
|
||||||
|
'biography.com': (None, 'biography'),
|
||||||
|
}
|
||||||
|
|
||||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||||
query = {'mbr': 'true'}
|
query = {'mbr': 'true'}
|
||||||
|
@ -31,7 +43,7 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
'assetTypes': 'high_video_s3'
|
'assetTypes': 'high_video_s3'
|
||||||
}, {
|
}, {
|
||||||
'assetTypes': 'high_video_s3',
|
'assetTypes': 'high_video_s3',
|
||||||
'switch': 'hls_ingest_fastly'
|
'switch': 'hls_high_fastly',
|
||||||
}]
|
}]
|
||||||
formats = []
|
formats = []
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -44,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
tp_formats, tp_subtitles = self._extract_theplatform_smil(
|
||||||
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
if isinstance(e, GeoRestrictedError):
|
||||||
|
raise
|
||||||
last_e = e
|
last_e = e
|
||||||
continue
|
continue
|
||||||
formats.extend(tp_formats)
|
formats.extend(tp_formats)
|
||||||
|
@ -57,24 +71,45 @@ class AENetworksBaseIE(ThePlatformIE):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||||
|
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||||
|
result = self._download_json(
|
||||||
|
'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
|
||||||
|
filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
|
||||||
|
title = result['title']
|
||||||
|
video_id = result['id']
|
||||||
|
media_url = result['publicUrl']
|
||||||
|
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
||||||
|
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
||||||
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
|
auth = None
|
||||||
|
if theplatform_metadata.get('AETN$isBehindWall'):
|
||||||
|
resource = self._get_mvpd_resource(
|
||||||
|
requestor_id, theplatform_metadata['title'],
|
||||||
|
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||||
|
theplatform_metadata['ratings'][0]['rating'])
|
||||||
|
auth = self._extract_mvpd_auth(
|
||||||
|
url, video_id, requestor_id, resource)
|
||||||
|
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||||
|
info.update({
|
||||||
|
'title': title,
|
||||||
|
'series': result.get('seriesName'),
|
||||||
|
'season_number': int_or_none(result.get('tvSeasonNumber')),
|
||||||
|
'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
class AENetworksIE(AENetworksBaseIE):
|
class AENetworksIE(AENetworksBaseIE):
|
||||||
IE_NAME = 'aenetworks'
|
IE_NAME = 'aenetworks'
|
||||||
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
|
||||||
https?://
|
shows/[^/]+/season-\d+/episode-\d+|
|
||||||
(?:www\.)?
|
|
||||||
(?P<domain>
|
|
||||||
(?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
|
|
||||||
fyi\.tv
|
|
||||||
)/
|
|
||||||
(?:
|
(?:
|
||||||
shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
|
(?:movie|special)s/[^/]+|
|
||||||
movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
|
(?:shows/[^/]+/)?videos
|
||||||
specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
|
)/[^/?#&]+
|
||||||
collections/[^/]+/(?P<collection_display_id>[^/]+)
|
)'''
|
||||||
)
|
|
||||||
'''
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -91,22 +126,23 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}, {
|
'skip': 'This video is only available for users of participating TV providers.',
|
||||||
'url': 'http://www.history.com/shows/ancient-aliens/season-1',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '71889446852',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 5,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'SERIES4317',
|
|
||||||
'title': 'Atlanta Plastic',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 2,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
|
||||||
'only_matching': True
|
'info_dict': {
|
||||||
|
'id': '600587331957',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Inlawful Entry',
|
||||||
|
'description': 'md5:57c12115a2b384d883fe64ca50529e08',
|
||||||
|
'timestamp': 1452634428,
|
||||||
|
'upload_date': '20160112',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
@ -117,78 +153,125 @@ class AENetworksIE(AENetworksBaseIE):
|
||||||
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
|
'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}, {
|
|
||||||
'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
|
|
||||||
'only_matching': True
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.history.com/videos/history-of-valentines-day',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
|
||||||
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_DOMAIN_TO_REQUESTOR_ID = {
|
|
||||||
'history.com': 'HISTORY',
|
|
||||||
'aetv.com': 'AETV',
|
|
||||||
'mylifetime.com': 'LIFETIME',
|
|
||||||
'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
|
|
||||||
'fyi.tv': 'FYI',
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
|
domain, canonical = re.match(self._VALID_URL, url).groups()
|
||||||
display_id = show_path or movie_display_id or special_display_id or collection_display_id
|
return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
|
||||||
webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
|
|
||||||
if show_path:
|
|
||||||
url_parts = show_path.split('/')
|
|
||||||
url_parts_len = len(url_parts)
|
|
||||||
if url_parts_len == 1:
|
|
||||||
entries = []
|
|
||||||
for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
|
|
||||||
entries.append(self.url_result(
|
|
||||||
compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
|
|
||||||
if entries:
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, self._html_search_meta('aetn:SeriesId', webpage),
|
|
||||||
self._html_search_meta('aetn:SeriesTitle', webpage))
|
|
||||||
else:
|
|
||||||
# single season
|
|
||||||
url_parts_len = 2
|
|
||||||
if url_parts_len == 2:
|
|
||||||
entries = []
|
|
||||||
for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
|
|
||||||
episode_attributes = extract_attributes(episode_item)
|
|
||||||
episode_url = compat_urlparse.urljoin(
|
|
||||||
url, episode_attributes['data-canonical'])
|
|
||||||
entries.append(self.url_result(
|
|
||||||
episode_url, 'AENetworks',
|
|
||||||
episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
|
|
||||||
return self.playlist_result(
|
|
||||||
entries, self._html_search_meta('aetn:SeasonId', webpage))
|
|
||||||
|
|
||||||
video_id = self._html_search_meta('aetn:VideoID', webpage)
|
|
||||||
media_url = self._search_regex(
|
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||||
[r"media_url\s*=\s*'(?P<url>[^']+)'",
|
def _call_api(self, resource, slug, brand, fields):
|
||||||
r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
|
return self._download_json(
|
||||||
r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
|
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||||
webpage, 'video url', group='url')
|
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
'query': '''{
|
||||||
r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
|
%s(slug: "%s") {
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
%s
|
||||||
auth = None
|
}
|
||||||
if theplatform_metadata.get('AETN$isBehindWall'):
|
}''' % (resource, slug, fields),
|
||||||
requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
|
}))['data'][resource]
|
||||||
resource = self._get_mvpd_resource(
|
|
||||||
requestor_id, theplatform_metadata['title'],
|
def _real_extract(self, url):
|
||||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
domain, slug = re.match(self._VALID_URL, url).groups()
|
||||||
theplatform_metadata['ratings'][0]['rating'])
|
_, brand = self._DOMAIN_MAP[domain]
|
||||||
auth = self._extract_mvpd_auth(
|
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||||
url, video_id, requestor_id, resource)
|
base_url = 'http://watch.%s' % domain
|
||||||
info.update(self._search_json_ld(webpage, video_id, fatal=False))
|
|
||||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
entries = []
|
||||||
return info
|
for item in (playlist.get(self._ITEMS_KEY) or []):
|
||||||
|
doc = self._get_doc(item)
|
||||||
|
canonical = doc.get('canonical')
|
||||||
|
if not canonical:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
|
||||||
|
|
||||||
|
description = None
|
||||||
|
if self._PLAYLIST_DESCRIPTION_KEY:
|
||||||
|
description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist.get('id'),
|
||||||
|
playlist.get(self._PLAYLIST_TITLE_KEY), description)
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksCollectionIE(AENetworksListBaseIE):
|
||||||
|
IE_NAME = 'aenetworks:collection'
|
||||||
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '282',
|
||||||
|
'title': 'America The Story of Us',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 12,
|
||||||
|
}, {
|
||||||
|
'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
|
||||||
|
'only_matching': True
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.historyvault.com/collections/mysteryquest',
|
||||||
|
'only_matching': True
|
||||||
|
}]
|
||||||
|
_RESOURCE = 'list'
|
||||||
|
_ITEMS_KEY = 'items'
|
||||||
|
_PLAYLIST_TITLE_KEY = 'display_title'
|
||||||
|
_PLAYLIST_DESCRIPTION_KEY = None
|
||||||
|
_FIELDS = '''id
|
||||||
|
display_title
|
||||||
|
items {
|
||||||
|
... on ListVideoItem {
|
||||||
|
doc {
|
||||||
|
canonical
|
||||||
|
id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
|
||||||
|
def _get_doc(self, item):
|
||||||
|
return item.get('doc') or {}
|
||||||
|
|
||||||
|
|
||||||
|
class AENetworksShowIE(AENetworksListBaseIE):
|
||||||
|
IE_NAME = 'aenetworks:show'
|
||||||
|
_VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://www.history.com/shows/ancient-aliens',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'SERIES1574',
|
||||||
|
'title': 'Ancient Aliens',
|
||||||
|
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 150,
|
||||||
|
}]
|
||||||
|
_RESOURCE = 'series'
|
||||||
|
_ITEMS_KEY = 'episodes'
|
||||||
|
_PLAYLIST_TITLE_KEY = 'title'
|
||||||
|
_PLAYLIST_DESCRIPTION_KEY = 'description'
|
||||||
|
_FIELDS = '''description
|
||||||
|
id
|
||||||
|
title
|
||||||
|
episodes {
|
||||||
|
canonical
|
||||||
|
id
|
||||||
|
}'''
|
||||||
|
|
||||||
|
def _get_doc(self, item):
|
||||||
|
return item
|
||||||
|
|
||||||
|
|
||||||
class HistoryTopicIE(AENetworksBaseIE):
|
class HistoryTopicIE(AENetworksBaseIE):
|
||||||
|
@ -204,6 +287,7 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||||
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
|
||||||
'timestamp': 1375819729,
|
'timestamp': 1375819729,
|
||||||
'upload_date': '20130806',
|
'upload_date': '20130806',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
@ -212,36 +296,47 @@ class HistoryTopicIE(AENetworksBaseIE):
|
||||||
'add_ie': ['ThePlatform'],
|
'add_ie': ['ThePlatform'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def theplatform_url_result(self, theplatform_url, video_id, query):
|
def _real_extract(self, url):
|
||||||
return {
|
display_id = self._match_id(url)
|
||||||
'_type': 'url_transparent',
|
return self.url_result(
|
||||||
'id': video_id,
|
'http://www.history.com/videos/' + display_id,
|
||||||
'url': smuggle_url(
|
AENetworksIE.ie_key())
|
||||||
update_url_query(theplatform_url, query),
|
|
||||||
{
|
|
||||||
'sig': {
|
class HistoryPlayerIE(AENetworksBaseIE):
|
||||||
'key': self._THEPLATFORM_KEY,
|
IE_NAME = 'history:player'
|
||||||
'secret': self._THEPLATFORM_SECRET,
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
|
||||||
|
_TESTS = []
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
domain, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
return self._extract_aetn_info(domain, 'id', video_id, url)
|
||||||
|
|
||||||
|
|
||||||
|
class BiographyIE(AENetworksBaseIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '30322987',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Vincent Van Gogh - Full Episode',
|
||||||
|
'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
|
||||||
|
'timestamp': 1311970571,
|
||||||
|
'upload_date': '20110729',
|
||||||
|
'uploader': 'AENE-NEW',
|
||||||
},
|
},
|
||||||
'force_smil_url': True
|
'params': {
|
||||||
}),
|
# m3u8 download
|
||||||
'ie_key': 'ThePlatform',
|
'skip_download': True,
|
||||||
}
|
},
|
||||||
|
'add_ie': ['ThePlatform'],
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._search_regex(
|
player_url = self._search_regex(
|
||||||
r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
|
r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
|
||||||
result = self._download_json(
|
webpage, 'player URL')
|
||||||
'https://feeds.video.aetnd.com/api/v2/history/videos',
|
return self.url_result(player_url, HistoryPlayerIE.ie_key())
|
||||||
video_id, query={'filter[id]': video_id})['results'][0]
|
|
||||||
title = result['title']
|
|
||||||
info = self._extract_aen_smil(result['publicUrl'], video_id)
|
|
||||||
info.update({
|
|
||||||
'title': title,
|
|
||||||
'description': result.get('description'),
|
|
||||||
'duration': int_or_none(result.get('duration')),
|
|
||||||
'timestamp': int_or_none(result.get('added'), 1000),
|
|
||||||
})
|
|
||||||
return info
|
|
||||||
|
|
|
@ -1,13 +1,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class AlJazeeraIE(InfoExtractor):
|
class AlJazeeraIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
|
_VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
|
'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3792260579001',
|
'id': '3792260579001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
|
||||||
'add_ie': ['BrightcoveNew'],
|
'add_ie': ['BrightcoveNew'],
|
||||||
'skip': 'Not accessible from Travis CI server',
|
'skip': 'Not accessible from Travis CI server',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
|
'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
program_name = self._match_id(url)
|
post_type, name = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, program_name)
|
post_type = {
|
||||||
brightcove_id = self._search_regex(
|
'features': 'post',
|
||||||
r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
|
'program': 'episode',
|
||||||
return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
|
'videos': 'video',
|
||||||
|
}[post_type.split('/')[0]]
|
||||||
|
video = self._download_json(
|
||||||
|
'https://www.aljazeera.com/graphql', name, query={
|
||||||
|
'operationName': 'SingleArticleQuery',
|
||||||
|
'variables': json.dumps({
|
||||||
|
'name': name,
|
||||||
|
'postType': post_type,
|
||||||
|
}),
|
||||||
|
}, headers={
|
||||||
|
'wp-site': 'aje',
|
||||||
|
})['data']['article']['video']
|
||||||
|
video_id = video['id']
|
||||||
|
account_id = video.get('accountId') or '665003303001'
|
||||||
|
player_id = video.get('playerId') or 'BkeSH5BDb'
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||||
|
'BrightcoveNew', video_id)
|
||||||
|
|
103
haruhi_dl/extractor/amara.py
Normal file
103
haruhi_dl/extractor/amara.py
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .vimeo import VimeoIE
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AmaraIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# Youtube
|
||||||
|
'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
|
||||||
|
'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'h6ZuVdvYnfE',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Why jury trials are becoming less common',
|
||||||
|
'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20160813',
|
||||||
|
'uploader': 'PBS NewsHour',
|
||||||
|
'uploader_id': 'PBSNewsHour',
|
||||||
|
'timestamp': 1549639570,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Vimeo
|
||||||
|
'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
|
||||||
|
'md5': '99392c75fa05d432a8f11df03612195e',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '18622084',
|
||||||
|
'ext': 'mov',
|
||||||
|
'title': 'Vimeo at CES 2011!',
|
||||||
|
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'timestamp': 1294763658,
|
||||||
|
'upload_date': '20110111',
|
||||||
|
'uploader': 'Sam Morrill',
|
||||||
|
'uploader_id': 'sammorrill'
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
# Direct Link
|
||||||
|
'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
|
||||||
|
'md5': 'd3970f08512738ee60c5807311ff5d3f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 's8KL7I3jLmh6',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'The danger of a single story',
|
||||||
|
'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'subtitles': dict,
|
||||||
|
'upload_date': '20091007',
|
||||||
|
'timestamp': 1254942511,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
meta = self._download_json(
|
||||||
|
'https://amara.org/api/videos/%s/' % video_id,
|
||||||
|
video_id, query={'format': 'json'})
|
||||||
|
title = meta['title']
|
||||||
|
video_url = meta['all_urls'][0]
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for language in (meta.get('languages') or []):
|
||||||
|
subtitles_uri = language.get('subtitles_uri')
|
||||||
|
if not (subtitles_uri and language.get('published')):
|
||||||
|
continue
|
||||||
|
subtitle = subtitles.setdefault(language.get('code') or 'en', [])
|
||||||
|
for f in ('json', 'srt', 'vtt'):
|
||||||
|
subtitle.append({
|
||||||
|
'ext': f,
|
||||||
|
'url': update_url_query(subtitles_uri, {'format': f}),
|
||||||
|
})
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'title': title,
|
||||||
|
'description': meta.get('description'),
|
||||||
|
'thumbnail': meta.get('thumbnail'),
|
||||||
|
'duration': int_or_none(meta.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(meta.get('created')),
|
||||||
|
}
|
||||||
|
|
||||||
|
for ie in (YoutubeIE, VimeoIE):
|
||||||
|
if ie.suitable(video_url):
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': ie.ie_key(),
|
||||||
|
})
|
||||||
|
break
|
||||||
|
|
||||||
|
return info
|
|
@ -1,6 +1,8 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
@ -11,25 +13,22 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class AMCNetworksIE(ThePlatformIE):
|
class AMCNetworksIE(ThePlatformIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
|
'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
|
||||||
'md5': '',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 's3MX01Nl4vPH',
|
'id': '4Lq1dzOnZGt0',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Maron - Season 4 - Step 1',
|
'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
|
||||||
'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
|
'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
|
||||||
'age_limit': 17,
|
'upload_date': '20201120',
|
||||||
'upload_date': '20160505',
|
'timestamp': 1605904350,
|
||||||
'timestamp': 1462468831,
|
|
||||||
'uploader': 'AMCN',
|
'uploader': 'AMCN',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Requires TV provider accounts',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -55,32 +54,34 @@ class AMCNetworksIE(ThePlatformIE):
|
||||||
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_REQUESTOR_ID_MAP = {
|
||||||
|
'amc': 'AMC',
|
||||||
|
'bbcamerica': 'BBCA',
|
||||||
|
'ifc': 'IFC',
|
||||||
|
'sundancetv': 'SUNDANCE',
|
||||||
|
'wetv': 'WETV',
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
requestor_id = self._REQUESTOR_ID_MAP[site]
|
||||||
|
properties = self._download_json(
|
||||||
|
'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
|
||||||
|
display_id)['data']['properties']
|
||||||
query = {
|
query = {
|
||||||
'mbr': 'true',
|
'mbr': 'true',
|
||||||
'manifest': 'm3u',
|
'manifest': 'm3u',
|
||||||
}
|
}
|
||||||
media_url = self._search_regex(
|
tp_path = 'M_UwQC/media/' + properties['videoPid']
|
||||||
r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
|
media_url = 'https://link.theplatform.com/s/' + tp_path
|
||||||
webpage, 'media url')
|
theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
|
||||||
theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
|
|
||||||
r'link\.theplatform\.com/s/([^?]+)',
|
|
||||||
media_url, 'theplatform_path'), display_id)
|
|
||||||
info = self._parse_theplatform_metadata(theplatform_metadata)
|
info = self._parse_theplatform_metadata(theplatform_metadata)
|
||||||
video_id = theplatform_metadata['pid']
|
video_id = theplatform_metadata['pid']
|
||||||
title = theplatform_metadata['title']
|
title = theplatform_metadata['title']
|
||||||
rating = try_get(
|
rating = try_get(
|
||||||
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
theplatform_metadata, lambda x: x['ratings'][0]['rating'])
|
||||||
auth_required = self._search_regex(
|
video_category = properties.get('videoCategory')
|
||||||
r'window\.authRequired\s*=\s*(true|false);',
|
if video_category and video_category.endswith('-Auth'):
|
||||||
webpage, 'auth required')
|
|
||||||
if auth_required == 'true':
|
|
||||||
requestor_id = self._search_regex(
|
|
||||||
r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
|
|
||||||
webpage, 'requestor id')
|
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
requestor_id, title, video_id, rating)
|
requestor_id, title, video_id, rating)
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
|
|
|
@ -1,82 +1,159 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AmericasTestKitchenIE(InfoExtractor):
|
class AmericasTestKitchenIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
|
||||||
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
'md5': 'b861c3e365ac38ad319cfd509c30577f',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '5b400b9ee338f922cb06450c',
|
'id': '5b400b9ee338f922cb06450c',
|
||||||
'title': 'Weeknight Japanese Suppers',
|
'title': 'Japanese Suppers',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
|
'description': 'md5:64e606bfee910627efc4b5f050de92b3',
|
||||||
'thumbnail': r're:^https?://',
|
'thumbnail': r're:^https?://',
|
||||||
'timestamp': 1523664000,
|
'timestamp': 1523318400,
|
||||||
'upload_date': '20180414',
|
'upload_date': '20180410',
|
||||||
'release_date': '20180414',
|
'release_date': '20180410',
|
||||||
'series': "America's Test Kitchen",
|
'series': "America's Test Kitchen",
|
||||||
'season_number': 18,
|
'season_number': 18,
|
||||||
'episode': 'Weeknight Japanese Suppers',
|
'episode': 'Japanese Suppers',
|
||||||
'episode_number': 15,
|
'episode_number': 15,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
|
||||||
|
'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
|
||||||
|
'md5': '06451608c57651e985a498e69cec17e5',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5fbe8c61bda2010001c6763b',
|
||||||
|
'title': 'Simple Chicken Dinner',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
|
||||||
|
'thumbnail': r're:^https?://',
|
||||||
|
'timestamp': 1610755200,
|
||||||
|
'upload_date': '20210116',
|
||||||
|
'release_date': '20210116',
|
||||||
|
'series': "America's Test Kitchen",
|
||||||
|
'season_number': 21,
|
||||||
|
'episode': 'Simple Chicken Dinner',
|
||||||
|
'episode_number': 3,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
resource_type, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
is_episode = resource_type == 'episode'
|
||||||
|
if is_episode:
|
||||||
|
resource_type = 'episodes'
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
resource = self._download_json(
|
||||||
|
'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
|
||||||
video_data = self._parse_json(
|
video = resource['video'] if is_episode else resource
|
||||||
self._search_regex(
|
episode = resource if is_episode else resource.get('episode') or {}
|
||||||
r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
|
|
||||||
webpage, 'initial context'),
|
|
||||||
video_id, js_to_json)
|
|
||||||
|
|
||||||
ep_data = try_get(
|
|
||||||
video_data,
|
|
||||||
(lambda x: x['episodeDetail']['content']['data'],
|
|
||||||
lambda x: x['videoDetail']['content']['data']), dict)
|
|
||||||
ep_meta = ep_data.get('full_video', {})
|
|
||||||
|
|
||||||
zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
|
|
||||||
|
|
||||||
title = ep_data.get('title') or ep_meta.get('title')
|
|
||||||
description = clean_html(ep_meta.get('episode_description') or ep_data.get(
|
|
||||||
'description') or ep_meta.get('description'))
|
|
||||||
thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
|
|
||||||
release_date = unified_strdate(ep_data.get('aired_at'))
|
|
||||||
|
|
||||||
season_number = int_or_none(ep_meta.get('season_number'))
|
|
||||||
episode = ep_meta.get('title')
|
|
||||||
episode_number = int_or_none(ep_meta.get('episode_number'))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
|
'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
|
||||||
'ie_key': 'Zype',
|
'ie_key': 'Zype',
|
||||||
'title': title,
|
'description': clean_html(video.get('description')),
|
||||||
'description': description,
|
'timestamp': unified_timestamp(video.get('publishDate')),
|
||||||
'thumbnail': thumbnail,
|
'release_date': unified_strdate(video.get('publishDate')),
|
||||||
'release_date': release_date,
|
'episode_number': int_or_none(episode.get('number')),
|
||||||
'series': "America's Test Kitchen",
|
'season_number': int_or_none(episode.get('season')),
|
||||||
'season_number': season_number,
|
'series': try_get(episode, lambda x: x['show']['title']),
|
||||||
'episode': episode,
|
'episode': episode.get('title'),
|
||||||
'episode_number': episode_number,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AmericasTestKitchenSeasonIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
# ATK Season
|
||||||
|
'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'season_1',
|
||||||
|
'title': 'Season 1',
|
||||||
|
},
|
||||||
|
'playlist_count': 13,
|
||||||
|
}, {
|
||||||
|
# Cooks Country Season
|
||||||
|
'url': 'https://www.cookscountry.com/episodes/browse/season_12',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'season_12',
|
||||||
|
'title': 'Season 12',
|
||||||
|
},
|
||||||
|
'playlist_count': 13,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
show_name, season_number = re.match(self._VALID_URL, url).groups()
|
||||||
|
season_number = int(season_number)
|
||||||
|
|
||||||
|
slug = 'atk' if show_name == 'americastestkitchen' else 'cco'
|
||||||
|
|
||||||
|
season = 'Season %d' % season_number
|
||||||
|
|
||||||
|
season_search = self._download_json(
|
||||||
|
'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
|
||||||
|
season, headers={
|
||||||
|
'Origin': 'https://www.%s.com' % show_name,
|
||||||
|
'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
|
||||||
|
'X-Algolia-Application-Id': 'Y1FNZXUI30',
|
||||||
|
}, query={
|
||||||
|
'facetFilters': json.dumps([
|
||||||
|
'search_season_list:' + season,
|
||||||
|
'search_document_klass:episode',
|
||||||
|
'search_show_slug:' + slug,
|
||||||
|
]),
|
||||||
|
'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
|
||||||
|
'attributesToHighlight': '',
|
||||||
|
'hitsPerPage': 1000,
|
||||||
|
})
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for episode in (season_search.get('hits') or []):
|
||||||
|
search_url = episode.get('search_url')
|
||||||
|
if not search_url:
|
||||||
|
continue
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'https://www.%s.com%s' % (show_name, search_url),
|
||||||
|
'id': try_get(episode, lambda e: e['objectID'].split('_')[-1]),
|
||||||
|
'title': episode.get('title'),
|
||||||
|
'description': episode.get('description'),
|
||||||
|
'timestamp': unified_timestamp(episode.get('search_document_date')),
|
||||||
|
'season_number': season_number,
|
||||||
|
'episode_number': int_or_none(episode.get('search_%s_episode_number' % slug)),
|
||||||
|
'ie_key': AmericasTestKitchenIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), 'season_%d' % season_number, season)
|
||||||
|
|
|
@ -8,6 +8,7 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||||
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
|
||||||
webpage, 'anime description', default=None)
|
webpage, 'anime description', default=None)
|
||||||
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
def extract_info(html, video_id, num=None):
|
def extract_info(html, video_id, num=None):
|
||||||
title, description = [None] * 2
|
title, description = [None] * 2
|
||||||
formats = []
|
formats = []
|
||||||
|
@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||||
self._sort_formats(info['formats'])
|
self._sort_formats(info['formats'])
|
||||||
f = common_info.copy()
|
f = common_info.copy()
|
||||||
f.update(info)
|
f.update(info)
|
||||||
entries.append(f)
|
yield f
|
||||||
|
|
||||||
# Extract teaser/trailer only when full episode is not available
|
# Extract teaser/trailer only when full episode is not available
|
||||||
if not info['formats']:
|
if not info['formats']:
|
||||||
|
@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||||
'title': m.group('title'),
|
'title': m.group('title'),
|
||||||
'url': urljoin(url, m.group('href')),
|
'url': urljoin(url, m.group('href')),
|
||||||
})
|
})
|
||||||
entries.append(f)
|
yield f
|
||||||
|
|
||||||
def extract_episodes(html):
|
def extract_episodes(html):
|
||||||
for num, episode_html in enumerate(re.findall(
|
for num, episode_html in enumerate(re.findall(
|
||||||
|
@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||||
'episode_number': episode_number,
|
'episode_number': episode_number,
|
||||||
}
|
}
|
||||||
|
|
||||||
extract_entries(episode_html, video_id, common_info)
|
for e in extract_entries(episode_html, video_id, common_info):
|
||||||
|
yield e
|
||||||
|
|
||||||
def extract_film(html, video_id):
|
def extract_film(html, video_id):
|
||||||
common_info = {
|
common_info = {
|
||||||
|
@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
|
||||||
'title': anime_title,
|
'title': anime_title,
|
||||||
'description': anime_description,
|
'description': anime_description,
|
||||||
}
|
}
|
||||||
extract_entries(html, video_id, common_info)
|
for e in extract_entries(html, video_id, common_info):
|
||||||
|
yield e
|
||||||
|
|
||||||
extract_episodes(webpage)
|
def entries():
|
||||||
|
has_episodes = False
|
||||||
|
for e in extract_episodes(webpage):
|
||||||
|
has_episodes = True
|
||||||
|
yield e
|
||||||
|
|
||||||
if not entries:
|
if not has_episodes:
|
||||||
extract_film(webpage, anime_id)
|
for e in extract_film(webpage, anime_id):
|
||||||
|
yield e
|
||||||
|
|
||||||
return self.playlist_result(entries, anime_id, anime_title, anime_description)
|
return self.playlist_result(
|
||||||
|
entries(), anime_id, anime_title, anime_description)
|
||||||
|
|
|
@ -116,7 +116,76 @@ class AnvatoIE(InfoExtractor):
|
||||||
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
|
||||||
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
|
||||||
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
|
||||||
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
|
'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
|
||||||
|
'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
|
||||||
|
'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
|
||||||
|
'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
|
||||||
|
'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
|
||||||
|
'04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
|
||||||
|
'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
|
||||||
|
'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
|
||||||
|
'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
|
||||||
|
'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
|
||||||
|
'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
|
||||||
|
'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
|
||||||
|
'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
|
||||||
|
'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
|
||||||
|
'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
|
||||||
|
'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
|
||||||
|
'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
|
||||||
|
'3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
|
||||||
|
'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
|
||||||
|
'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
|
||||||
|
'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
|
||||||
|
'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
|
||||||
|
'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
|
||||||
|
'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
|
||||||
|
'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
|
||||||
|
'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
|
||||||
|
'7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
|
||||||
|
'3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
|
||||||
|
'3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
|
||||||
|
'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
|
||||||
|
'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
|
||||||
|
'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
|
||||||
|
'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
|
||||||
|
'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
|
||||||
|
'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
|
||||||
|
'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
|
||||||
|
'7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
|
||||||
|
'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
|
||||||
|
'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
|
||||||
|
'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
|
||||||
|
'5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
|
||||||
|
'70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
|
||||||
|
'26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
|
||||||
|
'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
|
||||||
|
'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
|
||||||
|
'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
|
||||||
|
'3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
|
||||||
|
'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
|
||||||
|
'7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
|
||||||
|
'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
|
||||||
|
'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
|
||||||
|
'5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
|
||||||
|
'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
|
||||||
|
'3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
|
||||||
|
'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
|
||||||
|
'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
|
||||||
|
'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
|
||||||
|
'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
|
||||||
|
'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
|
||||||
|
'5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
|
||||||
|
'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
|
||||||
|
'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
|
||||||
|
'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
|
||||||
|
'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
|
||||||
|
'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
|
||||||
|
'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
|
||||||
|
'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
|
||||||
|
'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
|
||||||
|
'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
|
||||||
|
'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
|
||||||
}
|
}
|
||||||
|
|
||||||
_MCP_TO_ACCESS_KEY_TABLE = {
|
_MCP_TO_ACCESS_KEY_TABLE = {
|
||||||
|
@ -189,19 +258,17 @@ class AnvatoIE(InfoExtractor):
|
||||||
|
|
||||||
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
|
||||||
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
anvrid = md5_text(time.time() * 1000 * random.random())[:30]
|
||||||
payload = {
|
api = {
|
||||||
'api': {
|
|
||||||
'anvrid': anvrid,
|
'anvrid': anvrid,
|
||||||
'anvstk': md5_text('%s|%s|%d|%s' % (
|
|
||||||
access_key, anvrid, server_time,
|
|
||||||
self._ANVACK_TABLE.get(access_key, self._API_KEY))),
|
|
||||||
'anvts': server_time,
|
'anvts': server_time,
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
api['anvstk'] = md5_text('%s|%s|%d|%s' % (
|
||||||
|
access_key, anvrid, server_time,
|
||||||
|
self._ANVACK_TABLE.get(access_key, self._API_KEY)))
|
||||||
|
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
video_data_url, video_id, transform_source=strip_jsonp,
|
video_data_url, video_id, transform_source=strip_jsonp,
|
||||||
data=json.dumps(payload).encode('utf-8'))
|
data=json.dumps({'api': api}).encode('utf-8'))
|
||||||
|
|
||||||
def _get_anvato_videos(self, access_key, video_id):
|
def _get_anvato_videos(self, access_key, video_id):
|
||||||
video_data = self._get_video_json(access_key, video_id)
|
video_data = self._get_video_json(access_key, video_id)
|
||||||
|
@ -259,7 +326,7 @@ class AnvatoIE(InfoExtractor):
|
||||||
'description': video_data.get('def_description'),
|
'description': video_data.get('def_description'),
|
||||||
'tags': video_data.get('def_tags', '').split(','),
|
'tags': video_data.get('def_tags', '').split(','),
|
||||||
'categories': video_data.get('categories'),
|
'categories': video_data.get('categories'),
|
||||||
'thumbnail': video_data.get('thumbnail'),
|
'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
|
||||||
'timestamp': int_or_none(video_data.get(
|
'timestamp': int_or_none(video_data.get(
|
||||||
'ts_published') or video_data.get('ts_added')),
|
'ts_published') or video_data.get('ts_added')),
|
||||||
'uploader': video_data.get('mcp_id'),
|
'uploader': video_data.get('mcp_id'),
|
||||||
|
|
|
@ -3,7 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .yahoo import YahooIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
|
@ -15,9 +15,9 @@ from ..utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class AolIE(InfoExtractor):
|
class AolIE(YahooIE):
|
||||||
IE_NAME = 'aol.com'
|
IE_NAME = 'aol.com'
|
||||||
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
|
_VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# video with 5min ID
|
# video with 5min ID
|
||||||
|
@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Yahoo video
|
||||||
|
'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
if '-' in video_id:
|
||||||
|
return self._extract_yahoo_video(video_id, 'us')
|
||||||
|
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
|
||||||
|
|
|
@ -6,25 +6,21 @@ import re
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
js_to_json,
|
int_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class APAIE(InfoExtractor):
|
class APAIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||||
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'jjv85FdZ',
|
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
|
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
|
||||||
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'duration': 254,
|
|
||||||
'timestamp': 1519211149,
|
|
||||||
'upload_date': '20180221',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
|
||||||
|
@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
|
||||||
webpage)]
|
webpage)]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id, base_url = mobj.group('id', 'base_url')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(
|
||||||
|
'%s/player/%s' % (base_url, video_id), video_id)
|
||||||
|
|
||||||
jwplatform_id = self._search_regex(
|
jwplatform_id = self._search_regex(
|
||||||
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
|
||||||
|
@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
|
||||||
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
'jwplatform:' + jwplatform_id, ie='JWPlatform',
|
||||||
video_id=video_id)
|
video_id=video_id)
|
||||||
|
|
||||||
sources = self._parse_json(
|
def extract(field, name=None):
|
||||||
self._search_regex(
|
return self._search_regex(
|
||||||
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
|
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
|
||||||
video_id, transform_source=js_to_json)
|
webpage, name or field, default=None, group='value')
|
||||||
|
|
||||||
|
title = extract('title') or video_id
|
||||||
|
description = extract('description')
|
||||||
|
thumbnail = extract('poster', 'thumbnail')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for source in sources:
|
for format_id in ('hls', 'progressive'):
|
||||||
if not isinstance(source, dict):
|
source_url = url_or_none(extract(format_id))
|
||||||
continue
|
|
||||||
source_url = url_or_none(source.get('file'))
|
|
||||||
if not source_url:
|
if not source_url:
|
||||||
continue
|
continue
|
||||||
ext = determine_ext(source_url)
|
ext = determine_ext(source_url)
|
||||||
|
@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
|
||||||
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
else:
|
else:
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'(\d+)\.mp4', source_url, 'height', default=None))
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': source_url,
|
'url': source_url,
|
||||||
|
'format_id': format_id,
|
||||||
|
'height': height,
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnail = self._search_regex(
|
|
||||||
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'thumbnail', fatal=False, group='url')
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': video_id,
|
'title': title,
|
||||||
|
'description': description,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
|
@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, video_id, fatal=False)
|
webpage = self._download_webpage(url, video_id, fatal=False)
|
||||||
|
|
||||||
if not webpage:
|
if not webpage:
|
||||||
# Note: There is an easier-to-parse configuration at
|
|
||||||
# http://www.aparat.com/video/video/config/videohash/%video_id
|
|
||||||
# but the URL in there does not work
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
|
||||||
video_id)
|
video_id)
|
||||||
|
|
||||||
options = self._parse_json(
|
options = self._parse_json(self._search_regex(
|
||||||
self._search_regex(
|
r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
|
||||||
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
|
|
||||||
webpage, 'options', group='value'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
player = options['plugins']['sabaPlayerPlugin']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for sources in player['multiSRC']:
|
for sources in (options.get('multiSRC') or []):
|
||||||
for item in sources:
|
for item in sources:
|
||||||
if not isinstance(item, dict):
|
if not isinstance(item, dict):
|
||||||
continue
|
continue
|
||||||
|
@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
if not info.get('title'):
|
if not info.get('title'):
|
||||||
info['title'] = player['title']
|
info['title'] = get_element_by_id('videoTitle', webpage) or \
|
||||||
|
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
|
||||||
|
|
||||||
return merge_dicts(info, {
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'thumbnail': url_or_none(options.get('poster')),
|
'thumbnail': url_or_none(options.get('poster')),
|
||||||
'duration': int_or_none(player.get('duration')),
|
'duration': int_or_none(options.get('duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
})
|
})
|
||||||
|
|
61
haruhi_dl/extractor/applepodcasts.py
Normal file
61
haruhi_dl/extractor/applepodcasts.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_podcast_url,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ApplePodcastsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
|
'md5': 'df02e6acb11c10e844946a39e7222b08',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1000482637777',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '207 - Whitney Webb Returns',
|
||||||
|
'description': 'md5:13a73bade02d2e43737751e3987e1399',
|
||||||
|
'upload_date': '20200705',
|
||||||
|
'timestamp': 1593921600,
|
||||||
|
'duration': 6425,
|
||||||
|
'series': 'The Tim Dillon Show',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
episode_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, episode_id)
|
||||||
|
ember_data = self._parse_json(self._search_regex(
|
||||||
|
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
|
webpage, 'ember data'), episode_id)
|
||||||
|
episode = ember_data['data']['attributes']
|
||||||
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
|
series = None
|
||||||
|
for inc in (ember_data.get('included') or []):
|
||||||
|
if inc.get('type') == 'media/podcast':
|
||||||
|
series = try_get(inc, lambda x: x['attributes']['name'])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': episode_id,
|
||||||
|
'title': episode['name'],
|
||||||
|
'url': clean_podcast_url(episode['assetUrl']),
|
||||||
|
'description': description.get('standard') or description.get('short'),
|
||||||
|
'timestamp': parse_iso8601(episode.get('releaseDateTime')),
|
||||||
|
'duration': int_or_none(episode.get('durationInMilliseconds'), 1000),
|
||||||
|
'series': series,
|
||||||
|
}
|
|
@ -2,15 +2,17 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
|
unified_strdate,
|
||||||
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOrgIE(InfoExtractor):
|
class ArchiveOrgIE(InfoExtractor):
|
||||||
IE_NAME = 'archive.org'
|
IE_NAME = 'archive.org'
|
||||||
IE_DESC = 'archive.org videos'
|
IE_DESC = 'archive.org videos'
|
||||||
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
|
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
'md5': '8af1d4cf447933ed3c7f4871162602db',
|
||||||
|
@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
'ext': 'ogg',
|
'ext': 'ogg',
|
||||||
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
|
||||||
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
|
||||||
'upload_date': '19681210',
|
'creator': 'SRI International',
|
||||||
'uploader': 'SRI International'
|
'release_date': '19681210',
|
||||||
|
'uploader': 'SRI International',
|
||||||
|
'timestamp': 1268695290,
|
||||||
|
'upload_date': '20100315',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://archive.org/details/Cops1922',
|
'url': 'https://archive.org/details/Cops1922',
|
||||||
|
@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
'id': 'Cops1922',
|
'id': 'Cops1922',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Buster Keaton\'s "Cops" (1922)',
|
'title': 'Buster Keaton\'s "Cops" (1922)',
|
||||||
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
|
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
|
||||||
|
'timestamp': 1387699629,
|
||||||
|
'upload_date': '20131222',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://archive.org/embed/' + video_id, video_id)
|
'http://archive.org/embed/' + video_id, video_id)
|
||||||
jwplayer_playlist = self._parse_json(self._search_regex(
|
|
||||||
|
playlist = None
|
||||||
|
play8 = self._search_regex(
|
||||||
|
r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
|
||||||
|
'playlist', default=None)
|
||||||
|
if play8:
|
||||||
|
attrs = extract_attributes(play8)
|
||||||
|
playlist = attrs.get('value')
|
||||||
|
if not playlist:
|
||||||
|
# Old jwplayer fallback
|
||||||
|
playlist = self._search_regex(
|
||||||
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
|
||||||
webpage, 'jwplayer playlist'), video_id)
|
webpage, 'jwplayer playlist', default='[]')
|
||||||
|
jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
|
||||||
|
if jwplayer_playlist:
|
||||||
info = self._parse_jwplayer_data(
|
info = self._parse_jwplayer_data(
|
||||||
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
{'playlist': jwplayer_playlist}, video_id, base_url=url)
|
||||||
|
else:
|
||||||
|
# HTML5 media fallback
|
||||||
|
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
|
info['id'] = video_id
|
||||||
|
|
||||||
def get_optional(metadata, field):
|
def get_optional(metadata, field):
|
||||||
return metadata.get(field, [None])[0]
|
return metadata.get(field, [None])[0]
|
||||||
|
@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
|
||||||
'description': clean_html(get_optional(metadata, 'description')),
|
'description': clean_html(get_optional(metadata, 'description')),
|
||||||
})
|
})
|
||||||
if info.get('_type') != 'playlist':
|
if info.get('_type') != 'playlist':
|
||||||
|
creator = get_optional(metadata, 'creator')
|
||||||
info.update({
|
info.update({
|
||||||
'uploader': get_optional(metadata, 'creator'),
|
'creator': creator,
|
||||||
'upload_date': unified_strdate(get_optional(metadata, 'date')),
|
'release_date': unified_strdate(get_optional(metadata, 'date')),
|
||||||
|
'uploader': get_optional(metadata, 'publisher') or creator,
|
||||||
|
'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
|
||||||
|
'language': get_optional(metadata, 'language'),
|
||||||
})
|
})
|
||||||
return info
|
return info
|
||||||
|
|
174
haruhi_dl/extractor/arcpublishing.py
Normal file
174
haruhi_dl/extractor/arcpublishing.py
Normal file
|
@ -0,0 +1,174 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArcPublishingIE(InfoExtractor):
    # Extractor for internal ``arcpublishing:<org>:<uuid>`` URLs. These URLs
    # are produced by ``_extract_urls`` from POWA player <div> elements and
    # resolved here through the organisation's ``findByUuid`` video API.
    _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
    _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
    _TESTS = [{
        # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
        'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
        'only_matching': True,
    }, {
        # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
        'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
        'only_matching': True,
    }, {
        # https://www.actionnewsjax.com/video/live-stream/
        'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
        'only_matching': True,
    }, {
        # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
        'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
        'only_matching': True,
    }, {
        # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
        'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
        'only_matching': True,
    }, {
        # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
        'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
        'only_matching': True,
    }, {
        # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
        'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
        'only_matching': True,
    }, {
        # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
        'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
        'only_matching': True,
    }, {
        # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
        'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
        'only_matching': True,
    }, {
        # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
        'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
        'only_matching': True,
    }, {
        # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
        'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
        'only_matching': True,
    }, {
        # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
        'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
        'only_matching': True,
    }]
    # (organisations, API host template) pairs; '%s' receives the org name.
    # Organisations not listed here use the fallback template in _real_extract.
    _POWA_DEFAULTS = [
        (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
        ([
            'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
            'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
            'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
        ], 'video-api-cdn.%s.arcpublishing.com/api'),
    ]

    @staticmethod
    def _extract_urls(webpage, **kw):
        """Return ``arcpublishing:<org>:<uuid>`` URLs for POWA players in webpage.

        Only <div> tags whose class list contains ``powa`` and that carry a
        ``data-uuid`` attribute matching ``_UUID_REGEX`` are considered; a tag
        is skipped unless both ``data-org`` and ``data-uuid`` are present.
        Extra keyword arguments are accepted and ignored.
        """
        entries = []
        # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
        for powa_el in re.findall(r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX, webpage):
            powa = extract_attributes(powa_el) or {}
            org = powa.get('data-org')
            uuid = powa.get('data-uuid')
            if org and uuid:
                entries.append('arcpublishing:%s:%s' % (org, uuid))
        return entries

    def _real_extract(self, url):
        """Resolve an ``arcpublishing:<org>:<uuid>`` URL into an info dict.

        Downloads the video's JSON record from the organisation's
        ``findByUuid`` endpoint, builds formats from its ``streams`` list
        (SMIL, HLS/TS playlists, or direct URLs) and collects subtitles.
        """
        org, uuid = re.match(self._VALID_URL, url).groups()
        # for/else: the fallback template is used only when no entry of
        # _POWA_DEFAULTS lists this organisation.
        for orgs, tmpl in self._POWA_DEFAULTS:
            if org in orgs:
                base_api_tmpl = tmpl
                break
        else:
            base_api_tmpl = '%s-prod-cdn.video-api.arcpublishing.com/api'
        # The template lookup above uses 'wapo'; the hostname itself is
        # built with 'washpost'.
        if org == 'wapo':
            org = 'washpost'
        video = self._download_json(
            'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org),
            uuid, query={'uuid': uuid})[0]
        title = video['headlines']['basic']
        is_live = video.get('status') == 'live'

        urls = []
        formats = []
        # ``or []`` also covers an explicit ``"streams": null`` in the
        # response, which a plain ``.get('streams', [])`` would not.
        for s in video.get('streams') or []:
            s_url = s.get('url')
            # Skip streams without a URL and URLs already processed.
            if not s_url or s_url in urls:
                continue
            urls.append(s_url)
            stream_type = s.get('stream_type')
            if stream_type == 'smil':
                smil_formats = self._extract_smil_formats(
                    s_url, uuid, fatal=False)
                for f in smil_formats:
                    if f['url'].endswith('/cfx/st'):
                        f['app'] = 'cfx/st'
                        # Read optional keys defensively so a format lacking
                        # them does not abort the whole extraction.
                        play_path = f.get('play_path')
                        if play_path is not None and not play_path.startswith('mp4:'):
                            f['play_path'] = 'mp4:' + play_path
                        tbr = f.get('tbr')
                        if isinstance(tbr, float):
                            # Move the float 'tbr' to 'vbr' scaled by 1000 and
                            # rebuild the format id from the new value.
                            f['vbr'] = tbr * 1000
                            del f['tbr']
                            f['format_id'] = 'rtmp-%d' % f['vbr']
                formats.extend(smil_formats)
            elif stream_type in ('ts', 'hls'):
                m3u8_formats = self._extract_m3u8_formats(
                    s_url, uuid, 'mp4', 'm3u8' if is_live else 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                # Drop playlists where every variant is marked as having no
                # audio (this is also true for an empty result).
                if all(f.get('acodec') == 'none' for f in m3u8_formats):
                    continue
                for f in m3u8_formats:
                    if f.get('acodec') == 'none':
                        f['preference'] = -40
                    elif f.get('vcodec') == 'none':
                        f['preference'] = -50
                    height = f.get('height')
                    if not height:
                        continue
                    # Take the number that follows the height in the URL
                    # (e.g. '_<height>_<n>' or 'x<height>-<n>') as the vbr.
                    vbr = self._search_regex(
                        r'[_x]%d[_-](\d+)' % height, f['url'], 'vbr', default=None)
                    if vbr:
                        f['vbr'] = int(vbr)
                formats.extend(m3u8_formats)
            else:
                vbr = int_or_none(s.get('bitrate'))
                formats.append({
                    'format_id': '%s-%d' % (stream_type, vbr) if vbr else stream_type,
                    'vbr': vbr,
                    'width': int_or_none(s.get('width')),
                    'height': int_or_none(s.get('height')),
                    'filesize': int_or_none(s.get('filesize')),
                    'url': s_url,
                    'preference': -1,
                })
        self._sort_formats(
            formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))

        subtitles = {}
        # All subtitle URLs are filed under 'en'; no language is read from
        # the response here.
        for subtitle in (try_get(video, lambda x: x['subtitles']['urls'], list) or []):
            subtitle_url = subtitle.get('url')
            if subtitle_url:
                subtitles.setdefault('en', []).append({'url': subtitle_url})

        return {
            'id': uuid,
            'title': self._live_title(title) if is_live else title,
            'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
            'description': try_get(video, lambda x: x['subheadlines']['basic']),
            'formats': formats,
            # NOTE(review): the second argument is presumably a scale divisor
            # of int_or_none -- confirm against utils.int_or_none.
            'duration': int_or_none(video.get('duration'), 100),
            'timestamp': parse_iso8601(video.get('created_date')),
            'subtitles': subtitles,
            'is_live': is_live,
        }
|
|
@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||||
if doc.tag == 'rss':
|
if doc.tag == 'rss':
|
||||||
return GenericIE()._extract_rss(url, video_id, doc)
|
return GenericIE()._extract_rss(url, video_id, doc)
|
||||||
|
|
||||||
title = self._html_search_regex(
|
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||||
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
|
||||||
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
r'<meta name="dcterms\.title" content="(.*?)"/>',
|
||||||
r'<h4 class="headline">(.*?)</h4>',
|
r'<h4 class="headline">(.*?)</h4>',
|
||||||
r'<title[^>]*>(.*?)</title>'],
|
r'<title[^>]*>(.*?)</title>'],
|
||||||
webpage, 'title')
|
webpage, 'title')
|
||||||
description = self._html_search_meta(
|
description = self._og_search_description(webpage, default=None) or self._html_search_meta(
|
||||||
'dcterms.abstract', webpage, 'description', default=None)
|
'dcterms.abstract', webpage, 'description', default=None)
|
||||||
if description is None:
|
if description is None:
|
||||||
description = self._html_search_meta(
|
description = self._html_search_meta(
|
||||||
|
@ -249,18 +249,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 14.02.2019
|
# available till 7.01.2022
|
||||||
'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||||
'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
|
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
|
'display_id': 'maischberger-die-woche',
|
||||||
'id': '102',
|
'id': '100',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 4435.0,
|
'duration': 3687.0,
|
||||||
'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
|
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||||
'upload_date': '20180214',
|
'upload_date': '20210107',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -284,20 +284,42 @@ class ARDIE(InfoExtractor):
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for a in video_node.findall('.//asset'):
|
for a in video_node.findall('.//asset'):
|
||||||
|
file_name = xpath_text(a, './fileName', default=None)
|
||||||
|
if not file_name:
|
||||||
|
continue
|
||||||
|
format_type = a.attrib.get('type')
|
||||||
|
format_url = url_or_none(file_name)
|
||||||
|
if format_url:
|
||||||
|
ext = determine_ext(file_name)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_type or 'hls', fatal=False))
|
||||||
|
continue
|
||||||
|
elif ext == 'f4m':
|
||||||
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
update_url_query(format_url, {'hdcore': '3.7.0'}),
|
||||||
|
display_id, f4m_id=format_type or 'hds', fatal=False))
|
||||||
|
continue
|
||||||
f = {
|
f = {
|
||||||
'format_id': a.attrib['type'],
|
'format_id': format_type,
|
||||||
'width': int_or_none(a.find('./frameWidth').text),
|
'width': int_or_none(xpath_text(a, './frameWidth')),
|
||||||
'height': int_or_none(a.find('./frameHeight').text),
|
'height': int_or_none(xpath_text(a, './frameHeight')),
|
||||||
'vbr': int_or_none(a.find('./bitrateVideo').text),
|
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
|
||||||
'abr': int_or_none(a.find('./bitrateAudio').text),
|
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
|
||||||
'vcodec': a.find('./codecVideo').text,
|
'vcodec': xpath_text(a, './codecVideo'),
|
||||||
'tbr': int_or_none(a.find('./totalBitrate').text),
|
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
|
||||||
}
|
}
|
||||||
if a.find('./serverPrefix').text:
|
server_prefix = xpath_text(a, './serverPrefix', default=None)
|
||||||
f['url'] = a.find('./serverPrefix').text
|
if server_prefix:
|
||||||
f['playpath'] = a.find('./fileName').text
|
f.update({
|
||||||
|
'url': server_prefix,
|
||||||
|
'playpath': file_name,
|
||||||
|
})
|
||||||
else:
|
else:
|
||||||
f['url'] = a.find('./fileName').text
|
if not format_url:
|
||||||
|
continue
|
||||||
|
f['url'] = format_url
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -315,17 +337,17 @@ class ARDIE(InfoExtractor):
|
||||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
|
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||||
'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
|
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'die-robuste-roswita',
|
'display_id': 'die-robuste-roswita',
|
||||||
'id': '70153354',
|
'id': '78566716',
|
||||||
'title': 'Die robuste Roswita',
|
'title': 'Die robuste Roswita',
|
||||||
'description': r're:^Der Mord.*trüber ist als die Ilm.',
|
'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
|
||||||
'duration': 5316,
|
'duration': 5316,
|
||||||
'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
|
'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
|
||||||
'timestamp': 1577047500,
|
'timestamp': 1596658200,
|
||||||
'upload_date': '20191222',
|
'upload_date': '20200805',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
|
|
@ -4,23 +4,57 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
qualities,
|
qualities,
|
||||||
try_get,
|
try_get,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
# There are different sources of video in arte.tv, the extraction process
|
|
||||||
# is different for each one. The videos usually expire in 7 days, so we can't
|
|
||||||
# add tests.
|
|
||||||
|
|
||||||
|
|
||||||
class ArteTVBaseIE(InfoExtractor):
|
class ArteTVBaseIE(InfoExtractor):
|
||||||
def _extract_from_json_url(self, json_url, video_id, lang, title=None):
|
_ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
|
||||||
info = self._download_json(json_url, video_id)
|
_API_BASE = 'https://api.arte.tv/api/player/v1'
|
||||||
|
|
||||||
|
|
||||||
|
class ArteTVIE(ArteTVBaseIE):
|
||||||
|
_VALID_URL = r'''(?x)
|
||||||
|
https?://
|
||||||
|
(?:
|
||||||
|
(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
|
||||||
|
api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
|
||||||
|
)
|
||||||
|
/(?P<id>\d{6}-\d{3}-[AF])
|
||||||
|
''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '088501-000-A',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Mexico: Stealing Petrol to Survive',
|
||||||
|
'upload_date': '20190628',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
video_id = mobj.group('id')
|
||||||
|
lang = mobj.group('lang') or mobj.group('lang_2')
|
||||||
|
|
||||||
|
info = self._download_json(
|
||||||
|
'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
|
||||||
player_info = info['videoJsonPlayer']
|
player_info = info['videoJsonPlayer']
|
||||||
|
|
||||||
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
vsr = try_get(player_info, lambda x: x['VSR'], dict)
|
||||||
|
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
if not upload_date_str:
|
if not upload_date_str:
|
||||||
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
|
||||||
|
|
||||||
title = (player_info.get('VTI') or title or player_info['VID']).strip()
|
title = (player_info.get('VTI') or player_info['VID']).strip()
|
||||||
subtitle = player_info.get('VSU', '').strip()
|
subtitle = player_info.get('VSU', '').strip()
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
info_dict = {
|
|
||||||
'id': player_info['VID'],
|
|
||||||
'title': title,
|
|
||||||
'description': player_info.get('VDE'),
|
|
||||||
'upload_date': unified_strdate(upload_date_str),
|
|
||||||
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
|
||||||
}
|
|
||||||
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
|
||||||
|
|
||||||
LANGS = {
|
LANGS = {
|
||||||
|
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_dict in vsr.items():
|
for format_id, format_dict in vsr.items():
|
||||||
f = dict(format_dict)
|
f = dict(format_dict)
|
||||||
|
format_url = url_or_none(f.get('url'))
|
||||||
|
streamer = f.get('streamer')
|
||||||
|
if not format_url and not streamer:
|
||||||
|
continue
|
||||||
versionCode = f.get('versionCode')
|
versionCode = f.get('versionCode')
|
||||||
l = re.escape(langcode)
|
l = re.escape(langcode)
|
||||||
|
|
||||||
|
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
else:
|
else:
|
||||||
lang_pref = -1
|
lang_pref = -1
|
||||||
|
|
||||||
|
media_type = f.get('mediaType')
|
||||||
|
if media_type == 'hls':
|
||||||
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id=format_id, fatal=False)
|
||||||
|
for m3u8_format in m3u8_formats:
|
||||||
|
m3u8_format['language_preference'] = lang_pref
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
continue
|
||||||
|
|
||||||
format = {
|
format = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
|
||||||
|
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
'quality': qfunc(f.get('quality')),
|
'quality': qfunc(f.get('quality')),
|
||||||
}
|
}
|
||||||
|
|
||||||
if f.get('mediaType') == 'rtmp':
|
if media_type == 'rtmp':
|
||||||
format['url'] = f['streamer']
|
format['url'] = f['streamer']
|
||||||
format['play_path'] = 'mp4:' + f['url']
|
format['play_path'] = 'mp4:' + f['url']
|
||||||
format['ext'] = 'flv'
|
format['ext'] = 'flv'
|
||||||
|
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
|
||||||
|
|
||||||
formats.append(format)
|
formats.append(format)
|
||||||
|
|
||||||
self._check_formats(formats, video_id)
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info_dict['formats'] = formats
|
return {
|
||||||
return info_dict
|
'id': player_info.get('VID') or video_id,
|
||||||
|
'title': title,
|
||||||
|
'description': player_info.get('VDE'),
|
||||||
|
'upload_date': unified_strdate(upload_date_str),
|
||||||
|
'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlus7IE(ArteTVBaseIE):
|
class ArteTVEmbedIE(InfoExtractor):
|
||||||
IE_NAME = 'arte.tv:+7'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
|
'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '088501-000-A',
|
'id': '100605-013-A',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Mexico: Stealing Petrol to Survive',
|
'title': 'United we Stream November Lockdown Edition #13',
|
||||||
'upload_date': '20190628',
|
'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
|
||||||
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
@staticmethod
|
||||||
lang, video_id = re.match(self._VALID_URL, url).groups()
|
def _extract_urls(webpage):
|
||||||
return self._extract_from_json_url(
|
return [url for _, url in re.findall(
|
||||||
'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
|
r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
|
||||||
video_id, lang)
|
webpage)]
|
||||||
|
|
||||||
|
|
||||||
class ArteTVEmbedIE(ArteTVPlus7IE):
|
|
||||||
IE_NAME = 'arte.tv:embed'
|
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https://www\.arte\.tv
|
|
||||||
/player/v3/index\.php\?json_url=
|
|
||||||
(?P<json_url>
|
|
||||||
https?://api\.arte\.tv/api/player/v1/config/
|
|
||||||
(?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
|
|
||||||
)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_TESTS = []
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
|
qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
|
||||||
return self._extract_from_json_url(json_url, video_id, lang)
|
json_url = qs['json_url'][0]
|
||||||
|
video_id = ArteTVIE._match_id(json_url)
|
||||||
|
return self.url_result(
|
||||||
|
json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
|
||||||
|
|
||||||
|
|
||||||
class ArteTVPlaylistIE(ArteTVBaseIE):
|
class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
IE_NAME = 'arte.tv:playlist'
|
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
|
||||||
_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
|
||||||
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 6,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
lang, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
collection = self._download_json(
|
collection = self._download_json(
|
||||||
'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
|
'%s/collectionData/%s/%s?source=videos'
|
||||||
% (lang, playlist_id), playlist_id)
|
% (self._API_BASE, lang, playlist_id), playlist_id)
|
||||||
|
entries = []
|
||||||
|
for video in collection['videos']:
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
video_id = video.get('programId')
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': video_url,
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'alt_title': video.get('subtitle'),
|
||||||
|
'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
|
||||||
|
'duration': int_or_none(video.get('durationSeconds')),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'ie_key': ArteTVIE.ie_key(),
|
||||||
|
})
|
||||||
title = collection.get('title')
|
title = collection.get('title')
|
||||||
description = collection.get('shortDescription') or collection.get('teaserText')
|
description = collection.get('shortDescription') or collection.get('teaserText')
|
||||||
entries = [
|
|
||||||
self._extract_from_json_url(
|
|
||||||
video['jsonUrl'], video.get('programId') or playlist_id, lang)
|
|
||||||
for video in collection['videos'] if video.get('jsonUrl')]
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
|
@ -1,27 +1,91 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import functools
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import extract_attributes
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
|
int_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
|
parse_age_limit,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class AsianCrushIE(InfoExtractor):
|
class AsianCrushBaseIE(InfoExtractor):
|
||||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
|
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
|
||||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
|
_KALTURA_KEYS = [
|
||||||
|
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
|
||||||
|
'widescreen_thumbnail_url', 'screencap_widescreen',
|
||||||
|
]
|
||||||
|
_API_SUFFIX = {'retrocrush.tv': '-ott'}
|
||||||
|
|
||||||
|
def _call_api(self, host, endpoint, video_id, query, resource):
    """Fetch JSON from the site's API and return its 'objects' value.

    The API hostname is ``api<suffix>.<host>``, where the suffix comes from
    ``_API_SUFFIX`` (empty when the host has no entry). ``resource`` is only
    used in the progress note.
    """
    api_url = 'https://api%s.%s/%s' % (
        self._API_SUFFIX.get(host, ''), host, endpoint)
    note = 'Downloading %s JSON metadata' % resource
    response = self._download_json(
        api_url, video_id, note, query=query,
        headers=self.geo_verification_headers())
    return response['objects']
|
||||||
|
|
||||||
|
def _download_object_data(self, host, object_id, resource):
    """Return the first object from the 'search' endpoint for object_id."""
    objects = self._call_api(
        host, 'search', object_id, {'id': object_id}, resource)
    return objects[0]
|
||||||
|
|
||||||
|
def _get_object_description(self, obj):
    """Return obj's long description, else its short one, via strip_or_none."""
    description = obj.get('long_description')
    if not description:
        description = obj.get('short_description')
    return strip_or_none(description)
|
||||||
|
|
||||||
|
def _parse_video_data(self, video):
    """Build a url_transparent Kaltura result from an API video object.

    The Kaltura partner and entry ids are taken from the first URL among
    the ``_KALTURA_KEYS`` fields that matches the ``/p/<partner>/.../entryId/<entry>/``
    pattern. Raises KeyError when the object has no 'name'.
    """
    title = video['name']

    # NOTE: when no candidate URL matches, both ids stay None and the
    # resulting URL is 'kaltura:None:None'.
    partner_id = None
    entry_id = None
    for key in self._KALTURA_KEYS:
        candidate_url = video.get(key)
        if not candidate_url:
            continue
        match = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate_url)
        if not match:
            continue
        partner_id, entry_id = match.groups()
        break

    meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
    # Keep only truthy category names.
    category_names = [c.get('name') for c in meta_categories]
    categories = [name for name in category_names if name]

    show_info = video.get('show_info') or {}

    return {
        '_type': 'url_transparent',
        'url': 'kaltura:%s:%s' % (partner_id, entry_id),
        'ie_key': KalturaIE.ie_key(),
        'id': entry_id,
        'title': title,
        'description': self._get_object_description(video),
        'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
        'categories': categories,
        'series': show_info.get('show_name'),
        'season_number': int_or_none(show_info.get('season_num')),
        'season_id': show_info.get('season_id'),
        'episode_number': int_or_none(show_info.get('episode_num')),
    }
|
||||||
|
|
||||||
|
|
||||||
|
class AsianCrushIE(AsianCrushBaseIE):
|
||||||
|
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
|
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1_y4tmjm5r',
|
'id': '1_y4tmjm5r',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Women Who Flirt',
|
'title': 'Women Who Flirt',
|
||||||
'description': 'md5:7e986615808bcfb11756eb503a751487',
|
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||||
'timestamp': 1496936429,
|
'timestamp': 1496936429,
|
||||||
'upload_date': '20170608',
|
'upload_date': '20170608',
|
||||||
'uploader_id': 'craig@crifkin.com',
|
'uploader_id': 'craig@crifkin.com',
|
||||||
|
'age_limit': 13,
|
||||||
|
'categories': 'count:5',
|
||||||
|
'duration': 5812,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||||
|
@ -41,67 +105,35 @@ class AsianCrushIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
host, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
host = mobj.group('host')
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
|
|
||||||
|
if host == 'cocoro.tv':
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
embed_vars = self._parse_json(self._search_regex(
|
||||||
entry_id, partner_id, title = [None] * 3
|
|
||||||
|
|
||||||
vars = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||||
default='{}'), video_id, fatal=False)
|
default='{}'), video_id, fatal=False) or {}
|
||||||
if vars:
|
video_id = embed_vars.get('entry_id') or video_id
|
||||||
entry_id = vars.get('entry_id')
|
|
||||||
partner_id = vars.get('partner_id')
|
|
||||||
title = vars.get('vid_label')
|
|
||||||
|
|
||||||
if not entry_id:
|
video = self._download_object_data(host, video_id, 'video')
|
||||||
entry_id = self._search_regex(
|
return self._parse_video_data(video)
|
||||||
r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
|
|
||||||
|
|
||||||
player = self._download_webpage(
|
|
||||||
'https://api.%s/embeddedVideoPlayer' % host, video_id,
|
|
||||||
query={'id': entry_id})
|
|
||||||
|
|
||||||
kaltura_id = self._search_regex(
|
|
||||||
r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
|
|
||||||
'kaltura id', group='id')
|
|
||||||
|
|
||||||
if not partner_id:
|
|
||||||
partner_id = self._search_regex(
|
|
||||||
r'/p(?:artner_id)?/(\d+)', player, 'partner id',
|
|
||||||
default='513551')
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
|
|
||||||
webpage, 'description', fatal=False)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
|
|
||||||
'ie_key': KalturaIE.ie_key(),
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class AsianCrushPlaylistIE(InfoExtractor):
|
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
|
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
|
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '12481',
|
'id': '6447',
|
||||||
'title': 'Scholar Who Walks the Night',
|
'title': 'Fruity Samurai',
|
||||||
'description': 'md5:7addd7c5132a09fd4741152d96cce886',
|
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||||
},
|
},
|
||||||
'playlist_count': 20,
|
'playlist_count': 13,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -111,11 +143,27 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_PAGE_SIZE = 1000000000
|
||||||
|
|
||||||
|
def _fetch_page(self, domain, parent_id, page):
|
||||||
|
videos = self._call_api(
|
||||||
|
domain, 'getreferencedobjects', parent_id, {
|
||||||
|
'max': self._PAGE_SIZE,
|
||||||
|
'object_type': 'video',
|
||||||
|
'parent_id': parent_id,
|
||||||
|
'start': page * self._PAGE_SIZE,
|
||||||
|
}, 'page %d' % (page + 1))
|
||||||
|
for video in videos:
|
||||||
|
yield self._parse_video_data(video)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
host, playlist_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
|
if host == 'cocoro.tv':
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
@ -141,5 +189,12 @@ class AsianCrushPlaylistIE(InfoExtractor):
|
||||||
description = self._og_search_description(
|
description = self._og_search_description(
|
||||||
webpage, default=None) or self._html_search_meta(
|
webpage, default=None) or self._html_search_meta(
|
||||||
'twitter:description', webpage, 'description', fatal=False)
|
'twitter:description', webpage, 'description', fatal=False)
|
||||||
|
else:
|
||||||
|
show = self._download_object_data(host, playlist_id, 'show')
|
||||||
|
title = show.get('name')
|
||||||
|
description = self._get_object_description(show)
|
||||||
|
entries = OnDemandPagedList(
|
||||||
|
functools.partial(self._fetch_page, host, playlist_id),
|
||||||
|
self._PAGE_SIZE)
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
|
@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
|
'uploader_id': video_data.get('user_id'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
|
||||||
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
'upload_date': '20150107',
|
'upload_date': '20150107',
|
||||||
'timestamp': 1420588800,
|
'timestamp': 1420588800,
|
||||||
|
'uploader_id': '71',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
|
|
|
@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
|
||||||
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
|
_API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
|
||||||
_PARTNER_ID = '1719221'
|
_PARTNER_ID = '1719221'
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import random
|
import random
|
||||||
|
@ -5,10 +6,7 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import compat_str
|
||||||
compat_str,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
@ -17,30 +15,32 @@ from ..utils import (
|
||||||
parse_filesize,
|
parse_filesize,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
|
||||||
update_url_query,
|
update_url_query,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class BandcampIE(InfoExtractor):
|
class BandcampIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
|
_VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://haruhi-dl.bandcamp.com/track/haruhi-dl-test-song',
|
'url': 'http://haruhi-dl.bandcamp.com/track/haruhi-dl-test-song',
|
||||||
'md5': 'c557841d5e50261777a6585648adf439',
|
'md5': 'c557841d5e50261777a6585648adf439',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1812978515',
|
'id': '1812978515',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': "haruhi-dl \"'/\\\u00e4\u21ad - haruhi-dl test song \"'/\\\u00e4\u21ad",
|
'title': "haruhi-dl \"'/\\ä↭ - haruhi-dl \"'/\\ä↭ - haruhi-dl test song \"'/\\ä↭",
|
||||||
'duration': 9.8485,
|
'duration': 9.8485,
|
||||||
|
'uploader': 'haruhi-dl "\'/\\ä↭',
|
||||||
|
'upload_date': '20121129',
|
||||||
|
'timestamp': 1354224127,
|
||||||
},
|
},
|
||||||
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
'_skip': 'There is a limit of 200 free downloads / month for the test song'
|
||||||
}, {
|
}, {
|
||||||
# free download
|
# free download
|
||||||
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
|
||||||
'md5': '853e35bf34aa1d6fe2615ae612564b36',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2650410135',
|
'id': '2650410135',
|
||||||
'ext': 'aiff',
|
'ext': 'aiff',
|
||||||
|
@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
|
||||||
|
return self._parse_json(self._html_search_regex(
|
||||||
|
r'data-%s=(["\'])({.+?})\1' % attr, webpage,
|
||||||
|
attr + ' data', group=2), video_id, fatal=fatal)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
title = self._match_id(url)
|
||||||
title = mobj.group('title')
|
|
||||||
webpage = self._download_webpage(url, title)
|
webpage = self._download_webpage(url, title)
|
||||||
thumbnail = self._html_search_meta('og:image', webpage, default=None)
|
tralbum = self._extract_data_attr(webpage, title)
|
||||||
|
thumbnail = self._og_search_thumbnail(webpage)
|
||||||
|
|
||||||
track_id = None
|
track_id = None
|
||||||
track = None
|
track = None
|
||||||
|
@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
|
||||||
duration = None
|
duration = None
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
track_info = self._parse_json(
|
track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
|
||||||
self._search_regex(
|
|
||||||
r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
|
|
||||||
webpage, 'track info', default='{}'), title)
|
|
||||||
if track_info:
|
if track_info:
|
||||||
file_ = track_info.get('file')
|
file_ = track_info.get('file')
|
||||||
if isinstance(file_, dict):
|
if isinstance(file_, dict):
|
||||||
|
@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
|
||||||
'abr': int_or_none(abr_str),
|
'abr': int_or_none(abr_str),
|
||||||
})
|
})
|
||||||
track = track_info.get('title')
|
track = track_info.get('title')
|
||||||
track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
|
track_id = str_or_none(
|
||||||
|
track_info.get('track_id') or track_info.get('id'))
|
||||||
track_number = int_or_none(track_info.get('track_num'))
|
track_number = int_or_none(track_info.get('track_num'))
|
||||||
duration = float_or_none(track_info.get('duration'))
|
duration = float_or_none(track_info.get('duration'))
|
||||||
|
|
||||||
def extract(key):
|
embed = self._extract_data_attr(webpage, title, 'embed', False)
|
||||||
return self._search_regex(
|
current = tralbum.get('current') or {}
|
||||||
r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
|
artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
|
||||||
webpage, key, default=None, group='value')
|
|
||||||
|
|
||||||
artist = extract('artist')
|
|
||||||
album = extract('album_title')
|
|
||||||
timestamp = unified_timestamp(
|
timestamp = unified_timestamp(
|
||||||
extract('publish_date') or extract('album_publish_date'))
|
current.get('publish_date') or tralbum.get('album_publish_date'))
|
||||||
release_date = unified_strdate(extract('album_release_date'))
|
|
||||||
|
|
||||||
download_link = self._search_regex(
|
download_link = tralbum.get('freeDownloadPage')
|
||||||
r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
|
|
||||||
'download link', default=None, group='url')
|
|
||||||
if download_link:
|
if download_link:
|
||||||
track_id = self._search_regex(
|
track_id = compat_str(tralbum['id'])
|
||||||
r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
|
|
||||||
webpage, 'track id')
|
|
||||||
|
|
||||||
download_webpage = self._download_webpage(
|
download_webpage = self._download_webpage(
|
||||||
download_link, track_id, 'Downloading free downloads page')
|
download_link, track_id, 'Downloading free downloads page')
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(download_webpage, track_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
track_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
info = try_get(
|
info = try_get(
|
||||||
blob, (lambda x: x['digital_items'][0],
|
blob, (lambda x: x['digital_items'][0],
|
||||||
|
@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': artist,
|
'uploader': artist,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'release_date': release_date,
|
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'track': track,
|
'track': track,
|
||||||
'track_number': track_number,
|
'track_number': track_number,
|
||||||
'track_id': track_id,
|
'track_id': track_id,
|
||||||
'artist': artist,
|
'artist': artist,
|
||||||
'album': album,
|
'album': embed.get('album_title'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampAlbumIE(InfoExtractor):
|
class BandcampAlbumIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:album'
|
IE_NAME = 'Bandcamp:album'
|
||||||
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
|
_VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
|
||||||
|
@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1353101989',
|
'id': '1353101989',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Intro',
|
'title': 'Blazo - Intro',
|
||||||
|
'timestamp': 1311756226,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '38097443',
|
'id': '38097443',
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Kero One - Keep It Alive (Blazo remix)',
|
'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
|
||||||
|
'timestamp': 1311757238,
|
||||||
|
'upload_date': '20110727',
|
||||||
|
'uploader': 'Blazo',
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'title': '"Entropy" EP',
|
'title': '"Entropy" EP',
|
||||||
'uploader_id': 'jstrecords',
|
'uploader_id': 'jstrecords',
|
||||||
'id': 'entropy-ep',
|
'id': 'entropy-ep',
|
||||||
|
'description': 'md5:0ff22959c943622972596062f2f366a5',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 3,
|
||||||
}, {
|
}, {
|
||||||
|
@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
'id': 'we-are-the-plague',
|
'id': 'we-are-the-plague',
|
||||||
'title': 'WE ARE THE PLAGUE',
|
'title': 'WE ARE THE PLAGUE',
|
||||||
'uploader_id': 'insulters',
|
'uploader_id': 'insulters',
|
||||||
|
'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
|
||||||
},
|
},
|
||||||
'playlist_count': 2,
|
'playlist_count': 2,
|
||||||
}]
|
}]
|
||||||
|
@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
|
||||||
else super(BandcampAlbumIE, cls).suitable(url))
|
else super(BandcampAlbumIE, cls).suitable(url))
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
uploader_id, album_id = re.match(self._VALID_URL, url).groups()
|
||||||
uploader_id = mobj.group('subdomain')
|
|
||||||
album_id = mobj.group('album_id')
|
|
||||||
playlist_id = album_id or uploader_id
|
playlist_id = album_id or uploader_id
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
track_elements = re.findall(
|
tralbum = self._extract_data_attr(webpage, playlist_id)
|
||||||
r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
|
track_info = tralbum.get('trackinfo')
|
||||||
if not track_elements:
|
if not track_info:
|
||||||
raise ExtractorError('The page doesn\'t contain any tracks')
|
raise ExtractorError('The page doesn\'t contain any tracks')
|
||||||
# Only tracks with duration info have songs
|
# Only tracks with duration info have songs
|
||||||
entries = [
|
entries = [
|
||||||
self.url_result(
|
self.url_result(
|
||||||
compat_urlparse.urljoin(url, t_path),
|
urljoin(url, t['title_link']), BandcampIE.ie_key(),
|
||||||
ie=BandcampIE.ie_key(),
|
str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
|
||||||
video_title=self._search_regex(
|
for t in track_info
|
||||||
r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
|
if t.get('duration')]
|
||||||
elem_content, 'track title', fatal=False))
|
|
||||||
for elem_content, t_path in track_elements
|
current = tralbum.get('current') or {}
|
||||||
if self._html_search_meta('duration', elem_content, default=None)]
|
|
||||||
|
|
||||||
title = self._html_search_regex(
|
|
||||||
r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
|
|
||||||
webpage, 'title', fatal=False)
|
|
||||||
if title:
|
|
||||||
title = title.replace(r'\"', '"')
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
'id': playlist_id,
|
'id': playlist_id,
|
||||||
'title': title,
|
'title': current.get('title'),
|
||||||
|
'description': current.get('about'),
|
||||||
'entries': entries,
|
'entries': entries,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class BandcampWeeklyIE(InfoExtractor):
|
class BandcampWeeklyIE(BandcampIE):
|
||||||
IE_NAME = 'Bandcamp:weekly'
|
IE_NAME = 'Bandcamp:weekly'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': '20170404',
|
'release_date': '20170404',
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': 'Magic Moments',
|
'episode': 'Magic Moments',
|
||||||
'episode_number': 208,
|
|
||||||
'episode_id': '224',
|
'episode_id': '224',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'opus-lo',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
'url': 'https://bandcamp.com/?blah/blah@&show=228',
|
||||||
'only_matching': True
|
'only_matching': True
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
show_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, show_id)
|
||||||
|
|
||||||
blob = self._parse_json(
|
blob = self._extract_data_attr(webpage, show_id, 'blob')
|
||||||
self._search_regex(
|
|
||||||
r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
|
|
||||||
'blob', group='blob'),
|
|
||||||
video_id, transform_source=unescapeHTML)
|
|
||||||
|
|
||||||
show = blob['bcw_show']
|
show = blob['bcw_data'][show_id]
|
||||||
|
|
||||||
# This is desired because any invalid show id redirects to `bandcamp.com`
|
|
||||||
# which happens to expose the latest Bandcamp Weekly episode.
|
|
||||||
show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for format_id, format_url in show['audio_stream'].items():
|
for format_id, format_url in show['audio_stream'].items():
|
||||||
|
@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
if subtitle:
|
if subtitle:
|
||||||
title += ' - %s' % subtitle
|
title += ' - %s' % subtitle
|
||||||
|
|
||||||
episode_number = None
|
|
||||||
seq = blob.get('bcw_seq')
|
|
||||||
|
|
||||||
if seq and isinstance(seq, list):
|
|
||||||
try:
|
|
||||||
episode_number = next(
|
|
||||||
int_or_none(e.get('episode_number'))
|
|
||||||
for e in seq
|
|
||||||
if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
|
|
||||||
except StopIteration:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': show_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': show.get('desc') or show.get('short_desc'),
|
'description': show.get('desc') or show.get('short_desc'),
|
||||||
'duration': float_or_none(show.get('audio_duration')),
|
'duration': float_or_none(show.get('audio_duration')),
|
||||||
|
@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
|
||||||
'release_date': unified_strdate(show.get('published_date')),
|
'release_date': unified_strdate(show.get('published_date')),
|
||||||
'series': 'Bandcamp Weekly',
|
'series': 'Bandcamp Weekly',
|
||||||
'episode': show.get('subtitle'),
|
'episode': show.get('subtitle'),
|
||||||
'episode_number': episode_number,
|
'episode_id': show_id,
|
||||||
'episode_id': compat_str(video_id),
|
|
||||||
'formats': formats
|
'formats': formats
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
_LOGIN_URL = 'https://account.bbc.com/signin'
|
_LOGIN_URL = 'https://account.bbc.com/signin'
|
||||||
_NETRC_MACHINE = 'bbc'
|
_NETRC_MACHINE = 'bbc'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
|
||||||
|
_MEDIA_SETS = [
|
||||||
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
# Provides HQ HLS streams with even better quality that pc mediaset but fails
|
||||||
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
# with geolocation in some cases when it's even not geo restricted at all (e.g.
|
||||||
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
# http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
'iptv-all',
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
|
'pc',
|
||||||
]
|
]
|
||||||
|
|
||||||
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
|
|
||||||
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
|
||||||
|
|
||||||
_NAMESPACES = (
|
|
||||||
_MEDIASELECTION_NS,
|
|
||||||
_EMP_PLAYLIST_NS,
|
|
||||||
)
|
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
|
||||||
|
@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
|
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
username, password = self._get_login_info()
|
username, password = self._get_login_info()
|
||||||
if username is None:
|
if username is None:
|
||||||
|
@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
def _extract_items(self, playlist):
|
def _extract_items(self, playlist):
|
||||||
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
|
||||||
|
|
||||||
def _findall_ns(self, element, xpath):
|
|
||||||
elements = []
|
|
||||||
for ns in self._NAMESPACES:
|
|
||||||
elements.extend(element.findall(xpath % ns))
|
|
||||||
return elements
|
|
||||||
|
|
||||||
def _extract_medias(self, media_selection):
|
def _extract_medias(self, media_selection):
|
||||||
error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
|
error = media_selection.get('result')
|
||||||
if error is None:
|
if error:
|
||||||
media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
|
raise BBCCoUkIE.MediaSelectionError(error)
|
||||||
if error is not None:
|
return media_selection.get('media') or []
|
||||||
raise BBCCoUkIE.MediaSelectionError(error.get('id'))
|
|
||||||
return self._findall_ns(media_selection, './{%s}media')
|
|
||||||
|
|
||||||
def _extract_connections(self, media):
|
def _extract_connections(self, media):
|
||||||
return self._findall_ns(media, './{%s}connection')
|
return media.get('connection') or []
|
||||||
|
|
||||||
def _get_subtitles(self, media, programme_id):
|
def _get_subtitles(self, media, programme_id):
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
|
@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
cc_url, programme_id, 'Downloading captions', fatal=False)
|
cc_url, programme_id, 'Downloading captions', fatal=False)
|
||||||
if not isinstance(captions, compat_etree_Element):
|
if not isinstance(captions, compat_etree_Element):
|
||||||
continue
|
continue
|
||||||
lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
|
subtitles['en'] = [
|
||||||
subtitles[lang] = [
|
|
||||||
{
|
{
|
||||||
'url': connection.get('href'),
|
'url': connection.get('href'),
|
||||||
'ext': 'ttml',
|
'ext': 'ttml',
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
break
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|
||||||
def _raise_extractor_error(self, media_selection_error):
|
def _raise_extractor_error(self, media_selection_error):
|
||||||
|
@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
|
|
||||||
def _download_media_selector(self, programme_id):
|
def _download_media_selector(self, programme_id):
|
||||||
last_exception = None
|
last_exception = None
|
||||||
for mediaselector_url in self._MEDIASELECTOR_URLS:
|
for media_set in self._MEDIA_SETS:
|
||||||
try:
|
try:
|
||||||
return self._download_media_selector_url(
|
return self._download_media_selector_url(
|
||||||
mediaselector_url % programme_id, programme_id)
|
self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
|
||||||
except BBCCoUkIE.MediaSelectionError as e:
|
except BBCCoUkIE.MediaSelectionError as e:
|
||||||
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
|
||||||
last_exception = e
|
last_exception = e
|
||||||
|
@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
self._raise_extractor_error(last_exception)
|
self._raise_extractor_error(last_exception)
|
||||||
|
|
||||||
def _download_media_selector_url(self, url, programme_id=None):
|
def _download_media_selector_url(self, url, programme_id=None):
|
||||||
media_selection = self._download_xml(
|
media_selection = self._download_json(
|
||||||
url, programme_id, 'Downloading media selection XML',
|
url, programme_id, 'Downloading media selection JSON',
|
||||||
expected_status=(403, 404))
|
expected_status=(403, 404))
|
||||||
return self._process_media_selector(media_selection, programme_id)
|
return self._process_media_selector(media_selection, programme_id)
|
||||||
|
|
||||||
|
@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
if kind in ('video', 'audio'):
|
if kind in ('video', 'audio'):
|
||||||
bitrate = int_or_none(media.get('bitrate'))
|
bitrate = int_or_none(media.get('bitrate'))
|
||||||
encoding = media.get('encoding')
|
encoding = media.get('encoding')
|
||||||
service = media.get('service')
|
|
||||||
width = int_or_none(media.get('width'))
|
width = int_or_none(media.get('width'))
|
||||||
height = int_or_none(media.get('height'))
|
height = int_or_none(media.get('height'))
|
||||||
file_size = int_or_none(media.get('media_file_size'))
|
file_size = int_or_none(media.get('media_file_size'))
|
||||||
|
@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
supplier = connection.get('supplier')
|
supplier = connection.get('supplier')
|
||||||
transfer_format = connection.get('transferFormat')
|
transfer_format = connection.get('transferFormat')
|
||||||
format_id = supplier or conn_kind or protocol
|
format_id = supplier or conn_kind or protocol
|
||||||
if service:
|
|
||||||
format_id = '%s_%s' % (service, format_id)
|
|
||||||
# ASX playlist
|
# ASX playlist
|
||||||
if supplier == 'asx':
|
if supplier == 'asx':
|
||||||
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
|
||||||
|
@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
href, programme_id, ext='mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id=format_id, fatal=False))
|
m3u8_id=format_id, fatal=False))
|
||||||
if re.search(self._USP_RE, href):
|
|
||||||
usp_formats = self._extract_m3u8_formats(
|
|
||||||
re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
|
|
||||||
programme_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id=format_id, fatal=False)
|
|
||||||
for f in usp_formats:
|
|
||||||
if f.get('height') and f['height'] > 720:
|
|
||||||
continue
|
|
||||||
formats.append(f)
|
|
||||||
elif transfer_format == 'hds':
|
elif transfer_format == 'hds':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
href, programme_id, f4m_id=format_id, fatal=False))
|
href, programme_id, f4m_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
if not service and not supplier and bitrate:
|
if not supplier and bitrate:
|
||||||
format_id += '-%d' % bitrate
|
format_id += '-%d' % bitrate
|
||||||
fmt = {
|
fmt = {
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
webpage = self._download_webpage(url, group_id, 'Downloading video page')
|
||||||
|
|
||||||
error = self._search_regex(
|
error = self._search_regex(
|
||||||
r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
|
r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
|
||||||
webpage, 'error', default=None)
|
webpage, 'error', default=None)
|
||||||
if error:
|
if error:
|
||||||
raise ExtractorError(error, expected=True)
|
raise ExtractorError(error, expected=True)
|
||||||
|
@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
|
||||||
IE_DESC = 'BBC'
|
IE_DESC = 'BBC'
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
_VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
|
||||||
|
|
||||||
_MEDIASELECTOR_URLS = [
|
_MEDIA_SETS = [
|
||||||
# Provides HQ HLS streams but fails with geolocation in some cases when it's
|
'mobile-tablet-main',
|
||||||
# even not geo restricted at all
|
'pc',
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
|
|
||||||
# Provides more formats, namely direct mp4 links, but fails on some videos with
|
|
||||||
# notukerror for non UK (?) users (e.g.
|
|
||||||
# http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
|
|
||||||
'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
|
|
||||||
# Provides fewer formats, but works everywhere for everybody (hopefully)
|
|
||||||
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
|
|
||||||
]
|
]
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -981,7 +947,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
group_id = self._search_regex(
|
group_id = self._search_regex(
|
||||||
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
|
||||||
webpage, 'group id', default=None)
|
webpage, 'group id', default=None)
|
||||||
if playlist_id:
|
if group_id:
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
'https://www.bbc.co.uk/programmes/%s' % group_id,
|
||||||
ie=BBCCoUkIE.ie_key())
|
ie=BBCCoUkIE.ie_key())
|
||||||
|
@ -1092,10 +1058,26 @@ class BBCIE(BBCCoUkIE):
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
|
||||||
'bbcthree config', default='{}'),
|
'bbcthree config', default='{}'),
|
||||||
playlist_id, transform_source=js_to_json, fatal=False)
|
playlist_id, transform_source=js_to_json, fatal=False) or {}
|
||||||
if bbc3_config:
|
payload = bbc3_config.get('payload') or {}
|
||||||
|
if payload:
|
||||||
|
clip = payload.get('currentClip') or {}
|
||||||
|
clip_vpid = clip.get('vpid')
|
||||||
|
clip_title = clip.get('title')
|
||||||
|
if clip_vpid and clip_title:
|
||||||
|
formats, subtitles = self._download_media_selector(clip_vpid)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return {
|
||||||
|
'id': clip_vpid,
|
||||||
|
'title': clip_title,
|
||||||
|
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
|
||||||
|
'description': clip.get('description'),
|
||||||
|
'duration': parse_duration(clip.get('duration')),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
}
|
||||||
bbc3_playlist = try_get(
|
bbc3_playlist = try_get(
|
||||||
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
|
payload, lambda x: x['content']['bbcMedia']['playlist'],
|
||||||
dict)
|
dict)
|
||||||
if bbc3_playlist:
|
if bbc3_playlist:
|
||||||
playlist_title = bbc3_playlist.get('title') or playlist_title
|
playlist_title = bbc3_playlist.get('title') or playlist_title
|
||||||
|
@ -1118,6 +1100,39 @@ class BBCIE(BBCCoUkIE):
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, playlist_id, playlist_title, playlist_description)
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
|
initial_data = self._parse_json(self._search_regex(
|
||||||
|
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
|
||||||
|
'preload state', default='{}'), playlist_id, fatal=False)
|
||||||
|
if initial_data:
|
||||||
|
def parse_media(media):
|
||||||
|
if not media:
|
||||||
|
return
|
||||||
|
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
|
||||||
|
item_id = item.get('id')
|
||||||
|
item_title = item.get('title')
|
||||||
|
if not (item_id and item_title):
|
||||||
|
continue
|
||||||
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
entries.append({
|
||||||
|
'id': item_id,
|
||||||
|
'title': item_title,
|
||||||
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
|
name = resp.get('name')
|
||||||
|
if name == 'media-experience':
|
||||||
|
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
|
||||||
|
elif name == 'article':
|
||||||
|
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
|
||||||
|
if block.get('type') != 'media':
|
||||||
|
continue
|
||||||
|
parse_media(block.get('model'))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id, playlist_title, playlist_description)
|
||||||
|
|
||||||
def extract_all(pattern):
|
def extract_all(pattern):
|
||||||
return list(filter(None, map(
|
return list(filter(None, map(
|
||||||
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
lambda s: self._parse_json(s, playlist_id, fatal=False),
|
||||||
|
|
|
@ -1,194 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
clean_html,
|
|
||||||
compat_str,
|
|
||||||
float_or_none,
|
|
||||||
int_or_none,
|
|
||||||
parse_iso8601,
|
|
||||||
try_get,
|
|
||||||
urljoin,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProBaseIE(InfoExtractor):
|
|
||||||
_API_BASE = 'https://mixer.com/api/v1'
|
|
||||||
_RATINGS = {'family': 0, 'teen': 13, '18+': 18}
|
|
||||||
|
|
||||||
def _extract_channel_info(self, chan):
|
|
||||||
user_id = chan.get('userId') or try_get(chan, lambda x: x['user']['id'])
|
|
||||||
return {
|
|
||||||
'uploader': chan.get('token') or try_get(
|
|
||||||
chan, lambda x: x['user']['username'], compat_str),
|
|
||||||
'uploader_id': compat_str(user_id) if user_id else None,
|
|
||||||
'age_limit': self._RATINGS.get(chan.get('audience')),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProLiveIE(BeamProBaseIE):
|
|
||||||
IE_NAME = 'Mixer:live'
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://mixer.com/niterhayven',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '261562',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Introducing The Witcher 3 // The Grind Starts Now!',
|
|
||||||
'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
|
|
||||||
'thumbnail': r're:https://.*\.jpg$',
|
|
||||||
'timestamp': 1483477281,
|
|
||||||
'upload_date': '20170103',
|
|
||||||
'uploader': 'niterhayven',
|
|
||||||
'uploader_id': '373396',
|
|
||||||
'age_limit': 18,
|
|
||||||
'is_live': True,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'skip': 'niterhayven is offline',
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
_MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def suitable(cls, url):
|
|
||||||
return False if BeamProVodIE.suitable(url) else super(BeamProLiveIE, cls).suitable(url)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
channel_name = self._match_id(url)
|
|
||||||
|
|
||||||
chan = self._download_json(
|
|
||||||
'%s/channels/%s' % (self._API_BASE, channel_name), channel_name)
|
|
||||||
|
|
||||||
if chan.get('online') is False:
|
|
||||||
raise ExtractorError(
|
|
||||||
'{0} is offline'.format(channel_name), expected=True)
|
|
||||||
|
|
||||||
channel_id = chan['id']
|
|
||||||
|
|
||||||
def manifest_url(kind):
|
|
||||||
return self._MANIFEST_URL_TEMPLATE % (channel_id, kind)
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
manifest_url('m3u8'), channel_name, ext='mp4', m3u8_id='hls',
|
|
||||||
fatal=False)
|
|
||||||
formats.extend(self._extract_smil_formats(
|
|
||||||
manifest_url('smil'), channel_name, fatal=False))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': compat_str(chan.get('id') or channel_name),
|
|
||||||
'title': self._live_title(chan.get('name') or channel_name),
|
|
||||||
'description': clean_html(chan.get('description')),
|
|
||||||
'thumbnail': try_get(
|
|
||||||
chan, lambda x: x['thumbnail']['url'], compat_str),
|
|
||||||
'timestamp': parse_iso8601(chan.get('updatedAt')),
|
|
||||||
'is_live': True,
|
|
||||||
'view_count': int_or_none(chan.get('viewersTotal')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
info.update(self._extract_channel_info(chan))
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
||||||
|
|
||||||
class BeamProVodIE(BeamProBaseIE):
|
|
||||||
IE_NAME = 'Mixer:vod'
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://mixer.com/willow8714?vod=2259830',
|
|
||||||
'md5': 'b2431e6e8347dc92ebafb565d368b76b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2259830',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'willow8714\'s Channel',
|
|
||||||
'duration': 6828.15,
|
|
||||||
'thumbnail': r're:https://.*source\.png$',
|
|
||||||
'timestamp': 1494046474,
|
|
||||||
'upload_date': '20170506',
|
|
||||||
'uploader': 'willow8714',
|
|
||||||
'uploader_id': '6085379',
|
|
||||||
'age_limit': 13,
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _extract_format(vod, vod_type):
|
|
||||||
if not vod.get('baseUrl'):
|
|
||||||
return []
|
|
||||||
|
|
||||||
if vod_type == 'hls':
|
|
||||||
filename, protocol = 'manifest.m3u8', 'm3u8_native'
|
|
||||||
elif vod_type == 'raw':
|
|
||||||
filename, protocol = 'source.mp4', 'https'
|
|
||||||
else:
|
|
||||||
assert False
|
|
||||||
|
|
||||||
data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
|
|
||||||
|
|
||||||
format_id = [vod_type]
|
|
||||||
if isinstance(data.get('Height'), compat_str):
|
|
||||||
format_id.append('%sp' % data['Height'])
|
|
||||||
|
|
||||||
return [{
|
|
||||||
'url': urljoin(vod['baseUrl'], filename),
|
|
||||||
'format_id': '-'.join(format_id),
|
|
||||||
'ext': 'mp4',
|
|
||||||
'protocol': protocol,
|
|
||||||
'width': int_or_none(data.get('Width')),
|
|
||||||
'height': int_or_none(data.get('Height')),
|
|
||||||
'fps': int_or_none(data.get('Fps')),
|
|
||||||
'tbr': int_or_none(data.get('Bitrate'), 1000),
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
vod_id = self._match_id(url)
|
|
||||||
|
|
||||||
vod_info = self._download_json(
|
|
||||||
'%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
|
|
||||||
|
|
||||||
state = vod_info.get('state')
|
|
||||||
if state != 'AVAILABLE':
|
|
||||||
raise ExtractorError(
|
|
||||||
'VOD %s is not available (state: %s)' % (vod_id, state),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
thumbnail_url = None
|
|
||||||
|
|
||||||
for vod in vod_info['vods']:
|
|
||||||
vod_type = vod.get('format')
|
|
||||||
if vod_type in ('hls', 'raw'):
|
|
||||||
formats.extend(self._extract_format(vod, vod_type))
|
|
||||||
elif vod_type == 'thumbnail':
|
|
||||||
thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info = {
|
|
||||||
'id': vod_id,
|
|
||||||
'title': vod_info.get('name') or vod_id,
|
|
||||||
'duration': float_or_none(vod_info.get('duration')),
|
|
||||||
'thumbnail': thumbnail_url,
|
|
||||||
'timestamp': parse_iso8601(vod_info.get('createdAt')),
|
|
||||||
'view_count': int_or_none(vod_info.get('viewsTotal')),
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
info.update(self._extract_channel_info(vod_info.get('channel') or {}))
|
|
||||||
|
|
||||||
return info
|
|
103
haruhi_dl/extractor/bfmtv.py
Normal file
103
haruhi_dl/extractor/bfmtv.py
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import extract_attributes
|
||||||
|
|
||||||
|
|
||||||
|
class BFMTVBaseIE(InfoExtractor):
|
||||||
|
_VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
|
||||||
|
_VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
|
||||||
|
_VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
def _brightcove_url_result(self, video_id, video_block):
|
||||||
|
account_id = video_block.get('accountid') or '876450612001'
|
||||||
|
player_id = video_block.get('playerid') or 'I2qBTln4u'
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
|
||||||
|
'BrightcoveNew', video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class BFMTVIE(BFMTVBaseIE):
|
||||||
|
IE_NAME = 'bfmtv'
|
||||||
|
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6196747868001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
|
||||||
|
'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
|
||||||
|
'uploader_id': '876450610001',
|
||||||
|
'upload_date': '20201002',
|
||||||
|
'timestamp': 1601629620,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
bfmtv_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, bfmtv_id)
|
||||||
|
video_block = extract_attributes(self._search_regex(
|
||||||
|
self._VIDEO_BLOCK_REGEX, webpage, 'video block'))
|
||||||
|
return self._brightcove_url_result(video_block['videoid'], video_block)
|
||||||
|
|
||||||
|
|
||||||
|
class BFMTVLiveIE(BFMTVIE):
|
||||||
|
IE_NAME = 'bfmtv:live'
|
||||||
|
_VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bfmtv.com/en-direct/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5615950982001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
|
||||||
|
'uploader_id': '876450610001',
|
||||||
|
'upload_date': '20171018',
|
||||||
|
'timestamp': 1508329950,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bfmtv.com/economie/en-direct/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
|
||||||
|
class BFMTVArticleIE(BFMTVBaseIE):
|
||||||
|
IE_NAME = 'bfmtv:article'
|
||||||
|
_VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '202101060198',
|
||||||
|
'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
|
||||||
|
'description': 'md5:947974089c303d3ac6196670ae262843',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
bfmtv_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, bfmtv_id)
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for video_block_el in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
|
||||||
|
video_block = extract_attributes(video_block_el)
|
||||||
|
video_id = video_block.get('videoid')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
entries.append(self._brightcove_url_result(video_id, video_block))
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, bfmtv_id, self._og_search_title(webpage, fatal=False),
|
||||||
|
self._html_search_meta(['og:description', 'description'], webpage))
|
30
haruhi_dl/extractor/bibeltv.py
Normal file
30
haruhi_dl/extractor/bibeltv.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class BibelTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
|
||||||
|
'md5': '252f908192d611de038b8504b08bf97f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ref:329703',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Sprachkurs in Malaiisch',
|
||||||
|
'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
|
||||||
|
'timestamp': 1608316701,
|
||||||
|
'uploader_id': '5840105145001',
|
||||||
|
'upload_date': '20201218',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
crn_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % crn_id, 'BrightcoveNew')
|
|
@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
_VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
|
||||||
'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
|
'md5': '670b2d73f48549da032861130488c681',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
|
||||||
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
|
||||||
|
'upload_date': '20150723',
|
||||||
|
'timestamp': 1437679032,
|
||||||
|
|
||||||
},
|
},
|
||||||
|
'expected_warnings': [
|
||||||
|
'Unable to download f4m manifest'
|
||||||
|
]
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
60
haruhi_dl/extractor/bongacams.py
Normal file
60
haruhi_dl/extractor/bongacams.py
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BongaCamsIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://de.bongacams.com/azumi-8',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://cn.bongacams.com/azumi-8',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
host = mobj.group('host')
|
||||||
|
channel_id = mobj.group('id')
|
||||||
|
|
||||||
|
amf = self._download_json(
|
||||||
|
'https://%s/tools/amf.php' % host, channel_id,
|
||||||
|
data=urlencode_postdata((
|
||||||
|
('method', 'getRoomData'),
|
||||||
|
('args[]', channel_id),
|
||||||
|
('args[]', 'false'),
|
||||||
|
)), headers={'X-Requested-With': 'XMLHttpRequest'})
|
||||||
|
|
||||||
|
server_url = amf['localData']['videoServerUrl']
|
||||||
|
|
||||||
|
uploader_id = try_get(
|
||||||
|
amf, lambda x: x['performerData']['username'], compat_str) or channel_id
|
||||||
|
uploader = try_get(
|
||||||
|
amf, lambda x: x['performerData']['displayName'], compat_str)
|
||||||
|
like_count = int_or_none(try_get(
|
||||||
|
amf, lambda x: x['performerData']['loversCount']))
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
'%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
|
||||||
|
channel_id, 'mp4', m3u8_id='hls', live=True)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': channel_id,
|
||||||
|
'title': self._live_title(uploader or uploader_id),
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'like_count': like_count,
|
||||||
|
'age_limit': 18,
|
||||||
|
'is_live': True,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
98
haruhi_dl/extractor/box.py
Normal file
98
haruhi_dl/extractor/box.py
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
parse_iso8601,
|
||||||
|
# try_get,
|
||||||
|
update_url_query,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class BoxIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
|
||||||
|
'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '510727257538',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Garber St. Louis will be 28th MLS team +scarving.mp4',
|
||||||
|
'uploader': 'MLS Video',
|
||||||
|
'timestamp': 1566320259,
|
||||||
|
'upload_date': '20190820',
|
||||||
|
'uploader_id': '235196876',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
shared_name, file_id = re.match(self._VALID_URL, url).groups()
|
||||||
|
webpage = self._download_webpage(url, file_id)
|
||||||
|
request_token = self._parse_json(self._search_regex(
|
||||||
|
r'Box\.config\s*=\s*({.+?});', webpage,
|
||||||
|
'Box config'), file_id)['requestToken']
|
||||||
|
access_token = self._download_json(
|
||||||
|
'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
|
||||||
|
'Downloading token JSON metadata',
|
||||||
|
data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Request-Token': request_token,
|
||||||
|
'X-Box-EndUser-API': 'sharedName=' + shared_name,
|
||||||
|
})[file_id]['read']
|
||||||
|
shared_link = 'https://app.box.com/s/' + shared_name
|
||||||
|
f = self._download_json(
|
||||||
|
'https://api.box.com/2.0/files/' + file_id, file_id,
|
||||||
|
'Downloading file JSON metadata', headers={
|
||||||
|
'Authorization': 'Bearer ' + access_token,
|
||||||
|
'BoxApi': 'shared_link=' + shared_link,
|
||||||
|
'X-Rep-Hints': '[dash]', # TODO: extract `hls` formats
|
||||||
|
}, query={
|
||||||
|
'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
|
||||||
|
})
|
||||||
|
title = f['name']
|
||||||
|
|
||||||
|
query = {
|
||||||
|
'access_token': access_token,
|
||||||
|
'shared_link': shared_link
|
||||||
|
}
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
|
||||||
|
# for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
|
||||||
|
# entry_url_template = try_get(
|
||||||
|
# entry, lambda x: x['content']['url_template'])
|
||||||
|
# if not entry_url_template:
|
||||||
|
# continue
|
||||||
|
# representation = entry.get('representation')
|
||||||
|
# if representation == 'dash':
|
||||||
|
# TODO: append query to every fragment URL
|
||||||
|
# formats.extend(self._extract_mpd_formats(
|
||||||
|
# entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
|
||||||
|
# file_id, query=query))
|
||||||
|
|
||||||
|
authenticated_download_url = f.get('authenticated_download_url')
|
||||||
|
if authenticated_download_url and f.get('is_download_available'):
|
||||||
|
formats.append({
|
||||||
|
'ext': f.get('extension') or determine_ext(title),
|
||||||
|
'filesize': f.get('size'),
|
||||||
|
'format_id': 'download',
|
||||||
|
'url': update_url_query(authenticated_download_url, query),
|
||||||
|
})
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
creator = f.get('created_by') or {}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': file_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'description': f.get('description') or None,
|
||||||
|
'uploader': creator.get('name'),
|
||||||
|
'timestamp': parse_iso8601(f.get('created_at')),
|
||||||
|
'uploader_id': creator.get('id'),
|
||||||
|
}
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class BravoTVIE(AdobePassIE):
|
class BravoTVIE(AdobePassIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||||
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
|
||||||
|
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
site, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
settings = self._parse_json(self._search_regex(
|
settings = self._parse_json(self._search_regex(
|
||||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
|
||||||
|
@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
|
||||||
tp_path = release_pid = tve['release_pid']
|
tp_path = release_pid = tve['release_pid']
|
||||||
if tve.get('entitlement') == 'auth':
|
if tve.get('entitlement') == 'auth':
|
||||||
adobe_pass = settings.get('tve_adobe_auth', {})
|
adobe_pass = settings.get('tve_adobe_auth', {})
|
||||||
|
if site == 'bravotv':
|
||||||
|
site = 'bravo'
|
||||||
resource = self._get_mvpd_resource(
|
resource = self._get_mvpd_resource(
|
||||||
adobe_pass.get('adobePassResourceId', 'bravo'),
|
adobe_pass.get('adobePassResourceId') or site,
|
||||||
tve['title'], release_pid, tve.get('rating'))
|
tve['title'], release_pid, tve.get('rating'))
|
||||||
query['auth'] = self._extract_mvpd_auth(
|
query['auth'] = self._extract_mvpd_auth(
|
||||||
url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
|
url, release_pid,
|
||||||
|
adobe_pass.get('adobePassRequestorId') or site, resource)
|
||||||
else:
|
else:
|
||||||
shared_playlist = settings['ls_playlist']
|
shared_playlist = settings['ls_playlist']
|
||||||
account_pid = shared_playlist['account_pid']
|
account_pid = shared_playlist['account_pid']
|
||||||
|
|
|
@ -28,6 +28,7 @@ from ..utils import (
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
|
@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
|
||||||
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
|
||||||
title = json_data['name'].strip()
|
title = json_data['name'].strip()
|
||||||
|
|
||||||
|
num_drm_sources = 0
|
||||||
formats = []
|
formats = []
|
||||||
for source in json_data.get('sources', []):
|
sources = json_data.get('sources') or []
|
||||||
|
for source in sources:
|
||||||
container = source.get('container')
|
container = source.get('container')
|
||||||
ext = mimetype2ext(source.get('type'))
|
ext = mimetype2ext(source.get('type'))
|
||||||
src = source.get('src')
|
src = source.get('src')
|
||||||
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
# https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
|
||||||
if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
|
if container == 'WVM' or source.get('key_systems'):
|
||||||
|
num_drm_sources += 1
|
||||||
|
continue
|
||||||
|
elif ext == 'ism':
|
||||||
continue
|
continue
|
||||||
elif ext == 'm3u8' or container == 'M2TS':
|
elif ext == 'm3u8' or container == 'M2TS':
|
||||||
if not src:
|
if not src:
|
||||||
|
@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
|
||||||
'format_id': build_format_id('rtmp'),
|
'format_id': build_format_id('rtmp'),
|
||||||
})
|
})
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
if not formats:
|
|
||||||
# for sonyliv.com DRM protected videos
|
|
||||||
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
|
|
||||||
if s3_source_url:
|
|
||||||
formats.append({
|
|
||||||
'url': s3_source_url,
|
|
||||||
'format_id': 'source',
|
|
||||||
})
|
|
||||||
|
|
||||||
|
if not formats:
|
||||||
errors = json_data.get('errors')
|
errors = json_data.get('errors')
|
||||||
if not formats and errors:
|
if errors:
|
||||||
error = errors[0]
|
error = errors[0]
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
|
||||||
|
if sources and num_drm_sources == len(sources):
|
||||||
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
@ -600,11 +601,14 @@ class BrightcoveNewIE(AdobePassIE):
|
||||||
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
|
||||||
|
|
||||||
def extract_policy_key():
|
def extract_policy_key():
|
||||||
|
base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
|
||||||
|
config = self._download_json(
|
||||||
|
base_url + 'config.json', video_id, fatal=False) or {}
|
||||||
|
policy_key = try_get(
|
||||||
|
config, lambda x: x['video_cloud']['policy_key'])
|
||||||
|
if not policy_key:
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://players.brightcove.net/%s/%s_%s/index.min.js'
|
base_url + 'index.min.js', video_id)
|
||||||
% (account_id, player_id, embed), video_id)
|
|
||||||
|
|
||||||
policy_key = None
|
|
||||||
|
|
||||||
catalog = self._search_regex(
|
catalog = self._search_regex(
|
||||||
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
|
||||||
|
|
|
@ -8,18 +8,20 @@ from .gigya import GigyaBaseIE
|
||||||
from ..compat import compat_HTTPError
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
strip_or_none,
|
clean_html,
|
||||||
|
extract_attributes,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_iso8601,
|
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class CanvasIE(InfoExtractor):
|
class CanvasIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
||||||
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
'md5': '68993eda72ef62386a15ea2cf3c93107',
|
||||||
|
@ -37,6 +39,7 @@ class CanvasIE(InfoExtractor):
|
||||||
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
_GEO_BYPASS = False
|
||||||
_HLS_ENTRY_PROTOCOLS_MAP = {
|
_HLS_ENTRY_PROTOCOLS_MAP = {
|
||||||
'HLS': 'm3u8_native',
|
'HLS': 'm3u8_native',
|
||||||
'HLS_AES': 'm3u8',
|
'HLS_AES': 'm3u8',
|
||||||
|
@ -47,6 +50,8 @@ class CanvasIE(InfoExtractor):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
site_id, video_id = mobj.group('site_id'), mobj.group('id')
|
||||||
|
|
||||||
|
data = None
|
||||||
|
if site_id != 'vrtvideo':
|
||||||
# Old API endpoint, serves more formats but may fail for some videos
|
# Old API endpoint, serves more formats but may fail for some videos
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
'https://mediazone.vrt.be/api/v1/%s/assets/%s'
|
||||||
|
@ -55,21 +60,24 @@ class CanvasIE(InfoExtractor):
|
||||||
|
|
||||||
# New API endpoint
|
# New API endpoint
|
||||||
if not data:
|
if not data:
|
||||||
|
headers = self.geo_verification_headers()
|
||||||
|
headers.update({'Content-Type': 'application/json'})
|
||||||
token = self._download_json(
|
token = self._download_json(
|
||||||
'%s/tokens' % self._REST_API_BASE, video_id,
|
'%s/tokens' % self._REST_API_BASE, video_id,
|
||||||
'Downloading token', data=b'',
|
'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
|
||||||
headers={'Content-Type': 'application/json'})['vrtPlayerToken']
|
|
||||||
data = self._download_json(
|
data = self._download_json(
|
||||||
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
'%s/videos/%s' % (self._REST_API_BASE, video_id),
|
||||||
video_id, 'Downloading video JSON', fatal=False, query={
|
video_id, 'Downloading video JSON', query={
|
||||||
'vrtPlayerToken': token,
|
'vrtPlayerToken': token,
|
||||||
'client': '%s@PROD' % site_id,
|
'client': '%s@PROD' % site_id,
|
||||||
}, expected_status=400)
|
}, expected_status=400)
|
||||||
message = data.get('message')
|
if not data.get('title'):
|
||||||
if message and not data.get('title'):
|
code = data.get('code')
|
||||||
if data.get('code') == 'AUTHENTICATION_REQUIRED':
|
if code == 'AUTHENTICATION_REQUIRED':
|
||||||
self.raise_login_required(message)
|
self.raise_login_required()
|
||||||
raise ExtractorError(message, expected=True)
|
elif code == 'INVALID_LOCATION':
|
||||||
|
self.raise_geo_restricted(countries=['BE'])
|
||||||
|
raise ExtractorError(data.get('message') or code, expected=True)
|
||||||
|
|
||||||
title = data['title']
|
title = data['title']
|
||||||
description = data.get('description')
|
description = data.get('description')
|
||||||
|
@ -205,20 +213,24 @@ class CanvasEenIE(InfoExtractor):
|
||||||
|
|
||||||
class VrtNUIE(GigyaBaseIE):
|
class VrtNUIE(GigyaBaseIE):
|
||||||
IE_DESC = 'VrtNU.be'
|
IE_DESC = 'VrtNU.be'
|
||||||
_VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Available via old API endpoint
|
# Available via old API endpoint
|
||||||
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
|
'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'De zwarte weduwe',
|
'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
|
||||||
'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
|
'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
|
||||||
'duration': 1457.04,
|
'duration': 1457.04,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'season': 'Season 1',
|
'series': 'Postbus X',
|
||||||
'season_number': 1,
|
'season': 'Seizoen 1989',
|
||||||
|
'season_number': 1989,
|
||||||
|
'episode': 'De zwarte weduwe',
|
||||||
'episode_number': 1,
|
'episode_number': 1,
|
||||||
|
'timestamp': 1595822400,
|
||||||
|
'upload_date': '20200727',
|
||||||
},
|
},
|
||||||
'skip': 'This video is only available for registered users',
|
'skip': 'This video is only available for registered users',
|
||||||
'params': {
|
'params': {
|
||||||
|
@ -300,69 +312,73 @@ class VrtNUIE(GigyaBaseIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage, urlh = self._download_webpage_handle(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
attrs = extract_attributes(self._search_regex(
|
||||||
|
r'(<nui-media[^>]+>)', webpage, 'media element'))
|
||||||
|
video_id = attrs['videoid']
|
||||||
|
publication_id = attrs.get('publicationid')
|
||||||
|
if publication_id:
|
||||||
|
video_id = publication_id + '$' + video_id
|
||||||
|
|
||||||
|
page = (self._parse_json(self._search_regex(
|
||||||
|
r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
|
||||||
|
default='{}'), video_id, fatal=False) or {}).get('page') or {}
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, display_id, default={})
|
info = self._search_json_ld(webpage, display_id, default={})
|
||||||
|
|
||||||
# title is optional here since it may be extracted by extractor
|
|
||||||
# that is delegated from here
|
|
||||||
title = strip_or_none(self._html_search_regex(
|
|
||||||
r'(?ms)<h1 class="content__heading">(.+?)</h1>',
|
|
||||||
webpage, 'title', default=None))
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
|
||||||
r'(?ms)<div class="content__description">(.+?)</div>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
|
|
||||||
season = self._html_search_regex(
|
|
||||||
[r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
|
|
||||||
<span>seizoen\ (.+?)</span>\s*
|
|
||||||
</div>''',
|
|
||||||
r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
|
|
||||||
webpage, 'season', default=None)
|
|
||||||
|
|
||||||
season_number = int_or_none(season)
|
|
||||||
|
|
||||||
episode_number = int_or_none(self._html_search_regex(
|
|
||||||
r'''(?xms)<div\ class="content__episode">\s*
|
|
||||||
<abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
|
|
||||||
</div>''',
|
|
||||||
webpage, 'episode_number', default=None))
|
|
||||||
|
|
||||||
release_date = parse_iso8601(self._html_search_regex(
|
|
||||||
r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
|
|
||||||
webpage, 'release_date', default=None))
|
|
||||||
|
|
||||||
# If there's a ? or a # in the URL, remove them and everything after
|
|
||||||
clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
|
|
||||||
securevideo_url = clean_url + '.mssecurevideo.json'
|
|
||||||
|
|
||||||
try:
|
|
||||||
video = self._download_json(securevideo_url, display_id)
|
|
||||||
except ExtractorError as e:
|
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
|
|
||||||
self.raise_login_required()
|
|
||||||
raise
|
|
||||||
|
|
||||||
# We are dealing with a '../<show>.relevant' URL
|
|
||||||
redirect_url = video.get('url')
|
|
||||||
if redirect_url:
|
|
||||||
return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
|
|
||||||
|
|
||||||
# There is only one entry, but with an unknown key, so just get
|
|
||||||
# the first one
|
|
||||||
video_id = list(video.values())[0].get('videoid')
|
|
||||||
|
|
||||||
return merge_dicts(info, {
|
return merge_dicts(info, {
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
|
||||||
'ie_key': CanvasIE.ie_key(),
|
'ie_key': CanvasIE.ie_key(),
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
|
'season_number': int_or_none(page.get('episode_season')),
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
class DagelijkseKostIE(InfoExtractor):
|
||||||
|
IE_DESC = 'dagelijksekost.een.be'
|
||||||
|
_VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
|
||||||
|
'md5': '30bfffc323009a3e5f689bef6efa2365',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
|
||||||
|
'display_id': 'hachis-parmentier-met-witloof',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Hachis parmentier met witloof',
|
||||||
|
'description': 'md5:9960478392d87f63567b5b117688cdc5',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'duration': 283.02,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['is not a supported codec'],
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
title = strip_or_none(get_element_by_class(
|
||||||
|
'dish-metadata__title', webpage
|
||||||
|
) or self._html_search_meta(
|
||||||
|
'twitter:title', webpage))
|
||||||
|
|
||||||
|
description = clean_html(get_element_by_class(
|
||||||
|
'dish-description', webpage)
|
||||||
|
) or self._html_search_meta(
|
||||||
|
('description', 'twitter:description', 'og:description'),
|
||||||
|
webpage)
|
||||||
|
|
||||||
|
video_id = self._html_search_regex(
|
||||||
|
r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
|
||||||
|
group='id')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
|
||||||
|
'ie_key': CanvasIE.ie_key(),
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'season': season,
|
}
|
||||||
'season_number': season_number,
|
|
||||||
'episode_number': episode_number,
|
|
||||||
'release_date': release_date,
|
|
||||||
})
|
|
||||||
|
|
|
@ -11,7 +11,47 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class CBSLocalIE(AnvatoIE):
|
class CBSLocalIE(AnvatoIE):
|
||||||
_VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
|
_VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
|
||||||
|
_VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3580809',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'A Very Blue Anniversary',
|
||||||
|
'description': 'CBS2’s Cindy Hsu has more.',
|
||||||
|
'thumbnail': 're:^https?://.*',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': r're:^\d{8}$',
|
||||||
|
'uploader': 'CBS',
|
||||||
|
'subtitles': {
|
||||||
|
'en': 'mincount:5',
|
||||||
|
},
|
||||||
|
'categories': [
|
||||||
|
'Stations\\Spoken Word\\WCBSTV',
|
||||||
|
'Syndication\\AOL',
|
||||||
|
'Syndication\\MSN',
|
||||||
|
'Syndication\\NDN',
|
||||||
|
'Syndication\\Yahoo',
|
||||||
|
'Content\\News',
|
||||||
|
'Content\\News\\Local News',
|
||||||
|
],
|
||||||
|
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
mcp_id = self._match_id(url)
|
||||||
|
return self.url_result(
|
||||||
|
'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
|
||||||
|
|
||||||
|
|
||||||
|
class CBSLocalArticleIE(AnvatoIE):
|
||||||
|
_VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# Anvato backend
|
# Anvato backend
|
||||||
|
@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
|
||||||
'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3580809',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'A Very Blue Anniversary',
|
|
||||||
'description': 'CBS2’s Cindy Hsu has more.',
|
|
||||||
'thumbnail': 're:^https?://.*',
|
|
||||||
'timestamp': int,
|
|
||||||
'upload_date': r're:^\d{8}$',
|
|
||||||
'uploader': 'CBS',
|
|
||||||
'subtitles': {
|
|
||||||
'en': 'mincount:5',
|
|
||||||
},
|
|
||||||
'categories': [
|
|
||||||
'Stations\\Spoken Word\\WCBSTV',
|
|
||||||
'Syndication\\AOL',
|
|
||||||
'Syndication\\MSN',
|
|
||||||
'Syndication\\NDN',
|
|
||||||
'Syndication\\Yahoo',
|
|
||||||
'Content\\News',
|
|
||||||
'Content\\News\\Local News',
|
|
||||||
],
|
|
||||||
'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
|
|
||||||
},
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,15 +1,18 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import calendar
|
||||||
|
import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
extract_timezone,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -24,8 +27,9 @@ class CCMAIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'L\'espot de La Marató de TV3',
|
'title': 'L\'espot de La Marató de TV3',
|
||||||
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
|
||||||
'timestamp': 1470918540,
|
'timestamp': 1478608140,
|
||||||
'upload_date': '20160811',
|
'upload_date': '20161108',
|
||||||
|
'age_limit': 0,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
|
||||||
|
@ -35,8 +39,24 @@ class CCMAIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'El Consell de Savis analitza el derbi',
|
'title': 'El Consell de Savis analitza el derbi',
|
||||||
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
|
||||||
'upload_date': '20171205',
|
'upload_date': '20170512',
|
||||||
'timestamp': 1512507300,
|
'timestamp': 1494622500,
|
||||||
|
'vcodec': 'none',
|
||||||
|
'categories': ['Esports'],
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
|
||||||
|
'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6031387',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
|
||||||
|
'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
|
||||||
|
'timestamp': 1582577700,
|
||||||
|
'upload_date': '20200224',
|
||||||
|
'subtitles': 'mincount:4',
|
||||||
|
'age_limit': 16,
|
||||||
|
'series': 'Crims',
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -72,17 +92,28 @@ class CCMAIE(InfoExtractor):
|
||||||
|
|
||||||
informacio = media['informacio']
|
informacio = media['informacio']
|
||||||
title = informacio['titol']
|
title = informacio['titol']
|
||||||
durada = informacio.get('durada', {})
|
durada = informacio.get('durada') or {}
|
||||||
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
|
||||||
timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
|
tematica = try_get(informacio, lambda x: x['tematica']['text'])
|
||||||
|
|
||||||
|
timestamp = None
|
||||||
|
data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
|
||||||
|
try:
|
||||||
|
timezone, data_utc = extract_timezone(data_utc)
|
||||||
|
timestamp = calendar.timegm((datetime.datetime.strptime(
|
||||||
|
data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
|
||||||
|
except TypeError:
|
||||||
|
pass
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
subtitols = media.get('subtitols', {})
|
subtitols = media.get('subtitols') or []
|
||||||
if subtitols:
|
if isinstance(subtitols, dict):
|
||||||
sub_url = subtitols.get('url')
|
subtitols = [subtitols]
|
||||||
|
for st in subtitols:
|
||||||
|
sub_url = st.get('url')
|
||||||
if sub_url:
|
if sub_url:
|
||||||
subtitles.setdefault(
|
subtitles.setdefault(
|
||||||
subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
|
st.get('iso') or st.get('text') or 'ca', []).append({
|
||||||
'url': sub_url,
|
'url': sub_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@ -97,6 +128,16 @@ class CCMAIE(InfoExtractor):
|
||||||
'height': int_or_none(imatges.get('alcada')),
|
'height': int_or_none(imatges.get('alcada')),
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
age_limit = None
|
||||||
|
codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
|
||||||
|
if codi_etic:
|
||||||
|
codi_etic_s = codi_etic.split('_')
|
||||||
|
if len(codi_etic_s) == 2:
|
||||||
|
if codi_etic_s[1] == 'TP':
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = int_or_none(codi_etic_s[1])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': media_id,
|
'id': media_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -106,4 +147,9 @@ class CCMAIE(InfoExtractor):
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'age_limit': age_limit,
|
||||||
|
'alt_title': informacio.get('titol_complet'),
|
||||||
|
'episode_number': int_or_none(informacio.get('capitol')),
|
||||||
|
'categories': [tematica] if tematica else None,
|
||||||
|
'series': informacio.get('programa'),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import smuggle_url
|
from ..utils import smuggle_url
|
||||||
|
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class CNBCVideoIE(InfoExtractor):
|
class CNBCVideoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
path, display_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, display_id)
|
video_id = self._download_json(
|
||||||
video_id = self._search_regex(
|
'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
|
||||||
r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
|
'query': '''{
|
||||||
'video id')
|
page(path: "%s") {
|
||||||
|
vcpsId
|
||||||
|
}
|
||||||
|
}''' % path,
|
||||||
|
})['data']['page']['vcpsId']
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'http://video.cnbc.com/gallery/?video=%s' % video_id,
|
'http://video.cnbc.com/gallery/?video=%d' % video_id,
|
||||||
CNBCIE.ie_key())
|
CNBCIE.ie_key())
|
||||||
|
|
|
@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
|
||||||
config['data_src'] % path, page_title, {
|
config['data_src'] % path, page_title, {
|
||||||
'default': {
|
'default': {
|
||||||
'media_src': config['media_src'],
|
'media_src': config['media_src'],
|
||||||
}
|
},
|
||||||
|
'f4m': {
|
||||||
|
'host': 'cnn-vh.akamaihd.net',
|
||||||
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,142 +1,51 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .mtv import MTVServicesInfoExtractor
|
from .mtv import MTVServicesInfoExtractor
|
||||||
from .common import InfoExtractor
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralIE(MTVServicesInfoExtractor):
|
class ComedyCentralIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
_VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
|
||||||
(video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
|
|
||||||
/(?P<title>.*)'''
|
|
||||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
|
'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
|
||||||
'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
|
'md5': 'b8acb347177c680ff18a292aa2166f80',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
|
'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
|
'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
|
||||||
'description': 'After a certain point, breastfeeding becomes c**kblocking.',
|
'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
|
||||||
'timestamp': 1376798400,
|
'timestamp': 1598670000,
|
||||||
'upload_date': '20130818',
|
'upload_date': '20200829',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
|
'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
|
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
|
|
||||||
(?:full-episodes|shows(?=/[^/]+/full-episodes))
|
|
||||||
/(?P<id>[^?]+)'''
|
|
||||||
_FEED_URL = 'http://comedycentral.com/feeds/mrss/'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
|
|
||||||
'info_dict': {
|
|
||||||
'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
|
|
||||||
'title': 'November 28, 2016 - Ryan Speedo Green',
|
|
||||||
},
|
|
||||||
'playlist_count': 4,
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
playlist_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
mgid = self._extract_triforce_mgid(webpage, data_zone='t2_lc_promo1')
|
|
||||||
videos_info = self._get_videos_info(mgid)
|
|
||||||
return videos_info
|
|
||||||
|
|
||||||
|
|
||||||
class ToshIE(MTVServicesInfoExtractor):
|
|
||||||
IE_DESC = 'Tosh.0'
|
|
||||||
_VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
|
|
||||||
_FEED_URL = 'http://tosh.cc.com/feeds/mrss'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
|
|
||||||
'info_dict': {
|
|
||||||
'description': 'Tosh asked fans to share their summer plans.',
|
|
||||||
'title': 'Twitter Users Share Summer Plans',
|
|
||||||
},
|
|
||||||
'playlist': [{
|
|
||||||
'md5': 'f269e88114c1805bb6d7653fecea9e06',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
|
|
||||||
'description': 'Tosh asked fans to share their summer plans.',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
|
||||||
# It's really reported to be published on year 2077
|
|
||||||
'upload_date': '20770610',
|
|
||||||
'timestamp': 3390510600,
|
|
||||||
'subtitles': {
|
|
||||||
'en': 'mincount:3',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
}, {
|
|
||||||
'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
class ComedyCentralTVIE(MTVServicesInfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
|
'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'local_playlist-f99b626bdfe13568579a',
|
'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
|
'title': 'Josh Investigates',
|
||||||
|
'description': 'Steht uns das Ende der Welt bevor?',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
}]
|
||||||
|
_FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
|
||||||
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _get_feed_query(self, uri):
|
||||||
video_id = self._match_id(url)
|
return {
|
||||||
|
'accountOverride': 'intl.mtvi.com',
|
||||||
webpage = self._download_webpage(url, video_id)
|
'arcEp': 'web.cc.tv',
|
||||||
|
'ep': 'b9032c3a',
|
||||||
mrss_url = self._search_regex(
|
'imageEp': 'web.cc.tv',
|
||||||
r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
|
'mgid': uri,
|
||||||
webpage, 'mrss url', group='url')
|
|
||||||
|
|
||||||
return self._get_videos_info_from_url(mrss_url, video_id)
|
|
||||||
|
|
||||||
|
|
||||||
class ComedyCentralShortnameIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': ':tds',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': ':thedailyshow',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': ':theopposition',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
shortcut_map = {
|
|
||||||
'tds': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
|
||||||
'thedailyshow': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
|
|
||||||
'theopposition': 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes',
|
|
||||||
}
|
}
|
||||||
return self.url_result(shortcut_map[video_id])
|
|
||||||
|
|
|
@ -336,8 +336,8 @@ class InfoExtractor(object):
|
||||||
object, each element of which is a valid dictionary by this specification.
|
object, each element of which is a valid dictionary by this specification.
|
||||||
|
|
||||||
Additionally, playlists can have "id", "title", "description", "uploader",
|
Additionally, playlists can have "id", "title", "description", "uploader",
|
||||||
"uploader_id", "uploader_url" attributes with the same semantics as videos
|
"uploader_id", "uploader_url", "duration" attributes with the same semantics
|
||||||
(see above).
|
as videos (see above).
|
||||||
|
|
||||||
|
|
||||||
_type "multi_video" indicates that there are multiple videos that
|
_type "multi_video" indicates that there are multiple videos that
|
||||||
|
@ -1239,8 +1239,16 @@ class InfoExtractor(object):
|
||||||
'ViewAction': 'view',
|
'ViewAction': 'view',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def extract_interaction_type(e):
|
||||||
|
interaction_type = e.get('interactionType')
|
||||||
|
if isinstance(interaction_type, dict):
|
||||||
|
interaction_type = interaction_type.get('@type')
|
||||||
|
return str_or_none(interaction_type)
|
||||||
|
|
||||||
def extract_interaction_statistic(e):
|
def extract_interaction_statistic(e):
|
||||||
interaction_statistic = e.get('interactionStatistic')
|
interaction_statistic = e.get('interactionStatistic')
|
||||||
|
if isinstance(interaction_statistic, dict):
|
||||||
|
interaction_statistic = [interaction_statistic]
|
||||||
if not isinstance(interaction_statistic, list):
|
if not isinstance(interaction_statistic, list):
|
||||||
return
|
return
|
||||||
for is_e in interaction_statistic:
|
for is_e in interaction_statistic:
|
||||||
|
@ -1248,8 +1256,8 @@ class InfoExtractor(object):
|
||||||
continue
|
continue
|
||||||
if is_e.get('@type') != 'InteractionCounter':
|
if is_e.get('@type') != 'InteractionCounter':
|
||||||
continue
|
continue
|
||||||
interaction_type = is_e.get('interactionType')
|
interaction_type = extract_interaction_type(is_e)
|
||||||
if not isinstance(interaction_type, compat_str):
|
if not interaction_type:
|
||||||
continue
|
continue
|
||||||
# For interaction count some sites provide string instead of
|
# For interaction count some sites provide string instead of
|
||||||
# an integer (as per spec) with non digit characters (e.g. ",")
|
# an integer (as per spec) with non digit characters (e.g. ",")
|
||||||
|
@ -1474,9 +1482,10 @@ class InfoExtractor(object):
|
||||||
try:
|
try:
|
||||||
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
|
||||||
return True
|
return True
|
||||||
except ExtractorError:
|
except ExtractorError as e:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
'%s: %s URL is invalid, skipping' % (video_id, item))
|
'%s: %s URL is invalid, skipping: %s'
|
||||||
|
% (video_id, item, error_to_compat_str(e.cause)))
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def http_scheme(self):
|
def http_scheme(self):
|
||||||
|
@ -2612,7 +2621,15 @@ class InfoExtractor(object):
|
||||||
return entries
|
return entries
|
||||||
|
|
||||||
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
|
||||||
|
signed = 'hdnea=' in manifest_url
|
||||||
|
if not signed:
|
||||||
|
# https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
|
||||||
|
manifest_url = re.sub(
|
||||||
|
r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
|
||||||
|
'', manifest_url).strip('?')
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
hdcore_sign = 'hdcore=3.7.0'
|
hdcore_sign = 'hdcore=3.7.0'
|
||||||
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
|
||||||
hds_host = hosts.get('hds')
|
hds_host = hosts.get('hds')
|
||||||
|
@ -2625,13 +2642,38 @@ class InfoExtractor(object):
|
||||||
for entry in f4m_formats:
|
for entry in f4m_formats:
|
||||||
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
entry.update({'extra_param_to_segment_url': hdcore_sign})
|
||||||
formats.extend(f4m_formats)
|
formats.extend(f4m_formats)
|
||||||
|
|
||||||
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
|
||||||
hls_host = hosts.get('hls')
|
hls_host = hosts.get('hls')
|
||||||
if hls_host:
|
if hls_host:
|
||||||
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
|
||||||
formats.extend(self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False)
|
||||||
|
formats.extend(m3u8_formats)
|
||||||
|
|
||||||
|
http_host = hosts.get('http')
|
||||||
|
if http_host and m3u8_formats and not signed:
|
||||||
|
REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
|
||||||
|
qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
|
||||||
|
qualities_length = len(qualities)
|
||||||
|
if len(m3u8_formats) in (qualities_length, qualities_length + 1):
|
||||||
|
i = 0
|
||||||
|
for f in m3u8_formats:
|
||||||
|
if f['vcodec'] != 'none':
|
||||||
|
for protocol in ('http', 'https'):
|
||||||
|
http_f = f.copy()
|
||||||
|
del http_f['manifest_url']
|
||||||
|
http_url = re.sub(
|
||||||
|
REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
|
||||||
|
http_f.update({
|
||||||
|
'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
|
||||||
|
'url': http_url,
|
||||||
|
'protocol': protocol,
|
||||||
|
})
|
||||||
|
formats.append(http_f)
|
||||||
|
i += 1
|
||||||
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
|
||||||
|
|
|
@ -16,6 +16,8 @@ from ..utils import (
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'uploader': 'gq',
|
'uploader': 'gq',
|
||||||
'upload_date': '20170321',
|
'upload_date': '20170321',
|
||||||
'timestamp': 1490126427,
|
'timestamp': 1490126427,
|
||||||
|
'description': 'How much grimmer would things be if these people were competent?',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# JS embed
|
# JS embed
|
||||||
|
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
|
||||||
'uploader': 'arstechnica',
|
'uploader': 'arstechnica',
|
||||||
'upload_date': '20150916',
|
'upload_date': '20150916',
|
||||||
'timestamp': 1442434955,
|
'timestamp': 1442434920,
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
|
||||||
|
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for t, caption in video_info.get('captions', {}).items():
|
||||||
|
caption_url = caption.get('src')
|
||||||
|
if not (t in ('vtt', 'srt', 'tml') and caption_url):
|
||||||
|
continue
|
||||||
|
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
|
||||||
'season': video_info.get('season_title'),
|
'season': video_info.get('season_title'),
|
||||||
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
'timestamp': parse_iso8601(video_info.get('premiere_date')),
|
||||||
'categories': video_info.get('categories'),
|
'categories': video_info.get('categories'),
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -224,6 +235,14 @@ class CondeNastIE(InfoExtractor):
|
||||||
|
|
||||||
if url_type == 'series':
|
if url_type == 'series':
|
||||||
return self._extract_series(url, webpage)
|
return self._extract_series(url, webpage)
|
||||||
|
else:
|
||||||
|
video = try_get(self._parse_json(self._search_regex(
|
||||||
|
r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
|
||||||
|
'preload state', '{}'), display_id),
|
||||||
|
lambda x: x['transformed']['video'])
|
||||||
|
if video:
|
||||||
|
params = {'videoId': video['id']}
|
||||||
|
info = {'description': strip_or_none(video.get('description'))}
|
||||||
else:
|
else:
|
||||||
params = self._extract_video_params(webpage, display_id)
|
params = self._extract_video_params(webpage, display_id)
|
||||||
info = self._search_json_ld(
|
info = self._search_json_ld(
|
||||||
|
|
|
@ -8,9 +8,14 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
find_xpath_attr,
|
find_xpath_attr,
|
||||||
|
get_element_by_attribute,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
merge_dicts,
|
||||||
|
parse_iso8601,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
)
|
)
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
|
@ -98,6 +103,48 @@ class CSpanIE(InfoExtractor):
|
||||||
bc_attr['data-bcid'])
|
bc_attr['data-bcid'])
|
||||||
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
return self.url_result(smuggle_url(bc_url, {'source_url': url}))
|
||||||
|
|
||||||
|
def add_referer(formats):
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})['Referer'] = url
|
||||||
|
|
||||||
|
# As of 01.12.2020 this path looks to cover all cases making the rest
|
||||||
|
# of the code unnecessary
|
||||||
|
jwsetup = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
|
||||||
|
default='{}'),
|
||||||
|
video_id, transform_source=js_to_json, fatal=False)
|
||||||
|
if jwsetup:
|
||||||
|
info = self._parse_jwplayer_data(
|
||||||
|
jwsetup, video_id, require_title=False, m3u8_id='hls',
|
||||||
|
base_url=url)
|
||||||
|
add_referer(info['formats'])
|
||||||
|
for subtitles in info['subtitles'].values():
|
||||||
|
for subtitle in subtitles:
|
||||||
|
ext = determine_ext(subtitle['url'])
|
||||||
|
if ext == 'php':
|
||||||
|
ext = 'vtt'
|
||||||
|
subtitle['ext'] = ext
|
||||||
|
ld_info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
title = get_element_by_class('video-page-title', webpage) or \
|
||||||
|
self._og_search_title(webpage)
|
||||||
|
description = get_element_by_attribute('itemprop', 'description', webpage) or \
|
||||||
|
self._html_search_meta(['og:description', 'description'], webpage)
|
||||||
|
return merge_dicts(info, ld_info, {
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
|
||||||
|
'description': description,
|
||||||
|
'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
|
||||||
|
'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
|
||||||
|
'duration': int_or_none(self._search_regex(
|
||||||
|
r'jwsetup\.seclength\s*=\s*(\d+);',
|
||||||
|
webpage, 'duration', fatal=False)),
|
||||||
|
'view_count': str_to_int(self._search_regex(
|
||||||
|
r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
|
||||||
|
webpage, 'views', fatal=False)),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Obsolete
|
||||||
# We first look for clipid, because clipprog always appears before
|
# We first look for clipid, because clipprog always appears before
|
||||||
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
|
||||||
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
results = list(filter(None, (re.search(p, webpage) for p in patterns)))
|
||||||
|
@ -165,6 +212,7 @@ class CSpanIE(InfoExtractor):
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
path, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
|
||||||
|
add_referer(formats)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': '%s_%d' % (video_id, partnum + 1),
|
'id': '%s_%d' % (video_id, partnum + 1),
|
||||||
|
|
52
haruhi_dl/extractor/ctv.py
Normal file
52
haruhi_dl/extractor/ctv.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class CTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2102249',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Wednesday, December 23, 2020',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
|
||||||
|
'timestamp': 1608732000,
|
||||||
|
'upload_date': '20201223',
|
||||||
|
'series': 'Your Morning',
|
||||||
|
'season': '2020-2021',
|
||||||
|
'season_number': 5,
|
||||||
|
'episode_number': 88,
|
||||||
|
'tags': ['Your Morning'],
|
||||||
|
'categories': ['Talk Show'],
|
||||||
|
'duration': 7467.126,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
content = self._download_json(
|
||||||
|
'https://www.ctv.ca/space-graphql/graphql', display_id, query={
|
||||||
|
'query': '''{
|
||||||
|
resolvedPath(path: "/%s") {
|
||||||
|
lastSegment {
|
||||||
|
content {
|
||||||
|
... on AxisContent {
|
||||||
|
axisId
|
||||||
|
videoPlayerDestCode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''' % display_id,
|
||||||
|
})['data']['resolvedPath']['lastSegment']['content']
|
||||||
|
video_id = content['axisId']
|
||||||
|
return self.url_result(
|
||||||
|
'9c9media:%s:%s' % (content['videoPlayerDestCode'], video_id),
|
||||||
|
'NineCNineMedia', video_id)
|
|
@ -7,7 +7,7 @@ from .dplay import DPlayIE
|
||||||
|
|
||||||
|
|
||||||
class DiscoveryNetworksDeIE(DPlayIE):
|
class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
|
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
|
||||||
|
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -10,16 +11,23 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
strip_or_none,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DPlayIE(InfoExtractor):
|
class DPlayIE(InfoExtractor):
|
||||||
|
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
|
||||||
_VALID_URL = r'''(?x)https?://
|
_VALID_URL = r'''(?x)https?://
|
||||||
(?P<domain>
|
(?P<domain>
|
||||||
(?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
|
(?:www\.)?(?P<host>d
|
||||||
|
(?:
|
||||||
|
play\.(?P<country>dk|fi|jp|se|no)|
|
||||||
|
iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
|
||||||
|
)
|
||||||
|
)|
|
||||||
(?P<subdomain_country>es|it)\.dplay\.com
|
(?P<subdomain_country>es|it)\.dplay\.com
|
||||||
)/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
|
)/[^/]+''' + _PATH_REGEX
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# non geo restricted, via secure api, unsigned download hls URL
|
# non geo restricted, via secure api, unsigned download hls URL
|
||||||
|
@ -126,23 +134,67 @@ class DPlayIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
'url': 'https://www.dplay.jp/video/gold-rush/24086',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _process_errors(self, e, geo_countries):
|
||||||
|
info = self._parse_json(e.cause.read().decode('utf-8'), None)
|
||||||
|
error = info['errors'][0]
|
||||||
|
error_code = error.get('code')
|
||||||
|
if error_code == 'access.denied.geoblocked':
|
||||||
|
self.raise_geo_restricted(countries=geo_countries)
|
||||||
|
elif error_code in ('access.denied.missingpackage', 'invalid.token'):
|
||||||
|
raise ExtractorError(
|
||||||
|
'This video is only available for registered users. You may want to use --cookies.', expected=True)
|
||||||
|
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
||||||
|
|
||||||
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
|
headers['Authorization'] = 'Bearer ' + self._download_json(
|
||||||
|
disco_base + 'token', display_id, 'Downloading token',
|
||||||
|
query={
|
||||||
|
'realm': realm,
|
||||||
|
})['data']['attributes']['token']
|
||||||
|
|
||||||
|
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||||
|
streaming = self._download_json(
|
||||||
|
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
||||||
|
video_id, headers=headers)['data']['attributes']['streaming']
|
||||||
|
streaming_list = []
|
||||||
|
for format_id, format_dict in streaming.items():
|
||||||
|
streaming_list.append({
|
||||||
|
'type': format_id,
|
||||||
|
'url': format_dict.get('url'),
|
||||||
|
})
|
||||||
|
return streaming_list
|
||||||
|
|
||||||
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
|
||||||
geo_countries = [country.upper()]
|
geo_countries = [country.upper()]
|
||||||
self._initialize_geo_bypass({
|
self._initialize_geo_bypass({
|
||||||
'countries': geo_countries,
|
'countries': geo_countries,
|
||||||
})
|
})
|
||||||
disco_base = 'https://%s/' % disco_host
|
disco_base = 'https://%s/' % disco_host
|
||||||
token = self._download_json(
|
|
||||||
disco_base + 'token', display_id, 'Downloading token',
|
|
||||||
query={
|
|
||||||
'realm': realm,
|
|
||||||
})['data']['attributes']['token']
|
|
||||||
headers = {
|
headers = {
|
||||||
'Referer': url,
|
'Referer': url,
|
||||||
'Authorization': 'Bearer ' + token,
|
|
||||||
}
|
}
|
||||||
|
self._update_disco_api_headers(headers, disco_base, display_id, realm)
|
||||||
|
try:
|
||||||
video = self._download_json(
|
video = self._download_json(
|
||||||
disco_base + 'content/videos/' + display_id, display_id,
|
disco_base + 'content/videos/' + display_id, display_id,
|
||||||
headers=headers, query={
|
headers=headers, query={
|
||||||
|
@ -153,31 +205,28 @@ class DPlayIE(InfoExtractor):
|
||||||
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
|
||||||
'include': 'images,primaryChannel,show,tags'
|
'include': 'images,primaryChannel,show,tags'
|
||||||
})
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
|
||||||
|
self._process_errors(e, geo_countries)
|
||||||
|
raise
|
||||||
video_id = video['data']['id']
|
video_id = video['data']['id']
|
||||||
info = video['data']['attributes']
|
info = video['data']['attributes']
|
||||||
title = info['name'].strip()
|
title = info['name'].strip()
|
||||||
formats = []
|
formats = []
|
||||||
try:
|
try:
|
||||||
streaming = self._download_json(
|
streaming = self._download_video_playback_info(
|
||||||
disco_base + 'playback/videoPlaybackInfo/' + video_id,
|
disco_base, video_id, headers)
|
||||||
display_id, headers=headers)['data']['attributes']['streaming']
|
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
|
self._process_errors(e, geo_countries)
|
||||||
error = info['errors'][0]
|
|
||||||
error_code = error.get('code')
|
|
||||||
if error_code == 'access.denied.geoblocked':
|
|
||||||
self.raise_geo_restricted(countries=geo_countries)
|
|
||||||
elif error_code == 'access.denied.missingpackage':
|
|
||||||
self.raise_login_required()
|
|
||||||
raise ExtractorError(info['errors'][0]['detail'], expected=True)
|
|
||||||
raise
|
raise
|
||||||
for format_id, format_dict in streaming.items():
|
for format_dict in streaming:
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
format_url = format_dict.get('url')
|
format_url = format_dict.get('url')
|
||||||
if not format_url:
|
if not format_url:
|
||||||
continue
|
continue
|
||||||
|
format_id = format_dict.get('type')
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if format_id == 'dash' or ext == 'mpd':
|
if format_id == 'dash' or ext == 'mpd':
|
||||||
formats.extend(self._extract_mpd_formats(
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
@ -225,7 +274,7 @@ class DPlayIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': info.get('description'),
|
'description': strip_or_none(info.get('description')),
|
||||||
'duration': float_or_none(info.get('videoDuration'), 1000),
|
'duration': float_or_none(info.get('videoDuration'), 1000),
|
||||||
'timestamp': unified_timestamp(info.get('publishStart')),
|
'timestamp': unified_timestamp(info.get('publishStart')),
|
||||||
'series': series,
|
'series': series,
|
||||||
|
@ -241,7 +290,80 @@ class DPlayIE(InfoExtractor):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('id')
|
display_id = mobj.group('id')
|
||||||
domain = mobj.group('domain').lstrip('www.')
|
domain = mobj.group('domain').lstrip('www.')
|
||||||
country = mobj.group('country') or mobj.group('subdomain_country')
|
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
|
||||||
host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
|
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
|
||||||
return self._get_disco_api_info(
|
return self._get_disco_api_info(
|
||||||
url, display_id, host, 'dplay' + country, country)
|
url, display_id, host, 'dplay' + country, country)
|
||||||
|
|
||||||
|
|
||||||
|
class DiscoveryPlusIE(DPlayIE):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '1140794',
|
||||||
|
'display_id': 'property-brothers-forever-home/food-and-family',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Food and Family',
|
||||||
|
'description': 'The brothers help a Richmond family expand their single-level home.',
|
||||||
|
'duration': 2583.113,
|
||||||
|
'timestamp': 1609304400,
|
||||||
|
'upload_date': '20201230',
|
||||||
|
'creator': 'HGTV',
|
||||||
|
'series': 'Property Brothers: Forever Home',
|
||||||
|
'season_number': 1,
|
||||||
|
'episode_number': 1,
|
||||||
|
},
|
||||||
|
'skip': 'Available for Premium users',
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
|
||||||
|
headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
|
||||||
|
|
||||||
|
def _download_video_playback_info(self, disco_base, video_id, headers):
|
||||||
|
return self._download_json(
|
||||||
|
disco_base + 'playback/v3/videoPlaybackInfo',
|
||||||
|
video_id, headers=headers, data=json.dumps({
|
||||||
|
'deviceInfo': {
|
||||||
|
'adBlocker': False,
|
||||||
|
},
|
||||||
|
'videoId': video_id,
|
||||||
|
'wisteriaProperties': {
|
||||||
|
'platform': 'desktop',
|
||||||
|
'product': 'dplus_us',
|
||||||
|
},
|
||||||
|
}).encode('utf-8'))['data']['attributes']['streaming']
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
|
||||||
|
|
||||||
|
|
||||||
|
class HGTVDeIE(DPlayIE):
|
||||||
|
_VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '151205',
|
||||||
|
'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Wer braucht schon eine Toilette',
|
||||||
|
'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
|
||||||
|
'duration': 1177.024,
|
||||||
|
'timestamp': 1595705400,
|
||||||
|
'upload_date': '20200725',
|
||||||
|
'creator': 'HGTV',
|
||||||
|
'series': 'Tiny House - klein, aber oho',
|
||||||
|
'season_number': 3,
|
||||||
|
'episode_number': 3,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
return self._get_disco_api_info(
|
||||||
|
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
|
||||||
|
|
|
@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
|
||||||
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
|
(?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z_-]+)
|
(?P<id>[\da-z_-]+)
|
||||||
'''
|
'''
|
||||||
|
@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.dr.dk/drtv/program/jagten_220924',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -12,7 +12,14 @@ from ..utils import (
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class EggheadCourseIE(InfoExtractor):
|
class EggheadBaseIE(InfoExtractor):
|
||||||
|
def _call_api(self, path, video_id, resource, fatal=True):
|
||||||
|
return self._download_json(
|
||||||
|
'https://app.egghead.io/api/v1/' + path,
|
||||||
|
video_id, 'Downloading %s JSON' % resource, fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
class EggheadCourseIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
||||||
|
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
series_path = 'series/' + playlist_id
|
||||||
lessons = self._download_json(
|
lessons = self._call_api(
|
||||||
'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
|
series_path + '/lessons', playlist_id, 'course lessons')
|
||||||
playlist_id, 'Downloading course lessons JSON')
|
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for lesson in lessons:
|
for lesson in lessons:
|
||||||
|
@ -44,9 +50,8 @@ class EggheadCourseIE(InfoExtractor):
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
|
||||||
|
|
||||||
course = self._download_json(
|
course = self._call_api(
|
||||||
'https://egghead.io/api/v1/series/%s' % playlist_id,
|
series_path, playlist_id, 'course', False) or {}
|
||||||
playlist_id, 'Downloading course JSON', fatal=False) or {}
|
|
||||||
|
|
||||||
playlist_id = course.get('id')
|
playlist_id = course.get('id')
|
||||||
if playlist_id:
|
if playlist_id:
|
||||||
|
@ -57,7 +62,7 @@ class EggheadCourseIE(InfoExtractor):
|
||||||
course.get('description'))
|
course.get('description'))
|
||||||
|
|
||||||
|
|
||||||
class EggheadLessonIE(InfoExtractor):
|
class EggheadLessonIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io lesson'
|
IE_DESC = 'egghead.io lesson'
|
||||||
IE_NAME = 'egghead:lesson'
|
IE_NAME = 'egghead:lesson'
|
||||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||||
|
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
|
||||||
'upload_date': '20161209',
|
'upload_date': '20161209',
|
||||||
'duration': 304,
|
'duration': 304,
|
||||||
'view_count': 0,
|
'view_count': 0,
|
||||||
'tags': ['javascript', 'free'],
|
'tags': 'count:2',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
lesson = self._download_json(
|
lesson = self._call_api(
|
||||||
'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
|
'lessons/' + display_id, display_id, 'lesson')
|
||||||
|
|
||||||
lesson_id = compat_str(lesson['id'])
|
lesson_id = compat_str(lesson['id'])
|
||||||
title = lesson['title']
|
title = lesson['title']
|
||||||
|
|
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class EpornerIE(InfoExtractor):
|
class EpornerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
|
||||||
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
'md5': '39d486f046212d8e1b911c52ab4691f8',
|
||||||
|
@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
|
||||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
|
'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
|
||||||
video_id = self._match_id(urlh.geturl())
|
video_id = self._match_id(urlh.geturl())
|
||||||
|
|
||||||
hash = self._search_regex(
|
hash = self._search_regex(
|
||||||
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
|
r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
|
||||||
|
|
||||||
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
title = self._og_search_title(webpage, default=None) or self._html_search_regex(
|
||||||
r'<title>(.+?) - EPORNER', webpage, 'title')
|
r'<title>(.+?) - EPORNER', webpage, 'title')
|
||||||
|
@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
|
||||||
duration = parse_duration(self._html_search_meta(
|
duration = parse_duration(self._html_search_meta(
|
||||||
'duration', webpage, default=None))
|
'duration', webpage, default=None))
|
||||||
view_count = str_to_int(self._search_regex(
|
view_count = str_to_int(self._search_regex(
|
||||||
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
|
r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', default=None))
|
||||||
|
|
||||||
return merge_dicts(json_ld, {
|
return merge_dicts(json_ld, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -1,77 +0,0 @@
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
sanitized_Request,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class EveryonesMixtapeIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '5bfseWNmlds',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
|
|
||||||
'uploader': 'FKR.TV',
|
|
||||||
'uploader_id': 'frenchkissrecords',
|
|
||||||
'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
|
|
||||||
'upload_date': '20081015'
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # This is simply YouTube
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'm7m0jJAbMQi',
|
|
||||||
'title': 'Driving',
|
|
||||||
},
|
|
||||||
'playlist_count': 24
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
|
|
||||||
pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id
|
|
||||||
pllist_req = sanitized_Request(pllist_url)
|
|
||||||
pllist_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
|
||||||
|
|
||||||
playlist_list = self._download_json(
|
|
||||||
pllist_req, playlist_id, note='Downloading playlist metadata')
|
|
||||||
try:
|
|
||||||
playlist_no = next(playlist['id']
|
|
||||||
for playlist in playlist_list
|
|
||||||
if playlist['code'] == playlist_id)
|
|
||||||
except StopIteration:
|
|
||||||
raise ExtractorError('Playlist id not found')
|
|
||||||
|
|
||||||
pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no
|
|
||||||
pl_req = sanitized_Request(pl_url)
|
|
||||||
pl_req.add_header('X-Requested-With', 'XMLHttpRequest')
|
|
||||||
playlist = self._download_json(
|
|
||||||
pl_req, playlist_id, note='Downloading playlist info')
|
|
||||||
|
|
||||||
entries = [{
|
|
||||||
'_type': 'url',
|
|
||||||
'url': t['url'],
|
|
||||||
'title': t['title'],
|
|
||||||
} for t in playlist['tracks']]
|
|
||||||
|
|
||||||
if mobj.group('songnr'):
|
|
||||||
songnr = int(mobj.group('songnr')) - 1
|
|
||||||
return entries[songnr]
|
|
||||||
|
|
||||||
playlist_title = playlist['mixData']['name']
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': playlist_id,
|
|
||||||
'title': playlist_title,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
|
@ -30,7 +30,11 @@ from .adobetv import (
|
||||||
from .adultswim import AdultSwimIE
|
from .adultswim import AdultSwimIE
|
||||||
from .aenetworks import (
|
from .aenetworks import (
|
||||||
AENetworksIE,
|
AENetworksIE,
|
||||||
|
AENetworksCollectionIE,
|
||||||
|
AENetworksShowIE,
|
||||||
HistoryTopicIE,
|
HistoryTopicIE,
|
||||||
|
HistoryPlayerIE,
|
||||||
|
BiographyIE,
|
||||||
)
|
)
|
||||||
from .afreecatv import AfreecaTVIE
|
from .afreecatv import AfreecaTVIE
|
||||||
from .agora import (
|
from .agora import (
|
||||||
|
@ -43,8 +47,12 @@ from .airmozilla import AirMozillaIE
|
||||||
from .albicla import AlbiclaIE
|
from .albicla import AlbiclaIE
|
||||||
from .aljazeera import AlJazeeraIE
|
from .aljazeera import AlJazeeraIE
|
||||||
from .alphaporno import AlphaPornoIE
|
from .alphaporno import AlphaPornoIE
|
||||||
|
from .amara import AmaraIE
|
||||||
from .amcnetworks import AMCNetworksIE
|
from .amcnetworks import AMCNetworksIE
|
||||||
from .americastestkitchen import AmericasTestKitchenIE
|
from .americastestkitchen import (
|
||||||
|
AmericasTestKitchenIE,
|
||||||
|
AmericasTestKitchenSeasonIE,
|
||||||
|
)
|
||||||
from .animeondemand import AnimeOnDemandIE
|
from .animeondemand import AnimeOnDemandIE
|
||||||
from .anvato import AnvatoIE
|
from .anvato import AnvatoIE
|
||||||
from .aol import AolIE
|
from .aol import AolIE
|
||||||
|
@ -60,7 +68,9 @@ from .appletrailers import (
|
||||||
AppleTrailersIE,
|
AppleTrailersIE,
|
||||||
AppleTrailersSectionIE,
|
AppleTrailersSectionIE,
|
||||||
)
|
)
|
||||||
|
from .applepodcasts import ApplePodcastsIE
|
||||||
from .archiveorg import ArchiveOrgIE
|
from .archiveorg import ArchiveOrgIE
|
||||||
|
from .arcpublishing import ArcPublishingIE
|
||||||
from .arkena import ArkenaIE
|
from .arkena import ArkenaIE
|
||||||
from .ard import (
|
from .ard import (
|
||||||
ARDBetaMediathekIE,
|
ARDBetaMediathekIE,
|
||||||
|
@ -68,7 +78,7 @@ from .ard import (
|
||||||
ARDMediathekIE,
|
ARDMediathekIE,
|
||||||
)
|
)
|
||||||
from .arte import (
|
from .arte import (
|
||||||
ArteTVPlus7IE,
|
ArteTVIE,
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
@ -98,16 +108,18 @@ from .bbc import (
|
||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
from .beampro import (
|
|
||||||
BeamProLiveIE,
|
|
||||||
BeamProVodIE,
|
|
||||||
)
|
|
||||||
from .beeg import BeegIE
|
from .beeg import BeegIE
|
||||||
from .behindkink import BehindKinkIE
|
from .behindkink import BehindKinkIE
|
||||||
from .bellmedia import BellMediaIE
|
from .bellmedia import BellMediaIE
|
||||||
from .beatport import BeatportIE
|
from .beatport import BeatportIE
|
||||||
from .bet import BetIE
|
from .bet import BetIE
|
||||||
from .bfi import BFIPlayerIE
|
from .bfi import BFIPlayerIE
|
||||||
|
from .bfmtv import (
|
||||||
|
BFMTVIE,
|
||||||
|
BFMTVLiveIE,
|
||||||
|
BFMTVArticleIE,
|
||||||
|
)
|
||||||
|
from .bibeltv import BibelTVIE
|
||||||
from .bigflix import BigflixIE
|
from .bigflix import BigflixIE
|
||||||
from .bild import BildIE
|
from .bild import BildIE
|
||||||
from .bilibili import (
|
from .bilibili import (
|
||||||
|
@ -130,7 +142,9 @@ from .bleacherreport import (
|
||||||
from .blinkx import BlinkxIE
|
from .blinkx import BlinkxIE
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
|
from .bongacams import BongaCamsIE
|
||||||
from .bostonglobe import BostonGlobeIE
|
from .bostonglobe import BostonGlobeIE
|
||||||
|
from .box import BoxIE
|
||||||
from .bpb import BpbIE
|
from .bpb import BpbIE
|
||||||
from .br import (
|
from .br import (
|
||||||
BRIE,
|
BRIE,
|
||||||
|
@ -159,6 +173,7 @@ from .canvas import (
|
||||||
CanvasIE,
|
CanvasIE,
|
||||||
CanvasEenIE,
|
CanvasEenIE,
|
||||||
VrtNUIE,
|
VrtNUIE,
|
||||||
|
DagelijkseKostIE,
|
||||||
)
|
)
|
||||||
from .carambatv import (
|
from .carambatv import (
|
||||||
CarambaTVIE,
|
CarambaTVIE,
|
||||||
|
@ -173,7 +188,10 @@ from .cbc import (
|
||||||
CBCOlympicsIE,
|
CBCOlympicsIE,
|
||||||
)
|
)
|
||||||
from .cbs import CBSIE
|
from .cbs import CBSIE
|
||||||
from .cbslocal import CBSLocalIE
|
from .cbslocal import (
|
||||||
|
CBSLocalIE,
|
||||||
|
CBSLocalArticleIE,
|
||||||
|
)
|
||||||
from .cbsinteractive import CBSInteractiveIE
|
from .cbsinteractive import CBSInteractiveIE
|
||||||
from .cbsnews import (
|
from .cbsnews import (
|
||||||
CBSNewsEmbedIE,
|
CBSNewsEmbedIE,
|
||||||
|
@ -228,11 +246,8 @@ from .cnn import (
|
||||||
)
|
)
|
||||||
from .coub import CoubIE
|
from .coub import CoubIE
|
||||||
from .comedycentral import (
|
from .comedycentral import (
|
||||||
ComedyCentralFullEpisodesIE,
|
|
||||||
ComedyCentralIE,
|
ComedyCentralIE,
|
||||||
ComedyCentralShortnameIE,
|
|
||||||
ComedyCentralTVIE,
|
ComedyCentralTVIE,
|
||||||
ToshIE,
|
|
||||||
)
|
)
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
|
@ -251,6 +266,7 @@ from .crunchyroll import (
|
||||||
)
|
)
|
||||||
from .cspan import CSpanIE
|
from .cspan import CSpanIE
|
||||||
from .ctsnews import CtsNewsIE
|
from .ctsnews import CtsNewsIE
|
||||||
|
from .ctv import CTVIE
|
||||||
from .ctvnews import CTVNewsIE
|
from .ctvnews import CTVNewsIE
|
||||||
from .cultureunplugged import CultureUnpluggedIE
|
from .cultureunplugged import CultureUnpluggedIE
|
||||||
from .curiositystream import (
|
from .curiositystream import (
|
||||||
|
@ -282,7 +298,11 @@ from .douyutv import (
|
||||||
DouyuShowIE,
|
DouyuShowIE,
|
||||||
DouyuTVIE,
|
DouyuTVIE,
|
||||||
)
|
)
|
||||||
from .dplay import DPlayIE
|
from .dplay import (
|
||||||
|
DPlayIE,
|
||||||
|
DiscoveryPlusIE,
|
||||||
|
HGTVDeIE,
|
||||||
|
)
|
||||||
from .dreisat import DreiSatIE
|
from .dreisat import DreiSatIE
|
||||||
from .drbonanza import DRBonanzaIE
|
from .drbonanza import DRBonanzaIE
|
||||||
from .drtuber import DrTuberIE
|
from .drtuber import DrTuberIE
|
||||||
|
@ -344,7 +364,6 @@ from .eurozet import (
|
||||||
EurozetPlayerPodcastIE,
|
EurozetPlayerPodcastIE,
|
||||||
EurozetPlayerMusicStreamIE,
|
EurozetPlayerMusicStreamIE,
|
||||||
)
|
)
|
||||||
from .everyonesmixtape import EveryonesMixtapeIE
|
|
||||||
from .expotv import ExpoTVIE
|
from .expotv import ExpoTVIE
|
||||||
from .expressen import ExpressenIE
|
from .expressen import ExpressenIE
|
||||||
from .extremetube import ExtremeTubeIE
|
from .extremetube import ExtremeTubeIE
|
||||||
|
@ -408,6 +427,7 @@ from .frontendmasters import (
|
||||||
FrontendMastersLessonIE,
|
FrontendMastersLessonIE,
|
||||||
FrontendMastersCourseIE
|
FrontendMastersCourseIE
|
||||||
)
|
)
|
||||||
|
from .fujitv import FujiTVFODPlus7IE
|
||||||
from .funimation import FunimationIE
|
from .funimation import FunimationIE
|
||||||
from .funk import FunkIE
|
from .funk import FunkIE
|
||||||
from .funkwhale import (
|
from .funkwhale import (
|
||||||
|
@ -419,7 +439,6 @@ from .funkwhale import (
|
||||||
FunkwhaleRadioSHIE,
|
FunkwhaleRadioSHIE,
|
||||||
)
|
)
|
||||||
from .fusion import FusionIE
|
from .fusion import FusionIE
|
||||||
from .fxnetworks import FXNetworksIE
|
|
||||||
from .gaia import GaiaIE
|
from .gaia import GaiaIE
|
||||||
from .gameinformer import GameInformerIE
|
from .gameinformer import GameInformerIE
|
||||||
from .gamespot import GameSpotIE
|
from .gamespot import GameSpotIE
|
||||||
|
@ -427,6 +446,7 @@ from .gamestar import GameStarIE
|
||||||
from .gaskrank import GaskrankIE
|
from .gaskrank import GaskrankIE
|
||||||
from .gazeta import GazetaIE
|
from .gazeta import GazetaIE
|
||||||
from .gdcvault import GDCVaultIE
|
from .gdcvault import GDCVaultIE
|
||||||
|
from .gedidigital import GediDigitalIE
|
||||||
from .generic import GenericIE
|
from .generic import GenericIE
|
||||||
from .gfycat import GfycatIE
|
from .gfycat import GfycatIE
|
||||||
from .giantbomb import GiantBombIE
|
from .giantbomb import GiantBombIE
|
||||||
|
@ -440,7 +460,10 @@ from .go import GoIE
|
||||||
from .godtube import GodTubeIE
|
from .godtube import GodTubeIE
|
||||||
from .golem import GolemIE
|
from .golem import GolemIE
|
||||||
from .googledrive import GoogleDriveIE
|
from .googledrive import GoogleDriveIE
|
||||||
from .googleplus import GooglePlusIE
|
from .googlepodcasts import (
|
||||||
|
GooglePodcastsIE,
|
||||||
|
GooglePodcastsFeedIE,
|
||||||
|
)
|
||||||
from .googlesearch import GoogleSearchIE
|
from .googlesearch import GoogleSearchIE
|
||||||
from .goshgay import GoshgayIE
|
from .goshgay import GoshgayIE
|
||||||
from .gputechconf import GPUTechConfIE
|
from .gputechconf import GPUTechConfIE
|
||||||
|
@ -483,8 +506,12 @@ from .hungama import (
|
||||||
from .hypem import HypemIE
|
from .hypem import HypemIE
|
||||||
from .ign import (
|
from .ign import (
|
||||||
IGNIE,
|
IGNIE,
|
||||||
OneUPIE,
|
IGNVideoIE,
|
||||||
PCMagIE,
|
IGNArticleIE,
|
||||||
|
)
|
||||||
|
from .iheart import (
|
||||||
|
IHeartRadioIE,
|
||||||
|
IHeartRadioPodcastIE,
|
||||||
)
|
)
|
||||||
from .imdb import (
|
from .imdb import (
|
||||||
ImdbIE,
|
ImdbIE,
|
||||||
|
@ -531,13 +558,15 @@ from .joj import JojIE
|
||||||
from .jwplatform import JWPlatformIE
|
from .jwplatform import JWPlatformIE
|
||||||
from .kakao import KakaoIE
|
from .kakao import KakaoIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from .kanalplay import KanalPlayIE
|
|
||||||
from .kankan import KankanIE
|
from .kankan import KankanIE
|
||||||
from .karaoketv import KaraoketvIE
|
from .karaoketv import KaraoketvIE
|
||||||
from .karrierevideos import KarriereVideosIE
|
from .karrierevideos import KarriereVideosIE
|
||||||
from .keezmovies import KeezMoviesIE
|
from .keezmovies import KeezMoviesIE
|
||||||
from .ketnet import KetnetIE
|
from .ketnet import KetnetIE
|
||||||
from .khanacademy import KhanAcademyIE
|
from .khanacademy import (
|
||||||
|
KhanAcademyIE,
|
||||||
|
KhanAcademyUnitIE,
|
||||||
|
)
|
||||||
from .kickstarter import KickStarterIE
|
from .kickstarter import KickStarterIE
|
||||||
from .kinja import KinjaEmbedIE
|
from .kinja import KinjaEmbedIE
|
||||||
from .kinopoisk import KinoPoiskIE
|
from .kinopoisk import KinoPoiskIE
|
||||||
|
@ -638,6 +667,7 @@ from .mastodon import MastodonSHIE
|
||||||
from .massengeschmacktv import MassengeschmackTVIE
|
from .massengeschmacktv import MassengeschmackTVIE
|
||||||
from .matchtv import MatchTVIE
|
from .matchtv import MatchTVIE
|
||||||
from .mdr import MDRIE
|
from .mdr import MDRIE
|
||||||
|
from .medaltv import MedalTVIE
|
||||||
from .mediaset import MediasetIE
|
from .mediaset import MediasetIE
|
||||||
from .mediasite import (
|
from .mediasite import (
|
||||||
MediasiteIE,
|
MediasiteIE,
|
||||||
|
@ -658,6 +688,11 @@ from .microsoftvirtualacademy import (
|
||||||
MicrosoftVirtualAcademyIE,
|
MicrosoftVirtualAcademyIE,
|
||||||
MicrosoftVirtualAcademyCourseIE,
|
MicrosoftVirtualAcademyCourseIE,
|
||||||
)
|
)
|
||||||
|
from .minds import (
|
||||||
|
MindsIE,
|
||||||
|
MindsChannelIE,
|
||||||
|
MindsGroupIE,
|
||||||
|
)
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
|
@ -708,9 +743,15 @@ from .nationalgeographic import (
|
||||||
NationalGeographicTVIE,
|
NationalGeographicTVIE,
|
||||||
)
|
)
|
||||||
from .naver import NaverIE
|
from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import (
|
||||||
|
NBAWatchEmbedIE,
|
||||||
|
NBAWatchIE,
|
||||||
|
NBAWatchCollectionIE,
|
||||||
|
NBAEmbedIE,
|
||||||
|
NBAIE,
|
||||||
|
NBAChannelIE,
|
||||||
|
)
|
||||||
from .nbc import (
|
from .nbc import (
|
||||||
CSNNEIE,
|
|
||||||
NBCIE,
|
NBCIE,
|
||||||
NBCNewsIE,
|
NBCNewsIE,
|
||||||
NBCOlympicsIE,
|
NBCOlympicsIE,
|
||||||
|
@ -753,8 +794,14 @@ from .nexx import (
|
||||||
NexxIE,
|
NexxIE,
|
||||||
NexxEmbedIE,
|
NexxEmbedIE,
|
||||||
)
|
)
|
||||||
from .nfl import NFLIE
|
from .nfl import (
|
||||||
from .nhk import NhkVodIE
|
NFLIE,
|
||||||
|
NFLArticleIE,
|
||||||
|
)
|
||||||
|
from .nhk import (
|
||||||
|
NhkVodIE,
|
||||||
|
NhkVodProgramIE,
|
||||||
|
)
|
||||||
from .nhl import NHLIE
|
from .nhl import NHLIE
|
||||||
from .nick import (
|
from .nick import (
|
||||||
NickIE,
|
NickIE,
|
||||||
|
@ -804,6 +851,7 @@ from .nrk import (
|
||||||
NRKSkoleIE,
|
NRKSkoleIE,
|
||||||
NRKTVIE,
|
NRKTVIE,
|
||||||
NRKTVDirekteIE,
|
NRKTVDirekteIE,
|
||||||
|
NRKRadioPodkastIE,
|
||||||
NRKTVEpisodeIE,
|
NRKTVEpisodeIE,
|
||||||
NRKTVEpisodesIE,
|
NRKTVEpisodesIE,
|
||||||
NRKTVSeasonIE,
|
NRKTVSeasonIE,
|
||||||
|
@ -816,6 +864,7 @@ from .ntvru import NTVRuIE
|
||||||
from .nytimes import (
|
from .nytimes import (
|
||||||
NYTimesIE,
|
NYTimesIE,
|
||||||
NYTimesArticleIE,
|
NYTimesArticleIE,
|
||||||
|
NYTimesCookingIE,
|
||||||
)
|
)
|
||||||
from .nuvid import NuvidIE
|
from .nuvid import NuvidIE
|
||||||
from .nzz import NZZIE
|
from .nzz import NZZIE
|
||||||
|
@ -879,6 +928,10 @@ from .picarto import (
|
||||||
)
|
)
|
||||||
from .piksel import PikselIE
|
from .piksel import PikselIE
|
||||||
from .pinkbike import PinkbikeIE
|
from .pinkbike import PinkbikeIE
|
||||||
|
from .pinterest import (
|
||||||
|
PinterestIE,
|
||||||
|
PinterestCollectionIE,
|
||||||
|
)
|
||||||
from .pladform import PladformIE
|
from .pladform import PladformIE
|
||||||
from .platzi import (
|
from .platzi import (
|
||||||
PlatziIE,
|
PlatziIE,
|
||||||
|
@ -1005,6 +1058,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
|
||||||
from .rtvnh import RTVNHIE
|
from .rtvnh import RTVNHIE
|
||||||
from .rtvs import RTVSIE
|
from .rtvs import RTVSIE
|
||||||
from .ruhd import RUHDIE
|
from .ruhd import RUHDIE
|
||||||
|
from .rumble import RumbleEmbedIE
|
||||||
from .rutube import (
|
from .rutube import (
|
||||||
RutubeIE,
|
RutubeIE,
|
||||||
RutubeChannelIE,
|
RutubeChannelIE,
|
||||||
|
@ -1021,6 +1075,7 @@ from .safari import (
|
||||||
SafariApiIE,
|
SafariApiIE,
|
||||||
SafariCourseIE,
|
SafariCourseIE,
|
||||||
)
|
)
|
||||||
|
from .samplefocus import SampleFocusIE
|
||||||
from .sapo import SapoIE
|
from .sapo import SapoIE
|
||||||
from .savefrom import SaveFromIE
|
from .savefrom import SaveFromIE
|
||||||
from .sbs import SBSIE
|
from .sbs import SBSIE
|
||||||
|
@ -1053,8 +1108,23 @@ from .shared import (
|
||||||
VivoIE,
|
VivoIE,
|
||||||
)
|
)
|
||||||
from .showroomlive import ShowRoomLiveIE
|
from .showroomlive import ShowRoomLiveIE
|
||||||
|
from .simplecast import (
|
||||||
|
SimplecastIE,
|
||||||
|
SimplecastEpisodeIE,
|
||||||
|
SimplecastPodcastIE,
|
||||||
|
)
|
||||||
from .sina import SinaIE
|
from .sina import SinaIE
|
||||||
from .sixplay import SixPlayIE
|
from .sixplay import SixPlayIE
|
||||||
|
from .skyit import (
|
||||||
|
SkyItPlayerIE,
|
||||||
|
SkyItVideoIE,
|
||||||
|
SkyItVideoLiveIE,
|
||||||
|
SkyItIE,
|
||||||
|
SkyItAcademyIE,
|
||||||
|
SkyItArteIE,
|
||||||
|
CieloTVItIE,
|
||||||
|
TV8ItIE,
|
||||||
|
)
|
||||||
from .skylinewebcams import SkylineWebcamsIE
|
from .skylinewebcams import SkylineWebcamsIE
|
||||||
from .skynewsarabia import (
|
from .skynewsarabia import (
|
||||||
SkyNewsArabiaIE,
|
SkyNewsArabiaIE,
|
||||||
|
@ -1063,16 +1133,11 @@ from .skynewsarabia import (
|
||||||
from .sky import (
|
from .sky import (
|
||||||
SkyNewsIE,
|
SkyNewsIE,
|
||||||
SkySportsIE,
|
SkySportsIE,
|
||||||
|
SkySportsNewsIE,
|
||||||
)
|
)
|
||||||
from .slideshare import SlideshareIE
|
from .slideshare import SlideshareIE
|
||||||
from .slideslive import SlidesLiveIE
|
from .slideslive import SlidesLiveIE
|
||||||
from .slutload import SlutloadIE
|
from .slutload import SlutloadIE
|
||||||
from .smotri import (
|
|
||||||
SmotriIE,
|
|
||||||
SmotriCommunityIE,
|
|
||||||
SmotriUserIE,
|
|
||||||
SmotriBroadcastIE,
|
|
||||||
)
|
|
||||||
from .snotr import SnotrIE
|
from .snotr import SnotrIE
|
||||||
from .sohu import SohuIE
|
from .sohu import SohuIE
|
||||||
from .sonyliv import SonyLIVIE
|
from .sonyliv import SonyLIVIE
|
||||||
|
@ -1101,16 +1166,28 @@ from .spankbang import (
|
||||||
SpankBangPlaylistIE,
|
SpankBangPlaylistIE,
|
||||||
)
|
)
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .spiegel import SpiegelIE, SpiegelArticleIE
|
from .spiegel import SpiegelIE
|
||||||
from .spiegeltv import SpiegeltvIE
|
|
||||||
from .spike import (
|
from .spike import (
|
||||||
BellatorIE,
|
BellatorIE,
|
||||||
ParamountNetworkIE,
|
ParamountNetworkIE,
|
||||||
)
|
)
|
||||||
from .stitcher import StitcherIE
|
from .stitcher import (
|
||||||
|
StitcherIE,
|
||||||
|
StitcherShowIE,
|
||||||
|
)
|
||||||
from .sport5 import Sport5IE
|
from .sport5 import Sport5IE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .sportdeutschland import SportDeutschlandIE
|
from .sportdeutschland import SportDeutschlandIE
|
||||||
|
from .spotify import (
|
||||||
|
SpotifyIE,
|
||||||
|
SpotifyShowIE,
|
||||||
|
)
|
||||||
|
from .spreaker import (
|
||||||
|
SpreakerIE,
|
||||||
|
SpreakerPageIE,
|
||||||
|
SpreakerShowIE,
|
||||||
|
SpreakerShowPageIE,
|
||||||
|
)
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
|
@ -1120,6 +1197,11 @@ from .srgssr import (
|
||||||
from .srmediathek import SRMediathekIE
|
from .srmediathek import SRMediathekIE
|
||||||
from .stanfordoc import StanfordOpenClassroomIE
|
from .stanfordoc import StanfordOpenClassroomIE
|
||||||
from .steam import SteamIE
|
from .steam import SteamIE
|
||||||
|
from .storyfire import (
|
||||||
|
StoryFireIE,
|
||||||
|
StoryFireUserIE,
|
||||||
|
StoryFireSeriesIE,
|
||||||
|
)
|
||||||
from .streamable import StreamableIE
|
from .streamable import StreamableIE
|
||||||
from .streamcloud import StreamcloudIE
|
from .streamcloud import StreamcloudIE
|
||||||
from .streamcz import StreamCZIE
|
from .streamcz import StreamCZIE
|
||||||
|
@ -1145,7 +1227,6 @@ from .tagesschau import (
|
||||||
TagesschauIE,
|
TagesschauIE,
|
||||||
)
|
)
|
||||||
from .tass import TassIE
|
from .tass import TassIE
|
||||||
from .tastytrade import TastyTradeIE
|
|
||||||
from .tbs import TBSIE
|
from .tbs import TBSIE
|
||||||
from .tdslifeway import TDSLifewayIE
|
from .tdslifeway import TDSLifewayIE
|
||||||
from .teachable import (
|
from .teachable import (
|
||||||
|
@ -1172,6 +1253,7 @@ from .telequebec import (
|
||||||
TeleQuebecSquatIE,
|
TeleQuebecSquatIE,
|
||||||
TeleQuebecEmissionIE,
|
TeleQuebecEmissionIE,
|
||||||
TeleQuebecLiveIE,
|
TeleQuebecLiveIE,
|
||||||
|
TeleQuebecVideoIE,
|
||||||
)
|
)
|
||||||
from .teletask import TeleTaskIE
|
from .teletask import TeleTaskIE
|
||||||
from .telewebion import TelewebionIE
|
from .telewebion import TelewebionIE
|
||||||
|
@ -1208,7 +1290,10 @@ from .tnaflix import (
|
||||||
EMPFlixIE,
|
EMPFlixIE,
|
||||||
MovieFapIE,
|
MovieFapIE,
|
||||||
)
|
)
|
||||||
from .toggle import ToggleIE
|
from .toggle import (
|
||||||
|
ToggleIE,
|
||||||
|
MeWatchIE,
|
||||||
|
)
|
||||||
from .tonline import TOnlineIE
|
from .tonline import TOnlineIE
|
||||||
from .toongoggles import ToonGogglesIE
|
from .toongoggles import ToonGogglesIE
|
||||||
from .toutv import TouTvIE
|
from .toutv import TouTvIE
|
||||||
|
@ -1219,6 +1304,10 @@ from .transistorfm import (
|
||||||
TransistorFMShareIE,
|
TransistorFMShareIE,
|
||||||
)
|
)
|
||||||
from .trilulilu import TriluliluIE
|
from .trilulilu import TriluliluIE
|
||||||
|
from .trovo import (
|
||||||
|
TrovoIE,
|
||||||
|
TrovoVodIE,
|
||||||
|
)
|
||||||
from .trunews import TruNewsIE
|
from .trunews import TruNewsIE
|
||||||
from .trutv import TruTVIE
|
from .trutv import TruTVIE
|
||||||
from .tubafm import (
|
from .tubafm import (
|
||||||
|
@ -1241,6 +1330,7 @@ from .tv2 import (
|
||||||
TV2IE,
|
TV2IE,
|
||||||
TV2ArticleIE,
|
TV2ArticleIE,
|
||||||
KatsomoIE,
|
KatsomoIE,
|
||||||
|
MTVUutisetArticleIE,
|
||||||
)
|
)
|
||||||
from .tv2dk import (
|
from .tv2dk import (
|
||||||
TV2DKIE,
|
TV2DKIE,
|
||||||
|
@ -1249,7 +1339,14 @@ from .tv2dk import (
|
||||||
from .tv2hu import TV2HuIE
|
from .tv2hu import TV2HuIE
|
||||||
from .tv4 import TV4IE
|
from .tv4 import TV4IE
|
||||||
from .tv5mondeplus import TV5MondePlusIE
|
from .tv5mondeplus import TV5MondePlusIE
|
||||||
from .tva import TVAIE
|
from .tv5unis import (
|
||||||
|
TV5UnisVideoIE,
|
||||||
|
TV5UnisIE,
|
||||||
|
)
|
||||||
|
from .tva import (
|
||||||
|
TVAIE,
|
||||||
|
QubIE,
|
||||||
|
)
|
||||||
from .tvanouvelles import (
|
from .tvanouvelles import (
|
||||||
TVANouvellesIE,
|
TVANouvellesIE,
|
||||||
TVANouvellesArticleIE,
|
TVANouvellesArticleIE,
|
||||||
|
@ -1258,6 +1355,7 @@ from .tvc import (
|
||||||
TVCIE,
|
TVCIE,
|
||||||
TVCArticleIE,
|
TVCArticleIE,
|
||||||
)
|
)
|
||||||
|
from .tver import TVerIE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvn24 import TVN24IE
|
from .tvn24 import TVN24IE
|
||||||
|
@ -1376,7 +1474,6 @@ from .vidme import (
|
||||||
VidmeUserIE,
|
VidmeUserIE,
|
||||||
VidmeUserLikesIE,
|
VidmeUserLikesIE,
|
||||||
)
|
)
|
||||||
from .vidzi import VidziIE
|
|
||||||
from .vier import VierIE, VierVideosIE
|
from .vier import VierIE, VierVideosIE
|
||||||
from .viewlift import (
|
from .viewlift import (
|
||||||
ViewLiftIE,
|
ViewLiftIE,
|
||||||
|
@ -1436,10 +1533,14 @@ from .vrv import (
|
||||||
VRVSeriesIE,
|
VRVSeriesIE,
|
||||||
)
|
)
|
||||||
from .vshare import VShareIE
|
from .vshare import VShareIE
|
||||||
|
from .vtm import VTMIE
|
||||||
from .medialaan import MedialaanIE
|
from .medialaan import MedialaanIE
|
||||||
from .vube import VubeIE
|
from .vube import VubeIE
|
||||||
from .vuclip import VuClipIE
|
from .vuclip import VuClipIE
|
||||||
from .vvvvid import VVVVIDIE
|
from .vvvvid import (
|
||||||
|
VVVVIDIE,
|
||||||
|
VVVVIDShowIE,
|
||||||
|
)
|
||||||
from .vyborymos import VyboryMosIE
|
from .vyborymos import VyboryMosIE
|
||||||
from .vzaar import VzaarIE
|
from .vzaar import VzaarIE
|
||||||
from .wakanim import WakanimIE
|
from .wakanim import WakanimIE
|
||||||
|
@ -1551,7 +1652,6 @@ from .youtube import (
|
||||||
YoutubeTruncatedURLIE,
|
YoutubeTruncatedURLIE,
|
||||||
)
|
)
|
||||||
from .zapiks import ZapiksIE
|
from .zapiks import ZapiksIE
|
||||||
from .zaq1 import Zaq1IE
|
|
||||||
from .zattoo import (
|
from .zattoo import (
|
||||||
BBVTVIE,
|
BBVTVIE,
|
||||||
EinsUndEinsTVIE,
|
EinsUndEinsTVIE,
|
||||||
|
@ -1572,5 +1672,6 @@ from .zattoo import (
|
||||||
ZattooLiveIE,
|
ZattooLiveIE,
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
|
from .zhihu import ZhihuIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import ZingMp3IE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
@ -8,6 +9,7 @@ from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_http_client,
|
compat_http_client,
|
||||||
|
compat_str,
|
||||||
compat_urllib_error,
|
compat_urllib_error,
|
||||||
compat_urllib_parse_unquote,
|
compat_urllib_parse_unquote,
|
||||||
compat_urllib_parse_unquote_plus,
|
compat_urllib_parse_unquote_plus,
|
||||||
|
@ -16,14 +18,17 @@ from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
get_element_by_id,
|
get_element_by_id,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
limit_length,
|
limit_length,
|
||||||
parse_count,
|
parse_count,
|
||||||
|
qualities,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
try_get,
|
try_get,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -39,11 +44,13 @@ class FacebookIE(InfoExtractor):
|
||||||
photo\.php|
|
photo\.php|
|
||||||
video\.php|
|
video\.php|
|
||||||
video/embed|
|
video/embed|
|
||||||
story\.php
|
story\.php|
|
||||||
|
watch(?:/live)?/?
|
||||||
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
)\?(?:.*?)(?:v|video_id|story_fbid)=|
|
||||||
[^/]+/videos/(?:[^/]+/)?|
|
[^/]+/videos/(?:[^/]+/)?|
|
||||||
[^/]+/posts/|
|
[^/]+/posts/|
|
||||||
groups/[^/]+/permalink/
|
groups/[^/]+/permalink/|
|
||||||
|
watchparty/
|
||||||
)|
|
)|
|
||||||
facebook:
|
facebook:
|
||||||
)
|
)
|
||||||
|
@ -54,8 +61,6 @@ class FacebookIE(InfoExtractor):
|
||||||
_NETRC_MACHINE = 'facebook'
|
_NETRC_MACHINE = 'facebook'
|
||||||
IE_NAME = 'facebook'
|
IE_NAME = 'facebook'
|
||||||
|
|
||||||
_CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
|
|
||||||
|
|
||||||
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
_VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
|
||||||
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
_VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
|
||||||
|
|
||||||
|
@ -72,6 +77,7 @@ class FacebookIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'skip': 'Requires logging in',
|
'skip': 'Requires logging in',
|
||||||
}, {
|
}, {
|
||||||
|
# data.video
|
||||||
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
'url': 'https://www.facebook.com/video.php?v=274175099429670',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '274175099429670',
|
'id': '274175099429670',
|
||||||
|
@ -133,6 +139,7 @@ class FacebookIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# have 1080P, but only up to 720p in swf params
|
# have 1080P, but only up to 720p in swf params
|
||||||
|
# data.video.story.attachments[].media
|
||||||
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
|
||||||
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
'md5': '9571fae53d4165bbbadb17a94651dcdc',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -147,6 +154,7 @@ class FacebookIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
# bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
|
||||||
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||||
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1417995061575415',
|
'id': '1417995061575415',
|
||||||
|
@ -174,6 +182,7 @@ class FacebookIE(InfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||||
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1396382447100162',
|
'id': '1396382447100162',
|
||||||
|
@ -193,18 +202,23 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
|
'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.mediaset.currMedia.edges
|
||||||
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.video.story.attachments[].media
|
||||||
'url': 'facebook:544765982287235',
|
'url': 'facebook:544765982287235',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||||
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.video.creation_story.attachments[].media
|
||||||
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.video
|
||||||
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
@ -212,6 +226,7 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
|
# data.video
|
||||||
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '359649331226507',
|
'id': '359649331226507',
|
||||||
|
@ -222,7 +237,64 @@ class FacebookIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||||
|
'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '106560053808006',
|
||||||
|
},
|
||||||
|
'playlist_count': 2,
|
||||||
|
}, {
|
||||||
|
# data.video.story.attachments[].media
|
||||||
|
'url': 'https://www.facebook.com/watch/?v=647537299265662',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
|
||||||
|
'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '10157667649866271',
|
||||||
|
},
|
||||||
|
'playlist_count': 3,
|
||||||
|
}, {
|
||||||
|
# data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
|
||||||
|
'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '117576630041613',
|
||||||
|
'ext': 'mp4',
|
||||||
|
# TODO: title can be extracted from video page
|
||||||
|
'title': 'Facebook video #117576630041613',
|
||||||
|
'uploader_id': '189393014416438',
|
||||||
|
'upload_date': '20201123',
|
||||||
|
'timestamp': 1606162592,
|
||||||
|
},
|
||||||
|
'skip': 'Requires logging in',
|
||||||
|
}, {
|
||||||
|
# node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
|
||||||
|
'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '211567722618337',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Facebook video #211567722618337',
|
||||||
|
'uploader_id': '127875227654254',
|
||||||
|
'upload_date': '20161122',
|
||||||
|
'timestamp': 1479793574,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# data.video.creation_story.attachments[].media
|
||||||
|
'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.facebook.com/watchparty/211641140192478',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '211641140192478',
|
||||||
|
},
|
||||||
|
'playlist_count': 1,
|
||||||
|
'skip': 'Requires logging in',
|
||||||
}]
|
}]
|
||||||
|
_SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
|
||||||
|
_api_config = {
|
||||||
|
'graphURI': '/api/graphql/'
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_urls(webpage, **kwargs):
|
def _extract_urls(webpage, **kwargs):
|
||||||
|
@ -305,23 +377,24 @@ class FacebookIE(InfoExtractor):
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
|
def _extract_from_url(self, url, video_id):
|
||||||
req = sanitized_Request(url)
|
webpage = self._download_webpage(
|
||||||
req.add_header('User-Agent', self._CHROME_USER_AGENT)
|
url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
|
||||||
webpage = self._download_webpage(req, video_id)
|
|
||||||
|
|
||||||
video_data = None
|
video_data = None
|
||||||
|
|
||||||
def extract_video_data(instances):
|
def extract_video_data(instances):
|
||||||
|
video_data = []
|
||||||
for item in instances:
|
for item in instances:
|
||||||
if item[1][0] == 'VideoConfig':
|
if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
|
||||||
video_item = item[2][0]
|
video_item = item[2][0]
|
||||||
if video_item.get('video_id'):
|
if video_item.get('video_id'):
|
||||||
return video_item['videoData']
|
video_data.append(video_item['videoData'])
|
||||||
|
return video_data
|
||||||
|
|
||||||
server_js_data = self._parse_json(self._search_regex(
|
server_js_data = self._parse_json(self._search_regex(
|
||||||
r'handleServerJS\(({.+})(?:\);|,")', webpage,
|
[r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
|
||||||
'server js data', default='{}'), video_id, fatal=False)
|
webpage, 'server js data', default='{}'), video_id, fatal=False)
|
||||||
|
|
||||||
if server_js_data:
|
if server_js_data:
|
||||||
video_data = extract_video_data(server_js_data.get('instances', []))
|
video_data = extract_video_data(server_js_data.get('instances', []))
|
||||||
|
@ -331,17 +404,118 @@ class FacebookIE(InfoExtractor):
|
||||||
return extract_video_data(try_get(
|
return extract_video_data(try_get(
|
||||||
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
js_data, lambda x: x['jsmods']['instances'], list) or [])
|
||||||
|
|
||||||
|
def extract_dash_manifest(video, formats):
|
||||||
|
dash_manifest = video.get('dash_manifest')
|
||||||
|
if dash_manifest:
|
||||||
|
formats.extend(self._parse_mpd_formats(
|
||||||
|
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
||||||
|
|
||||||
|
def process_formats(formats):
|
||||||
|
# Downloads with browser's User-Agent are rate limited. Working around
|
||||||
|
# with non-browser User-Agent.
|
||||||
|
for f in formats:
|
||||||
|
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
def extract_relay_data(_filter):
|
||||||
|
return self._parse_json(self._search_regex(
|
||||||
|
r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
|
||||||
|
webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
|
||||||
|
|
||||||
|
def extract_relay_prefetched_data(_filter):
|
||||||
|
replay_data = extract_relay_data(_filter)
|
||||||
|
for require in (replay_data.get('require') or []):
|
||||||
|
if require[0] == 'RelayPrefetchedStreamCache':
|
||||||
|
return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
server_js_data = self._parse_json(
|
server_js_data = self._parse_json(self._search_regex([
|
||||||
self._search_regex(
|
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
|
||||||
r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
|
r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
|
||||||
webpage, 'js data', default='{}'),
|
], webpage, 'js data', default='{}'), video_id, js_to_json, False)
|
||||||
video_id, transform_source=js_to_json, fatal=False)
|
|
||||||
video_data = extract_from_jsmods_instances(server_js_data)
|
video_data = extract_from_jsmods_instances(server_js_data)
|
||||||
|
|
||||||
if not video_data:
|
if not video_data:
|
||||||
if not fatal_if_no_video:
|
data = extract_relay_prefetched_data(
|
||||||
return webpage, False
|
r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
|
||||||
|
if data:
|
||||||
|
entries = []
|
||||||
|
|
||||||
|
def parse_graphql_video(video):
|
||||||
|
formats = []
|
||||||
|
q = qualities(['sd', 'hd'])
|
||||||
|
for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
|
||||||
|
playable_url = video.get('playable_url' + suffix)
|
||||||
|
if not playable_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'format_id': format_id,
|
||||||
|
'quality': q(format_id),
|
||||||
|
'url': playable_url,
|
||||||
|
})
|
||||||
|
extract_dash_manifest(video, formats)
|
||||||
|
process_formats(formats)
|
||||||
|
v_id = video.get('videoId') or video.get('id') or video_id
|
||||||
|
info = {
|
||||||
|
'id': v_id,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
|
||||||
|
'uploader_id': try_get(video, lambda x: x['owner']['id']),
|
||||||
|
'timestamp': int_or_none(video.get('publish_time')),
|
||||||
|
'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
|
||||||
|
}
|
||||||
|
description = try_get(video, lambda x: x['savable_description']['text'])
|
||||||
|
title = video.get('name')
|
||||||
|
if title:
|
||||||
|
info.update({
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
info['title'] = description or 'Facebook video #%s' % v_id
|
||||||
|
entries.append(info)
|
||||||
|
|
||||||
|
def parse_attachment(attachment, key='media'):
|
||||||
|
media = attachment.get(key) or {}
|
||||||
|
if media.get('__typename') == 'Video':
|
||||||
|
return parse_graphql_video(media)
|
||||||
|
|
||||||
|
nodes = data.get('nodes') or []
|
||||||
|
node = data.get('node') or {}
|
||||||
|
if not nodes and node:
|
||||||
|
nodes.append(node)
|
||||||
|
for node in nodes:
|
||||||
|
story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
|
||||||
|
attachments = try_get(story, [
|
||||||
|
lambda x: x['attached_story']['attachments'],
|
||||||
|
lambda x: x['attachments']
|
||||||
|
], list) or []
|
||||||
|
for attachment in attachments:
|
||||||
|
attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
|
||||||
|
ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
|
||||||
|
for n in ns:
|
||||||
|
parse_attachment(n)
|
||||||
|
parse_attachment(attachment)
|
||||||
|
|
||||||
|
edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
|
||||||
|
for edge in edges:
|
||||||
|
parse_attachment(edge, key='node')
|
||||||
|
|
||||||
|
video = data.get('video') or {}
|
||||||
|
if video:
|
||||||
|
attachments = try_get(video, [
|
||||||
|
lambda x: x['story']['attachments'],
|
||||||
|
lambda x: x['creation_story']['attachments']
|
||||||
|
], list) or []
|
||||||
|
for attachment in attachments:
|
||||||
|
parse_attachment(attachment)
|
||||||
|
if not entries:
|
||||||
|
parse_graphql_video(video)
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
|
||||||
if m_msg is not None:
|
if m_msg is not None:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
|
@ -350,6 +524,43 @@ class FacebookIE(InfoExtractor):
|
||||||
elif '>You must log in to continue' in webpage:
|
elif '>You must log in to continue' in webpage:
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
|
if not video_data and '/watchparty/' in url:
|
||||||
|
post_data = {
|
||||||
|
'doc_id': 3731964053542869,
|
||||||
|
'variables': json.dumps({
|
||||||
|
'livingRoomID': video_id,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
|
||||||
|
prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
|
||||||
|
if prefetched_data:
|
||||||
|
lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
|
||||||
|
if lsd:
|
||||||
|
post_data[lsd['name']] = lsd['value']
|
||||||
|
|
||||||
|
relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
|
||||||
|
for define in (relay_data.get('define') or []):
|
||||||
|
if define[0] == 'RelayAPIConfigDefaults':
|
||||||
|
self._api_config = define[2]
|
||||||
|
|
||||||
|
living_room = self._download_json(
|
||||||
|
urljoin(url, self._api_config['graphURI']), video_id,
|
||||||
|
data=urlencode_postdata(post_data))['data']['living_room']
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
|
||||||
|
video = try_get(edge, lambda x: x['node']['video']) or {}
|
||||||
|
v_id = video.get('id')
|
||||||
|
if not v_id:
|
||||||
|
continue
|
||||||
|
v_id = compat_str(v_id)
|
||||||
|
entries.append(self.url_result(
|
||||||
|
self._VIDEO_PAGE_TEMPLATE % v_id,
|
||||||
|
self.ie_key(), v_id, video.get('name')))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
|
||||||
|
if not video_data:
|
||||||
# Video info not in first request, do a secondary request using
|
# Video info not in first request, do a secondary request using
|
||||||
# tahoe player specific URL
|
# tahoe player specific URL
|
||||||
tahoe_data = self._download_webpage(
|
tahoe_data = self._download_webpage(
|
||||||
|
@ -379,8 +590,19 @@ class FacebookIE(InfoExtractor):
|
||||||
if not video_data:
|
if not video_data:
|
||||||
raise ExtractorError('Cannot parse data')
|
raise ExtractorError('Cannot parse data')
|
||||||
|
|
||||||
subtitles = {}
|
if len(video_data) > 1:
|
||||||
|
entries = []
|
||||||
|
for v in video_data:
|
||||||
|
video_url = v[0].get('video_url')
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(urljoin(
|
||||||
|
url, video_url), self.ie_key(), v[0].get('video_id')))
|
||||||
|
return self.playlist_result(entries, video_id)
|
||||||
|
video_data = video_data[0]
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
|
subtitles = {}
|
||||||
for f in video_data:
|
for f in video_data:
|
||||||
format_id = f['stream_type']
|
format_id = f['stream_type']
|
||||||
if f and isinstance(f, dict):
|
if f and isinstance(f, dict):
|
||||||
|
@ -399,22 +621,14 @@ class FacebookIE(InfoExtractor):
|
||||||
'url': src,
|
'url': src,
|
||||||
'preference': preference,
|
'preference': preference,
|
||||||
})
|
})
|
||||||
dash_manifest = f[0].get('dash_manifest')
|
extract_dash_manifest(f[0], formats)
|
||||||
if dash_manifest:
|
|
||||||
formats.extend(self._parse_mpd_formats(
|
|
||||||
compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
|
|
||||||
subtitles_src = f[0].get('subtitles_src')
|
subtitles_src = f[0].get('subtitles_src')
|
||||||
if subtitles_src:
|
if subtitles_src:
|
||||||
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
subtitles.setdefault('en', []).append({'url': subtitles_src})
|
||||||
if not formats:
|
if not formats:
|
||||||
raise ExtractorError('Cannot find video formats')
|
raise ExtractorError('Cannot find video formats')
|
||||||
|
|
||||||
# Downloads with browser's User-Agent are rate limited. Working around
|
process_formats(formats)
|
||||||
# with non-browser User-Agent.
|
|
||||||
for f in formats:
|
|
||||||
f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
video_title = self._html_search_regex(
|
video_title = self._html_search_regex(
|
||||||
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
|
||||||
|
@ -454,35 +668,13 @@ class FacebookIE(InfoExtractor):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
return webpage, info_dict
|
return info_dict
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
|
real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
|
||||||
webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
|
return self._extract_from_url(real_url, video_id)
|
||||||
|
|
||||||
if info_dict:
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
if '/posts/' in url:
|
|
||||||
video_id_json = self._search_regex(
|
|
||||||
r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
|
|
||||||
default='')
|
|
||||||
if video_id_json:
|
|
||||||
entries = [
|
|
||||||
self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
|
|
||||||
for vid in self._parse_json(video_id_json, video_id)]
|
|
||||||
return self.playlist_result(entries, video_id)
|
|
||||||
|
|
||||||
# Single Video?
|
|
||||||
video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
|
|
||||||
return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
|
|
||||||
else:
|
|
||||||
_, info_dict = self._extract_from_url(
|
|
||||||
self._VIDEO_PAGE_TEMPLATE % video_id,
|
|
||||||
video_id, fatal_if_no_video=True)
|
|
||||||
return info_dict
|
|
||||||
|
|
||||||
|
|
||||||
class FacebookPluginsVideoIE(InfoExtractor):
|
class FacebookPluginsVideoIE(InfoExtractor):
|
||||||
|
|
|
@ -11,7 +11,7 @@ from ..utils import (
|
||||||
|
|
||||||
class FranceCultureIE(InfoExtractor):
|
class FranceCultureIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'rendez-vous-au-pays-des-geeks',
|
'id': 'rendez-vous-au-pays-des-geeks',
|
||||||
|
@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
|
||||||
'title': 'Rendez-vous au pays des geeks',
|
'title': 'Rendez-vous au pays des geeks',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
'upload_date': '20140301',
|
'upload_date': '20140301',
|
||||||
'timestamp': 1393642916,
|
'timestamp': 1393700400,
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# no thumbnail
|
||||||
|
'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
|
||||||
</h1>|
|
</h1>|
|
||||||
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
|
||||||
).*?
|
).*?
|
||||||
(<button[^>]+data-asset-source="[^"]+"[^>]+>)
|
(<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
|
||||||
''',
|
''',
|
||||||
webpage, 'video data'))
|
webpage, 'video data'))
|
||||||
|
|
||||||
video_url = video_data['data-asset-source']
|
video_url = video_data.get('data-url') or video_data['data-asset-source']
|
||||||
title = video_data.get('data-asset-title') or self._og_search_title(webpage)
|
title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
|
||||||
webpage, 'description', default=None)
|
webpage, 'description', default=None)
|
||||||
thumbnail = self._search_regex(
|
thumbnail = self._search_regex(
|
||||||
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
|
||||||
webpage, 'thumbnail', fatal=False)
|
webpage, 'thumbnail', default=None)
|
||||||
uploader = self._html_search_regex(
|
uploader = self._html_search_regex(
|
||||||
r'(?s)<span class="author">(.*?)</span>',
|
r'(?s)<span class="author">(.*?)</span>',
|
||||||
webpage, 'uploader', default=None)
|
webpage, 'uploader', default=None)
|
||||||
|
@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
|
||||||
'ext': ext,
|
'ext': ext,
|
||||||
'vcodec': 'none' if ext == 'mp3' else None,
|
'vcodec': 'none' if ext == 'mp3' else None,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': int_or_none(video_data.get('data-asset-created-date')),
|
'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
|
||||||
'duration': int_or_none(video_data.get('data-duration')),
|
'duration': int_or_none(video_data.get('data-duration')),
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'ext': 'mp3',
|
'ext': 'mp3',
|
||||||
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
|
||||||
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
'description': 'md5:401969c5d318c061f86bda1fa359292b',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'upload_date': '20160907',
|
'upload_date': '20160907',
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
|
|
||||||
title = self._og_search_title(webpage)
|
title = self._og_search_title(webpage)
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage)
|
||||||
|
thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
|
||||||
|
|
||||||
upload_date_str = self._search_regex(
|
upload_date_str = self._search_regex(
|
||||||
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
|
||||||
|
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'formats': [{
|
'formats': [{
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
|
|
@ -17,6 +17,7 @@ from ..utils import (
|
||||||
parse_duration,
|
parse_duration,
|
||||||
try_get,
|
try_get,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
|
|
||||||
|
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
|
||||||
|
|
||||||
is_live = None
|
is_live = None
|
||||||
|
|
||||||
formats = []
|
videos = []
|
||||||
for video in info['videos']:
|
|
||||||
if video['statut'] != 'ONLINE':
|
for video in (info.get('videos') or []):
|
||||||
|
if video.get('statut') != 'ONLINE':
|
||||||
continue
|
continue
|
||||||
video_url = video['url']
|
if not video.get('url'):
|
||||||
|
continue
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
|
if not videos:
|
||||||
|
for device_type in ['desktop', 'mobile']:
|
||||||
|
fallback_info = self._download_json(
|
||||||
|
'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
|
||||||
|
video_id, 'Downloading fallback %s video JSON' % device_type, query={
|
||||||
|
'device_type': device_type,
|
||||||
|
'browser': 'chrome',
|
||||||
|
}, fatal=False)
|
||||||
|
|
||||||
|
if fallback_info and fallback_info.get('video'):
|
||||||
|
videos.append(fallback_info['video'])
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for video in videos:
|
||||||
|
video_url = video.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
if is_live is None:
|
if is_live is None:
|
||||||
is_live = (try_get(
|
is_live = (try_get(
|
||||||
video, lambda x: x['plages_ouverture'][0]['direct'],
|
video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
|
||||||
bool) is True) or '/live.francetv.fr/' in video_url
|
or video.get('is_live') is True
|
||||||
format_id = video['format']
|
or '/live.francetv.fr/' in video_url)
|
||||||
|
format_id = video.get('format')
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
if georestricted:
|
if georestricted:
|
||||||
|
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
|
||||||
sign(video_url, format_id), video_id, 'mp4',
|
sign(video_url, format_id), video_id, 'mp4',
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
entry_protocol='m3u8_native', m3u8_id=format_id,
|
||||||
fatal=False))
|
fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
|
||||||
elif video_url.startswith('rtmp'):
|
elif video_url.startswith('rtmp'):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
|
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
})
|
})
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
title = info['titre']
|
title = info['titre']
|
||||||
|
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._live_title(title) if is_live else title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'description': clean_html(info['synopsis']),
|
'description': clean_html(info.get('synopsis')),
|
||||||
'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
|
'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
|
||||||
'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
|
'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
|
||||||
'timestamp': int_or_none(info['diffusion']['timestamp']),
|
'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
|
||||||
'is_live': is_live,
|
'is_live': is_live,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
|
35
haruhi_dl/extractor/fujitv.py
Normal file
35
haruhi_dl/extractor/fujitv.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class FujiTVFODPlus7IE(InfoExtractor):
    # Matches i.fod.fujitv.co.jp "plus7" web URLs; the trailing path
    # component is captured as the video id.
    _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
    # Looked up by a format's 'tbr' value; each entry is (width, height).
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
    }

    def _real_extract(self, url):
        # Returns an info dict whose formats come from the m3u8 playlist
        # located under _BASE_URL for the matched video id.
        video_id = self._match_id(url)

        playlist_path = 'abr/pc_html5/%s.m3u8' % video_id
        formats = self._extract_m3u8_formats(
            self._BASE_URL + playlist_path, video_id)

        # Attach width/height to every format whose 'tbr' is a key of
        # _BITRATE_MAP; formats with any other 'tbr' are left untouched.
        for fmt in formats:
            dimensions = self._BITRATE_MAP.get(fmt.get('tbr'))
            if not dimensions:
                continue
            fmt['width'], fmt['height'] = dimensions
        self._sort_formats(formats)

        thumbnail_path = 'pc/image/wbtn/wbtn_%s.jpg' % video_id
        return {
            'id': video_id,
            # No separate title is extracted; the id doubles as the title.
            'title': video_id,
            'formats': formats,
            'thumbnail': self._BASE_URL + thumbnail_path,
        }
|
|
@ -1,77 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .adobepass import AdobePassIE
|
|
||||||
from ..utils import (
|
|
||||||
extract_attributes,
|
|
||||||
int_or_none,
|
|
||||||
parse_age_limit,
|
|
||||||
smuggle_url,
|
|
||||||
update_url_query,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class FXNetworksIE(AdobePassIE):
    # Matches numeric video pages on fxnetworks.com and simpsonsworld.com.
    _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.fxnetworks.com/video/1032565827847',
        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
        'info_dict': {
            'id': 'dRzwHC_MMqIv',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
            'age_limit': 14,
            'uploader': 'NEWA-FNG-FX',
            'upload_date': '20170825',
            'timestamp': 1503686274,
            'episode_number': 0,
            'season_number': 2,
            'series': 'Better Things',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.simpsonsworld.com/video/716094019682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Scrapes the page's theplatform.com anchor and returns a
        # url_transparent result that delegates to the ThePlatform extractor.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        geo_block_notice = 'The content you are trying to access is not available in your region.'
        if geo_block_notice in webpage:
            self.raise_geo_restricted()

        # All metadata is read from the attributes of the <a> element whose
        # rel attribute points at link.theplatform.com.
        anchor_html = self._search_regex(
            r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')
        video_data = extract_attributes(anchor_html)
        player_type = self._search_regex(
            r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)

        release_url = video_data['rel']
        title = video_data['data-title']
        rating = video_data.get('data-rating')

        # 'movies' players get the m3u manifest parameter, everything else
        # gets switch=http.
        query = {'mbr': 'true'}
        if player_type == 'movies':
            query['manifest'] = 'm3u'
        else:
            query['switch'] = 'http'

        # An auth token is requested only when the page sets data-req-auth="1".
        if video_data.get('data-req-auth') == '1':
            channel = video_data['data-channel']
            guid = video_data.get('data-guid')
            resource = self._get_mvpd_resource(channel, title, guid, rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)

        target_url = smuggle_url(
            update_url_query(release_url, query), {'force_smil_url': True})
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': target_url,
            'series': video_data.get('data-show-title'),
            'episode_number': int_or_none(video_data.get('data-episode')),
            'season_number': int_or_none(video_data.get('data-season')),
            'thumbnail': video_data.get('data-large-thumb'),
            'age_limit': parse_age_limit(rating),
            'ie_key': 'ThePlatform',
        }
|
|
|
@ -1,16 +1,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .once import OnceIE
|
from .once import OnceIE
|
||||||
from ..compat import (
|
from ..compat import compat_urllib_parse_unquote
|
||||||
compat_urllib_parse_unquote,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
|
||||||
unescapeHTML,
|
|
||||||
url_basename,
|
|
||||||
dict_get,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class GameSpotIE(OnceIE):
|
class GameSpotIE(OnceIE):
|
||||||
|
@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
|
||||||
'title': 'Arma 3 - Community Guide: SITREP I',
|
'title': 'Arma 3 - Community Guide: SITREP I',
|
||||||
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
'description': 'Check out this video where some of the basics of Arma 3 is explained.',
|
||||||
},
|
},
|
||||||
|
'skip': 'manifest URL give HTTP Error 404: Not Found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
|
||||||
|
'md5': '173ea87ad762cf5d3bf6163dceb255a6',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'gs-2300-6424837',
|
'id': 'gs-2300-6424837',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
'title': 'Now Playing - The Witcher 3: Wild Hunt',
|
||||||
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
'skip_download': True, # m3u8 downloads
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
'url': 'https://www.gamespot.com/videos/embed/6439218/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
page_id = self._match_id(url)
|
page_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, page_id)
|
webpage = self._download_webpage(url, page_id)
|
||||||
data_video_json = self._search_regex(
|
data_video = self._parse_json(self._html_search_regex(
|
||||||
r'data-video=["\'](.*?)["\']', webpage, 'data video')
|
r'data-video=(["\'])({.*?})\1', webpage,
|
||||||
data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
|
'video data', group=2), page_id)
|
||||||
|
title = compat_urllib_parse_unquote(data_video['title'])
|
||||||
streams = data_video['videoStreams']
|
streams = data_video['videoStreams']
|
||||||
|
|
||||||
manifest_url = None
|
|
||||||
formats = []
|
formats = []
|
||||||
f4m_url = streams.get('f4m_stream')
|
|
||||||
if f4m_url:
|
m3u8_url = streams.get('adaptive_stream')
|
||||||
manifest_url = f4m_url
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
|
|
||||||
m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
|
|
||||||
if m3u8_url:
|
if m3u8_url:
|
||||||
manifest_url = m3u8_url
|
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
m3u8_url, page_id, 'mp4', 'm3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
formats.extend(m3u8_formats)
|
for f in m3u8_formats:
|
||||||
progressive_url = dict_get(
|
|
||||||
streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
|
|
||||||
if progressive_url and manifest_url:
|
|
||||||
qualities_basename = self._search_regex(
|
|
||||||
r'/([^/]+)\.csmil/',
|
|
||||||
manifest_url, 'qualities basename', default=None)
|
|
||||||
if qualities_basename:
|
|
||||||
QUALITIES_RE = r'((,\d+)+,?)'
|
|
||||||
qualities = self._search_regex(
|
|
||||||
QUALITIES_RE, qualities_basename,
|
|
||||||
'qualities', default=None)
|
|
||||||
if qualities:
|
|
||||||
qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
|
|
||||||
qualities.sort()
|
|
||||||
http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
|
|
||||||
http_url_basename = url_basename(progressive_url)
|
|
||||||
if m3u8_formats:
|
|
||||||
self._sort_formats(m3u8_formats)
|
|
||||||
m3u8_formats = list(filter(
|
|
||||||
lambda f: f.get('vcodec') != 'none', m3u8_formats))
|
|
||||||
if len(qualities) == len(m3u8_formats):
|
|
||||||
for q, m3u8_format in zip(qualities, m3u8_formats):
|
|
||||||
f = m3u8_format.copy()
|
|
||||||
f.update({
|
|
||||||
'url': progressive_url.replace(
|
|
||||||
http_url_basename, http_template % q),
|
|
||||||
'format_id': f['format_id'].replace('hls', 'http'),
|
|
||||||
'protocol': 'http',
|
|
||||||
})
|
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
else:
|
http_f = f.copy()
|
||||||
for q in qualities:
|
del http_f['manifest_url']
|
||||||
formats.append({
|
http_f.update({
|
||||||
'url': progressive_url.replace(
|
'format_id': f['format_id'].replace('hls-', 'http-'),
|
||||||
http_url_basename, http_template % q),
|
'protocol': 'http',
|
||||||
'ext': 'mp4',
|
'url': f['url'].replace('.m3u8', '.mp4'),
|
||||||
'format_id': 'http-%d' % q,
|
|
||||||
'tbr': q,
|
|
||||||
})
|
})
|
||||||
|
formats.append(http_f)
|
||||||
|
|
||||||
onceux_json = self._search_regex(
|
mpd_url = streams.get('adaptive_dash')
|
||||||
r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
|
if mpd_url:
|
||||||
if onceux_json:
|
formats.extend(self._extract_mpd_formats(
|
||||||
onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
|
mpd_url, page_id, mpd_id='dash', fatal=False))
|
||||||
if onceux_url:
|
|
||||||
formats.extend(self._extract_once_formats(re.sub(
|
|
||||||
r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
|
|
||||||
http_formats_preference=-1))
|
|
||||||
|
|
||||||
if not formats:
|
|
||||||
for quality in ['sd', 'hd']:
|
|
||||||
# It's actually a link to a flv file
|
|
||||||
flv_url = streams.get('f4m_{0}'.format(quality))
|
|
||||||
if flv_url is not None:
|
|
||||||
formats.append({
|
|
||||||
'url': flv_url,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': quality,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': data_video['guid'],
|
'id': data_video.get('guid') or page_id,
|
||||||
'display_id': page_id,
|
'display_id': page_id,
|
||||||
'title': compat_urllib_parse_unquote(data_video['title']),
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'description': self._html_search_meta('description', webpage),
|
'description': self._html_search_meta('description', webpage),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
'thumbnail': self._og_search_thumbnail(webpage),
|
||||||
|
|
161
haruhi_dl/extractor/gedidigital.py
Normal file
161
haruhi_dl/extractor/gedidigital.py
Normal file
|
@ -0,0 +1,161 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GediDigitalIE(InfoExtractor):
    # Matches video.<site>.it and video.<site>.gelocal.it pages for the sites
    # enumerated below; the first all-digit path segment found after two or
    # three leading segments is captured as the id.
    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:
                (?:espresso\.)?repubblica
                |lastampa
                |ilsecoloxix
            )|
            (?:
                iltirreno
                |messaggeroveneto
                |ilpiccolo
                |gazzettadimantova
                |mattinopadova
                |laprovinciapavese
                |tribunatreviso
                |nuovavenezia
                |gazzettadimodena
                |lanuovaferrara
                |corrierealpi
                |lasentinella
            )\.gelocal
        )\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
    _TESTS = [{
        'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
        'md5': '84658d7fb9e55a6e57ecc77b73137494',
        'info_dict': {
            'id': '121559',
            'ext': 'mp4',
            'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
            'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
            'duration': 125,
        },
    }, {
        'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
        'only_matching': True,
    }, {
        'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
        'only_matching': True,
    }, {
        'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
        'only_matching': True,
    }, {
        'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
        'only_matching': True,
    }, {
        'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
        'only_matching': True,
    }, {
        'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
        'only_matching': True,
    }, {
        'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
        'only_matching': True,
    }, {
        'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
        'only_matching': True,
    }, {
        'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
        'only_matching': True,
    }, {
        'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
        'only_matching': True,
    }, {
        'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
        'only_matching': True,
    }, {
        'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
        'only_matching': True,
    }, {
        'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
        'only_matching': True,
    }, {
        'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
        'only_matching': True,
    }, {
        'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
        'only_matching': True,
    }, {
        'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Collects formats, thumbnail and duration from the
        # PlayerFactory.setParam('format'|'param', name, value) calls in the
        # page, and title/description from its meta tags.
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._html_search_meta(
            ['twitter:title', 'og:title'], webpage, fatal=True)
        # Each match is a (type, name, value) tuple, in page order.
        player_params = re.findall(
            r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
            webpage)

        # Format names that are dropped without producing any format entry.
        ignored_formats = ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp')

        thumbnail = duration = None
        formats = []
        for kind, name, value in player_params:
            if kind == 'param':
                # The last image/image_full param seen wins as the thumbnail.
                if name in ['image_full', 'image']:
                    thumbnail = value
                elif name == 'videoDuration':
                    duration = int_or_none(value)
                continue
            if kind != 'format':
                continue

            if name in ignored_formats:
                continue
            if name.endswith('-vod-ak'):
                formats.extend(self._extract_akamai_formats(
                    value, video_id, {'http': 'media.gedidigital.it'}))
                continue

            ext = determine_ext(value)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    value, video_id, 'mp4', 'm3u8_native', m3u8_id=name, fatal=False))
                continue

            # Anything else is treated as a direct media URL.
            fmt = {
                'format_id': name,
                'url': value,
            }
            if ext == 'mp3':
                # Audio-only entry: the bitrate is parsed out of the URL.
                abr = int_or_none(self._search_regex(
                    r'-mp3-audio-(\d+)', value, 'abr', default=None))
                fmt['abr'] = abr
                fmt['tbr'] = abr
                fmt['vcodec'] = 'none'
            else:
                # The format name may carry the height and, optionally, the
                # video bitrate; otherwise fall back to the number in the URL.
                name_match = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', name)
                if name_match:
                    fmt['height'] = int(name_match.group(1))
                    fmt['vbr'] = int_or_none(name_match.group(2))
                if not fmt.get('vbr'):
                    fmt['vbr'] = int_or_none(self._search_regex(
                        r'-video-rrtv-(\d+)', value, 'abr', default=None))
            formats.append(fmt)

        self._sort_formats(formats)

        description = self._html_search_meta(
            ['twitter:description', 'og:description', 'description'], webpage)
        # The Open Graph thumbnail is only looked up when no player param
        # supplied one.
        thumbnail = thumbnail or self._og_search_thumbnail(webpage)
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'duration': duration,
        }
|
|
@ -20,19 +20,24 @@ from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
int_or_none,
|
||||||
is_html,
|
is_html,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
KNOWN_EXTENSIONS,
|
KNOWN_EXTENSIONS,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
mimetype2ext,
|
mimetype2ext,
|
||||||
orderedSet,
|
orderedSet,
|
||||||
|
parse_duration,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_timestamp,
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
|
url_or_none,
|
||||||
|
xpath_attr,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
|
xpath_with_ns,
|
||||||
)
|
)
|
||||||
from .commonprotocols import RtmpIE
|
from .commonprotocols import RtmpIE
|
||||||
from .brightcove import (
|
from .brightcove import (
|
||||||
|
@ -48,7 +53,6 @@ from .ooyala import OoyalaIE
|
||||||
from .rutv import RUTVIE
|
from .rutv import RUTVIE
|
||||||
from .tvc import TVCIE
|
from .tvc import TVCIE
|
||||||
from .sportbox import SportBoxIE
|
from .sportbox import SportBoxIE
|
||||||
from .smotri import SmotriIE
|
|
||||||
from .myvi import MyviIE
|
from .myvi import MyviIE
|
||||||
from .condenast import CondeNastIE
|
from .condenast import CondeNastIE
|
||||||
from .udn import UDNEmbedIE
|
from .udn import UDNEmbedIE
|
||||||
|
@ -63,7 +67,10 @@ from .tube8 import Tube8IE
|
||||||
from .mofosex import MofosexEmbedIE
|
from .mofosex import MofosexEmbedIE
|
||||||
from .spankwire import SpankwireIE
|
from .spankwire import SpankwireIE
|
||||||
from .youporn import YouPornIE
|
from .youporn import YouPornIE
|
||||||
from .vimeo import VimeoIE
|
from .vimeo import (
|
||||||
|
VimeoIE,
|
||||||
|
VHXEmbedIE,
|
||||||
|
)
|
||||||
from .dailymotion import DailymotionIE
|
from .dailymotion import DailymotionIE
|
||||||
from .dailymail import DailyMailIE
|
from .dailymail import DailyMailIE
|
||||||
from .onionstudios import OnionStudiosIE
|
from .onionstudios import OnionStudiosIE
|
||||||
|
@ -91,6 +98,7 @@ from .piksel import PikselIE
|
||||||
from .videa import VideaIE
|
from .videa import VideaIE
|
||||||
from .twentymin import TwentyMinutenIE
|
from .twentymin import TwentyMinutenIE
|
||||||
from .ustream import UstreamIE
|
from .ustream import UstreamIE
|
||||||
|
from .arte import ArteTVEmbedIE
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .rutube import RutubeIE
|
from .rutube import RutubeIE
|
||||||
from .limelight import LimelightBaseIE
|
from .limelight import LimelightBaseIE
|
||||||
|
@ -125,6 +133,9 @@ from .rtlnl import RtlNlIE
|
||||||
from .xnews import XLinkIE
|
from .xnews import XLinkIE
|
||||||
from .libsyn import LibsynIE
|
from .libsyn import LibsynIE
|
||||||
from .pulsembed import PulsEmbedIE
|
from .pulsembed import PulsEmbedIE
|
||||||
|
from .arcpublishing import ArcPublishingIE
|
||||||
|
from .medialaan import MedialaanIE
|
||||||
|
from .simplecast import SimplecastIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -203,11 +214,48 @@ class GenericIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
'ext': 'm4v',
|
'title': 'MSNBC Rachel Maddow (video)',
|
||||||
'upload_date': '20150228',
|
'description': 're:.*her unique approach to storytelling.*',
|
||||||
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
},
|
||||||
}
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'mov',
|
||||||
|
'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
|
||||||
|
'title': 're:MSNBC Rachel Maddow',
|
||||||
|
'description': 're:.*her unique approach to storytelling.*',
|
||||||
|
'timestamp': int,
|
||||||
|
'upload_date': compat_str,
|
||||||
|
'duration': float,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
# RSS feed with item with description and thumbnails
|
||||||
|
{
|
||||||
|
'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
|
||||||
|
'title': 're:.*100% Hydrogen.*',
|
||||||
|
'description': 're:.*In this episode.*',
|
||||||
|
},
|
||||||
|
'playlist': [{
|
||||||
|
'info_dict': {
|
||||||
|
'ext': 'm4a',
|
||||||
|
'id': 'c1c879525ce2cb640b344507e682c36d',
|
||||||
|
'title': 're:Hydrogen!',
|
||||||
|
'description': 're:.*In this episode we are going.*',
|
||||||
|
'timestamp': 1567977776,
|
||||||
|
'upload_date': '20190908',
|
||||||
|
'duration': 459,
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'episode_number': 1,
|
||||||
|
'season_number': 1,
|
||||||
|
'age_limit': 0,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
# RSS feed with enclosures and unsupported link URLs
|
# RSS feed with enclosures and unsupported link URLs
|
||||||
{
|
{
|
||||||
|
@ -2111,23 +2159,23 @@ class GenericIE(InfoExtractor):
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
# {
|
||||||
# Zype embed
|
# # Zype embed
|
||||||
'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
# 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
|
||||||
'info_dict': {
|
# 'info_dict': {
|
||||||
'id': '5b400b834b32992a310622b9',
|
# 'id': '5b400b834b32992a310622b9',
|
||||||
'ext': 'mp4',
|
# 'ext': 'mp4',
|
||||||
'title': 'Smoky Barbecue Favorites',
|
# 'title': 'Smoky Barbecue Favorites',
|
||||||
'thumbnail': r're:^https?://.*\.jpe?g',
|
# 'thumbnail': r're:^https?://.*\.jpe?g',
|
||||||
'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
# 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
|
||||||
'upload_date': '20170909',
|
# 'upload_date': '20170909',
|
||||||
'timestamp': 1504915200,
|
# 'timestamp': 1504915200,
|
||||||
},
|
# },
|
||||||
'add_ie': [ZypeIE.ie_key()],
|
# 'add_ie': [ZypeIE.ie_key()],
|
||||||
'params': {
|
# 'params': {
|
||||||
'skip_download': True,
|
# 'skip_download': True,
|
||||||
},
|
# },
|
||||||
},
|
# },
|
||||||
{
|
{
|
||||||
# videojs embed
|
# videojs embed
|
||||||
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
|
||||||
|
@ -2205,6 +2253,54 @@ class GenericIE(InfoExtractor):
|
||||||
# 'force_generic_extractor': True,
|
# 'force_generic_extractor': True,
|
||||||
# },
|
# },
|
||||||
# }
|
# }
|
||||||
|
{
|
||||||
|
# VHX Embed
|
||||||
|
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '858208',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Untitled',
|
||||||
|
'uploader_id': 'user80538407',
|
||||||
|
'uploader': 'OTT Videos',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# ArcPublishing PoWa video player
|
||||||
|
'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
|
||||||
|
'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Senate candidates wave to voters on Anchorage streets',
|
||||||
|
'description': 'md5:91f51a6511f090617353dc720318b20e',
|
||||||
|
'timestamp': 1604378735,
|
||||||
|
'upload_date': '20201103',
|
||||||
|
'duration': 1581,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# MyChannels SDK embed
|
||||||
|
# https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
|
||||||
|
'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
|
||||||
|
'md5': '90c0699c37006ef18e198c032d81739c',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '194165',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
|
||||||
|
'timestamp': 1611740340,
|
||||||
|
'upload_date': '20210127',
|
||||||
|
'duration': 159,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Simplecast player embed
|
||||||
|
'url': 'https://www.bio.org/podcast',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'podcast',
|
||||||
|
'title': 'I AM BIO Podcast | BIO',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 52,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -2216,6 +2312,10 @@ class GenericIE(InfoExtractor):
|
||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||||
|
|
||||||
|
NS_MAP = {
|
||||||
|
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
|
||||||
|
}
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for it in doc.findall('./channel/item'):
|
for it in doc.findall('./channel/item'):
|
||||||
next_url = None
|
next_url = None
|
||||||
|
@ -2231,10 +2331,33 @@ class GenericIE(InfoExtractor):
|
||||||
if not next_url:
|
if not next_url:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
def itunes(key):
    # Text of the current item's <itunes:KEY> child, resolved through the
    # 'itunes' prefix in NS_MAP; None is passed as the default for xpath_text.
    tag_path = xpath_with_ns('./itunes:%s' % key, NS_MAP)
    return xpath_text(it, tag_path, default=None)
|
||||||
|
|
||||||
|
duration = itunes('duration')
|
||||||
|
explicit = (itunes('explicit') or '').lower()
|
||||||
|
if explicit in ('true', 'yes'):
|
||||||
|
age_limit = 18
|
||||||
|
elif explicit in ('false', 'no'):
|
||||||
|
age_limit = 0
|
||||||
|
else:
|
||||||
|
age_limit = None
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'url': next_url,
|
'url': next_url,
|
||||||
'title': it.find('title').text,
|
'title': it.find('title').text,
|
||||||
|
'description': xpath_text(it, 'description', default=None),
|
||||||
|
'timestamp': unified_timestamp(
|
||||||
|
xpath_text(it, 'pubDate', default=None)),
|
||||||
|
'duration': int_or_none(duration) or parse_duration(duration),
|
||||||
|
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
|
||||||
|
'episode': itunes('title'),
|
||||||
|
'episode_number': int_or_none(itunes('episode')),
|
||||||
|
'season_number': int_or_none(itunes('season')),
|
||||||
|
'age_limit': age_limit,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -2354,7 +2477,7 @@ class GenericIE(InfoExtractor):
|
||||||
info_dict = {
|
info_dict = {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': self._generic_title(url),
|
'title': self._generic_title(url),
|
||||||
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
|
'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
|
@ -2417,6 +2540,9 @@ class GenericIE(InfoExtractor):
|
||||||
webpage = self._webpage_read_content(
|
webpage = self._webpage_read_content(
|
||||||
full_response, url, video_id, prefix=first_bytes)
|
full_response, url, video_id, prefix=first_bytes)
|
||||||
|
|
||||||
|
if '<title>DPG Media Privacy Gate</title>' in webpage:
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
# Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
|
||||||
|
@ -2592,6 +2718,10 @@ class GenericIE(InfoExtractor):
|
||||||
SVTIE,
|
SVTIE,
|
||||||
XLinkIE,
|
XLinkIE,
|
||||||
LibsynIE,
|
LibsynIE,
|
||||||
|
VHXEmbedIE,
|
||||||
|
ArcPublishingIE,
|
||||||
|
MedialaanIE,
|
||||||
|
SimplecastIE,
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
ie_key = embie.ie_key()
|
ie_key = embie.ie_key()
|
||||||
|
@ -2751,11 +2881,9 @@ class GenericIE(InfoExtractor):
|
||||||
return self.url_result(ustream_url, UstreamIE.ie_key())
|
return self.url_result(ustream_url, UstreamIE.ie_key())
|
||||||
|
|
||||||
# Look for embedded arte.tv player
|
# Look for embedded arte.tv player
|
||||||
mobj = re.search(
|
arte_urls = ArteTVEmbedIE._extract_urls(webpage)
|
||||||
r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
|
if arte_urls:
|
||||||
webpage)
|
return self.playlist_from_matches(arte_urls, video_id, video_title)
|
||||||
if mobj is not None:
|
|
||||||
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
|
|
||||||
|
|
||||||
# Look for embedded francetv player
|
# Look for embedded francetv player
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
|
@ -2764,11 +2892,6 @@ class GenericIE(InfoExtractor):
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'))
|
return self.url_result(mobj.group('url'))
|
||||||
|
|
||||||
# Look for embedded smotri.com player
|
|
||||||
smotri_url = SmotriIE._extract_url(webpage)
|
|
||||||
if smotri_url:
|
|
||||||
return self.url_result(smotri_url, 'Smotri')
|
|
||||||
|
|
||||||
# Look for embedded Myvi.ru player
|
# Look for embedded Myvi.ru player
|
||||||
myvi_url = MyviIE._extract_url(webpage)
|
myvi_url = MyviIE._extract_url(webpage)
|
||||||
if myvi_url:
|
if myvi_url:
|
||||||
|
|
|
@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
|
||||||
'disneynow': {
|
'disneynow': {
|
||||||
'brand': '011',
|
'brand': '011',
|
||||||
'resource_id': 'Disney',
|
'resource_id': 'Disney',
|
||||||
}
|
},
|
||||||
|
'fxnow.fxnetworks': {
|
||||||
|
'brand': '025',
|
||||||
|
'requestor_id': 'dtci',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:(?P<sub_domain>%s)\.)?go|
|
(?:(?P<sub_domain>%s)\.)?go|
|
||||||
(?P<sub_domain_2>abc|freeform|disneynow)
|
(?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
|
||||||
)\.com/
|
)\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||||
|
@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'VDKA12782841',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'First Look: Better Things - Season 2',
|
||||||
|
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -1,73 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
import codecs
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import unified_strdate
|
|
||||||
|
|
||||||
|
|
||||||
class GooglePlusIE(InfoExtractor):
    # Extractor for videos attached to plus.google.com posts.
    IE_DESC = 'Google Plus'
    _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
    IE_NAME = 'plus.google'
    _TEST = {
        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
        'info_dict': {
            'id': 'ZButuJc6CtH',
            'ext': 'flv',
            'title': '嘆きの天使 降臨',
            'upload_date': '20120613',
            'uploader': '井上ヨシマサ',
        }
    }

    def _real_extract(self, url):
        """Resolve a post URL into an info dict with one FLV format per size.

        Two pages are downloaded: the post itself (title, upload date,
        uploader) and the linked photo/video page (the media URLs).
        """
        video_id = self._match_id(url)

        # First request: the post page carries the descriptive metadata.
        post_page = self._download_webpage(url, video_id, 'Downloading entry webpage')

        # The title is the first line of the Open Graph description.
        title = self._og_search_description(post_page).splitlines()[0]
        raw_date = self._html_search_regex(
            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
            post_page, 'upload date', fatal=False, flags=re.VERBOSE)
        upload_date = unified_strdate(raw_date)
        uploader = self._html_search_regex(
            r'rel="author".*?>(.*?)</a>', post_page, 'uploader', fatal=False)

        # Second request: follow the photo link, as clicking the image box would.
        DOMAIN = 'https://plus.google.com/'
        photo_link_re = r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN)
        video_page = self._search_regex(photo_link_re, post_page, 'video page URL')
        # The link may be relative to the site root; make it absolute.
        if not video_page.startswith(DOMAIN):
            video_page = DOMAIN + video_page

        media_page = self._download_webpage(video_page, video_id, 'Downloading video page')

        def unicode_escape(s):
            # Decode only the literal \uXXXX sequences, leaving other text untouched.
            decode = codecs.getdecoder('unicode_escape')
            return re.sub(
                r'\\u[0-9a-fA-F]{4,}',
                lambda hit: decode(hit.group(0))[0],
                s)

        # Each match is (width, height, url); collect one format per match.
        formats = []
        for width, height, video_url in re.findall(
                r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', media_page):
            formats.append({
                'url': unicode_escape(video_url),
                'ext': 'flv',
                'width': int(width),
                'height': int(height),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
|
|
88
haruhi_dl/extractor/googlepodcasts.py
Normal file
88
haruhi_dl/extractor/googlepodcasts.py
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_podcast_url,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
urlencode_postdata,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class GooglePodcastsBaseIE(InfoExtractor):
    # Shared plumbing for the Google Podcasts extractors: the common URL
    # prefix, the batchexecute RPC call and the episode-record mapping.
    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """POST one batchexecute call and return its decoded payload.

        The response body is trimmed to its outermost ``[...]`` span before
        JSON parsing; the payload at ``[0][2]`` is itself a JSON string and
        is decoded a second time.
        """
        rpc_envelope = [[[func_id, json.dumps(params), None, '1']]]
        post_body = urlencode_postdata({
            'f.req': json.dumps(rpc_envelope),
        })
        outer = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=post_body,
            transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))
        return json.loads(outer[0][2])

    def _extract_episode(self, episode):
        """Map a positional episode record onto an info dict.

        Only the creator (index 14) is read tolerantly; every other index is
        accessed directly and raises if the record is shorter than expected.
        """
        episode_id = episode[4][3]
        title = episode[8]
        media_url = clean_podcast_url(episode[13])
        thumbnail = episode[2]
        description = episode[9]
        creator = try_get(episode, lambda x: x[14])
        timestamp = int_or_none(episode[11])
        duration = int_or_none(episode[12])
        series = episode[1]
        return {
            'id': episode_id,
            'title': title,
            'url': media_url,
            'thumbnail': thumbnail,
            'description': description,
            'creator': creator,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class GooglePodcastsIE(GooglePodcastsBaseIE):
    # Single-episode extractor: the URL carries a feed segment and an episode segment.
    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        }
    }

    def _real_extract(self, url):
        """Look up one episode through the 'oNjqVe' RPC and convert it."""
        url_match = re.match(self._VALID_URL, url)
        # Both URL segments are forwarded to the RPC exactly as they appear.
        feed_segment = url_match.group('feed_url')
        episode_segment = url_match.group('id')
        rpc_result = self._batch_execute(
            'oNjqVe', episode_segment, [feed_segment, episode_segment])
        # The episode record sits at index 1 of the decoded payload.
        return self._extract_episode(rpc_result[1])
|
||||||
|
|
||||||
|
|
||||||
|
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    # Whole-feed extractor: returns every listed episode as a playlist.
    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Fetch the feed through the 'ncqJEe' RPC and build a playlist."""
        feed_segment = self._match_id(url)
        payload = self._batch_execute('ncqJEe', feed_segment, [feed_segment])

        # Episode records are read from payload[1][0]; a missing list yields
        # an empty playlist rather than an error.
        episode_records = try_get(payload, lambda x: x[1][0]) or []
        entries = [self._extract_episode(record) for record in episode_records]

        # Feed-level metadata is read from payload[3]: title at 0, description at 2.
        feed_meta = try_get(payload, lambda x: x[3]) or []
        feed_title = try_get(feed_meta, lambda x: x[0])
        feed_description = try_get(feed_meta, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=feed_title,
            playlist_description=feed_description)
|
|
@ -3,6 +3,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import hmac
|
import hmac
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
@ -25,43 +26,50 @@ from ..utils import (
|
||||||
class HotStarBaseIE(InfoExtractor):
|
class HotStarBaseIE(InfoExtractor):
|
||||||
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
_AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
|
||||||
|
|
||||||
def _call_api_impl(self, path, video_id, query):
|
def _call_api_impl(self, path, video_id, headers, query, data=None):
|
||||||
st = int(time.time())
|
st = int(time.time())
|
||||||
exp = st + 6000
|
exp = st + 6000
|
||||||
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
|
||||||
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
|
||||||
response = self._download_json(
|
h = {'hotstarauth': auth}
|
||||||
'https://api.hotstar.com/' + path, video_id, headers={
|
h.update(headers)
|
||||||
'hotstarauth': auth,
|
return self._download_json(
|
||||||
|
'https://api.hotstar.com/' + path,
|
||||||
|
video_id, headers=h, query=query, data=data)
|
||||||
|
|
||||||
|
def _call_api(self, path, video_id, query_name='contentId'):
|
||||||
|
response = self._call_api_impl(path, video_id, {
|
||||||
'x-country-code': 'IN',
|
'x-country-code': 'IN',
|
||||||
'x-platform-code': 'JIO',
|
'x-platform-code': 'JIO',
|
||||||
}, query=query)
|
}, {
|
||||||
|
query_name: video_id,
|
||||||
|
'tas': 10000,
|
||||||
|
})
|
||||||
if response['statusCode'] != 'OK':
|
if response['statusCode'] != 'OK':
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
response['body']['message'], expected=True)
|
response['body']['message'], expected=True)
|
||||||
return response['body']['results']
|
return response['body']['results']
|
||||||
|
|
||||||
def _call_api(self, path, video_id, query_name='contentId'):
|
def _call_api_v2(self, path, video_id, headers, query=None, data=None):
|
||||||
return self._call_api_impl(path, video_id, {
|
h = {'X-Request-Id': compat_str(uuid.uuid4())}
|
||||||
query_name: video_id,
|
h.update(headers)
|
||||||
'tas': 10000,
|
try:
|
||||||
})
|
|
||||||
|
|
||||||
def _call_api_v2(self, path, video_id):
|
|
||||||
return self._call_api_impl(
|
return self._call_api_impl(
|
||||||
'%s/in/contents/%s' % (path, video_id), video_id, {
|
path, video_id, h, query, data)
|
||||||
'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
|
except ExtractorError as e:
|
||||||
'client': 'mweb',
|
if isinstance(e.cause, compat_HTTPError):
|
||||||
'clientVersion': '6.18.0',
|
if e.cause.code == 402:
|
||||||
'deviceId': compat_str(uuid.uuid4()),
|
self.raise_login_required()
|
||||||
'osName': 'Windows',
|
message = self._parse_json(e.cause.read().decode(), video_id)['message']
|
||||||
'osVersion': '10',
|
if message in ('Content not available in region', 'Country is not supported'):
|
||||||
})
|
raise self.raise_geo_restricted(message)
|
||||||
|
raise ExtractorError(message)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
class HotStarIE(HotStarBaseIE):
|
class HotStarIE(HotStarBaseIE):
|
||||||
IE_NAME = 'hotstar'
|
IE_NAME = 'hotstar'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# contentData
|
# contentData
|
||||||
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
|
||||||
|
@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE):
|
||||||
# only available via api v2
|
# only available via api v2
|
||||||
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_GEO_BYPASS = False
|
_GEO_BYPASS = False
|
||||||
|
_DEVICE_ID = None
|
||||||
|
_USER_TOKEN = None
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE):
|
||||||
headers = {'Referer': url}
|
headers = {'Referer': url}
|
||||||
formats = []
|
formats = []
|
||||||
geo_restricted = False
|
geo_restricted = False
|
||||||
playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
|
|
||||||
|
if not self._USER_TOKEN:
|
||||||
|
self._DEVICE_ID = compat_str(uuid.uuid4())
|
||||||
|
self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
|
||||||
|
'X-HS-Platform': 'PCTV',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
}, data=json.dumps({
|
||||||
|
'device_ids': [{
|
||||||
|
'id': self._DEVICE_ID,
|
||||||
|
'type': 'device_id',
|
||||||
|
}],
|
||||||
|
}).encode())['user_identity']
|
||||||
|
|
||||||
|
playback_sets = self._call_api_v2(
|
||||||
|
'play/v2/playback/content/' + video_id, video_id, {
|
||||||
|
'X-HS-Platform': 'web',
|
||||||
|
'X-HS-AppVersion': '6.99.1',
|
||||||
|
'X-HS-UserToken': self._USER_TOKEN,
|
||||||
|
}, query={
|
||||||
|
'device-id': self._DEVICE_ID,
|
||||||
|
'desired-config': 'encryption:plain',
|
||||||
|
'os-name': 'Windows',
|
||||||
|
'os-version': '10',
|
||||||
|
})['data']['playBackSets']
|
||||||
for playback_set in playback_sets:
|
for playback_set in playback_sets:
|
||||||
if not isinstance(playback_set, dict):
|
if not isinstance(playback_set, dict):
|
||||||
continue
|
continue
|
||||||
|
@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE):
|
||||||
for f in formats:
|
for f in formats:
|
||||||
f.setdefault('http_headers', {}).update(headers)
|
f.setdefault('http_headers', {}).update(headers)
|
||||||
|
|
||||||
|
image = try_get(video_data, lambda x: x['image']['h'], compat_str)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
|
||||||
'description': video_data.get('description'),
|
'description': video_data.get('description'),
|
||||||
'duration': int_or_none(video_data.get('duration')),
|
'duration': int_or_none(video_data.get('duration')),
|
||||||
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'channel': video_data.get('channelName'),
|
'channel': video_data.get('channelName'),
|
||||||
'channel_id': video_data.get('channelId'),
|
'channel_id': str_or_none(video_data.get('channelId')),
|
||||||
'series': video_data.get('showName'),
|
'series': video_data.get('showName'),
|
||||||
'season': video_data.get('seasonName'),
|
'season': video_data.get('seasonName'),
|
||||||
'season_number': int_or_none(video_data.get('seasonNo')),
|
'season_number': int_or_none(video_data.get('seasonNo')),
|
||||||
'season_id': video_data.get('seasonId'),
|
'season_id': str_or_none(video_data.get('seasonId')),
|
||||||
'episode': title,
|
'episode': title,
|
||||||
'episode_number': int_or_none(video_data.get('episodeNo')),
|
'episode_number': int_or_none(video_data.get('episodeNo')),
|
||||||
}
|
}
|
||||||
|
@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE):
|
||||||
|
|
||||||
class HotStarPlaylistIE(HotStarBaseIE):
|
class HotStarPlaylistIE(HotStarBaseIE):
|
||||||
IE_NAME = 'hotstar:playlist'
|
IE_NAME = 'hotstar:playlist'
|
||||||
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
_VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -3,28 +3,39 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
HEADRequest,
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class IGNIE(InfoExtractor):
|
class IGNBaseIE(InfoExtractor):
    # Common base for the IGN extractors; subclasses set _PAGE_TYPE, which
    # selects the API family that is queried.
    def _call_api(self, slug):
        # _PAGE_TYPE appears twice in the path: once as the API name and once,
        # pluralised with a trailing "s", as the collection name.
        endpoint = 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug)
        return self._download_json(endpoint, slug)
|
||||||
|
|
||||||
|
|
||||||
|
class IGNIE(IGNBaseIE):
|
||||||
"""
|
"""
|
||||||
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
|
||||||
Some videos of it.ign.com are also supported
|
Some videos of it.ign.com are also supported
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
|
_VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
|
||||||
IE_NAME = 'ign.com'
|
IE_NAME = 'ign.com'
|
||||||
|
_PAGE_TYPE = 'video'
|
||||||
|
|
||||||
_API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
|
_TESTS = [{
|
||||||
_EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
|
|
||||||
|
|
||||||
_TESTS = [
|
|
||||||
{
|
|
||||||
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
|
||||||
'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
|
'md5': 'd2e1586d9987d40fad7867bf96a018ea',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '8f862beef863986b2785559b9e1aa599',
|
'id': '8f862beef863986b2785559b9e1aa599',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -32,13 +43,147 @@ class IGNIE(InfoExtractor):
|
||||||
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
|
||||||
'timestamp': 1370440800,
|
'timestamp': 1370440800,
|
||||||
'upload_date': '20130605',
|
'upload_date': '20130605',
|
||||||
'uploader_id': 'cberidon@ign.com',
|
'tags': 'count:9',
|
||||||
}
|
}
|
||||||
},
|
}, {
|
||||||
{
|
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
||||||
|
'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
|
||||||
|
'description': 'md5:817a20299de610bd56f13175386da6fa',
|
||||||
|
'timestamp': 1420571160,
|
||||||
|
'upload_date': '20150106',
|
||||||
|
'tags': 'count:4',
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Build the info dict for an IGN video page from its API record.

    The URL slug is looked up through ``_call_api``. Formats are gathered,
    in order, from the HLS manifest, the HDS manifest, the progressive
    assets and the mezzanine file, then sorted.
    """
    display_id = self._match_id(url)
    video = self._call_api(display_id)
    video_id = video['videoId']
    metadata = video['metadata']
    # 'name' is the last-resort title and is required when the others are missing.
    title = metadata.get('longTitle') or metadata.get('title') or metadata['name']

    refs = video.get('refs') or {}
    formats = []

    hls_manifest = refs.get('m3uUrl')
    if hls_manifest:
        formats.extend(self._extract_m3u8_formats(
            hls_manifest, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))

    hds_manifest = refs.get('f4mUrl')
    if hds_manifest:
        formats.extend(self._extract_f4m_formats(
            hds_manifest, video_id, f4m_id='hds', fatal=False))

    # Progressive assets: entries without a URL are ignored.
    for asset in (video.get('assets') or []):
        asset_url = asset.get('url')
        if asset_url:
            formats.append({
                'url': asset_url,
                # bitrate is divided by 1000 -- presumably bit/s to kbit/s; confirm against the API
                'tbr': int_or_none(asset.get('bitrate'), 1000),
                'fps': int_or_none(asset.get('frame_rate')),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })

    # The mezzanine file, when present, is given a raised preference.
    mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
    if mezzanine_url:
        formats.append({
            'ext': determine_ext(mezzanine_url, 'mp4'),
            'format_id': 'mezzanine',
            'preference': 1,
            'url': mezzanine_url,
        })

    self._sort_formats(formats)

    # Keep only the thumbnails and tags that actually carry a value.
    thumbnails = [
        {'url': thumb.get('url')}
        for thumb in (video.get('thumbnails') or [])
        if thumb.get('url')]

    tags = [
        tag.get('displayName')
        for tag in (video.get('tags') or [])
        if tag.get('displayName')]

    return {
        'id': video_id,
        'title': title,
        'description': strip_or_none(metadata.get('description')),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'display_id': display_id,
        'thumbnails': thumbnails,
        'formats': formats,
        'tags': tags,
    }
|
||||||
|
|
||||||
|
|
||||||
|
class IGNVideoIE(InfoExtractor):
    # Handles regional IGN "video"/"trailer" pages by resolving their /embed
    # redirect and delegating to whichever extractor matches the target.
    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
    _TESTS = [{
        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
        'info_dict': {
            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
            'ext': 'mp4',
            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
            'timestamp': 1444665600,
            'upload_date': '20151012',
        }
    }, {
        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
        'only_matching': True,
    }, {
        # Twitter embed
        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
        'only_matching': True,
    }, {
        # Vimeo embed
        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Follow the page's /embed URL and hand the final location onward."""
        video_id = self._match_id(url)
        # Swap the last path segment for "embed" and issue a HEAD request;
        # only the final URL after redirects is of interest.
        embed_url = url.rsplit('/', 1)[0] + '/embed'
        final_url = self._request_webpage(HEADRequest(embed_url), video_id).geturl()
        query_params = compat_parse_qs(compat_urllib_parse_urlparse(final_url).query)
        ign_url = query_params.get('url', [None])[0]
        if not ign_url:
            # No "url" query parameter: let generic URL dispatch handle the target.
            return self.url_result(final_url)
        return self.url_result(ign_url, IGNIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
|
class IGNArticleIE(IGNBaseIE):
|
||||||
|
_VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
|
||||||
|
_PAGE_TYPE = 'article'
|
||||||
|
_TESTS = [{
|
||||||
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
|
'id': '524497489e4e8ff5848ece34',
|
||||||
|
'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
|
||||||
},
|
},
|
||||||
'playlist': [
|
'playlist': [
|
||||||
{
|
{
|
||||||
|
@ -49,7 +194,6 @@ class IGNIE(InfoExtractor):
|
||||||
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
|
||||||
'timestamp': 1379339880,
|
'timestamp': 1379339880,
|
||||||
'upload_date': '20130916',
|
'upload_date': '20130916',
|
||||||
'uploader_id': 'danieljkrupa@gmail.com',
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -60,173 +204,54 @@ class IGNIE(InfoExtractor):
|
||||||
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
|
||||||
'timestamp': 1386878820,
|
'timestamp': 1386878820,
|
||||||
'upload_date': '20131212',
|
'upload_date': '20131212',
|
||||||
'uploader_id': 'togilvie@ign.com',
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
'params': {
|
'params': {
|
||||||
|
'playlist_items': '2-3',
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
},
|
}, {
|
||||||
{
|
|
||||||
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
|
||||||
'md5': '618fedb9c901fd086f6f093564ef8558',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '078fdd005f6d3c02f63d795faa1b984f',
|
'id': '53ee806780a81ec46e0790f8',
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
|
||||||
'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
|
|
||||||
'timestamp': 1408047180,
|
|
||||||
'upload_date': '20140814',
|
|
||||||
'uploader_id': 'jamesduggan1990@gmail.com',
|
|
||||||
},
|
},
|
||||||
},
|
'playlist_count': 2,
|
||||||
{
|
}, {
|
||||||
'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
|
|
||||||
'only_matching': True,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
# videoId pattern
|
# videoId pattern
|
||||||
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
|
||||||
]
|
|
||||||
|
|
||||||
def _find_video_id(self, webpage):
    """Return the first video id found in the page markup, or None.

    The patterns are handed to ``_search_regex`` as one ordered list and
    ``default=None`` is passed as the no-match value.
    """
    return self._search_regex(
        [
            r'"video_id"\s*:\s*"(.*?)"',
            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
            r'videoId"\s*:\s*"(.+?)"',
            r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
        ],
        webpage, 'video id', default=None)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a single video, a playlist of videos, or an embed redirect.

    Pages whose type is not 'video' are first scanned for flashvars links
    to ign.com videos; if any exist they are returned as a playlist.
    Otherwise a video id is searched for in the page, with the embed iframe
    as fallback.
    """
    url_match = re.match(self._VALID_URL, url)
    name_or_id = url_match.group('name_or_id')
    page_type = url_match.group('type')
    webpage = self._download_webpage(url, name_or_id)

    if page_type != 'video':
        linked_videos = re.findall(
            r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
            webpage)
        if linked_videos:
            return {
                '_type': 'playlist',
                'id': name_or_id,
                'entries': [self.url_result(link, ie='IGN') for link in linked_videos],
            }

    video_id = self._find_video_id(webpage)
    if video_id:
        return self._get_video_info(video_id)
    # No id in the markup: defer to whatever the embedded iframe points at.
    embed_url = self._search_regex(self._EMBED_RE, webpage, 'embed url')
    return self.url_result(embed_url)
|
|
||||||
|
|
||||||
def _get_video_info(self, video_id):
    """Fetch the API record for ``video_id`` and build the info dict.

    Formats are collected from three places in the API response: the
    HLS manifest (``refs.m3uUrl``), the HDS manifest (``refs.f4mUrl``)
    and the direct ``assets`` list. Manifest extraction is non-fatal, so
    a broken manifest does not discard the direct assets.
    """
    api_data = self._download_json(
        self._API_URL_TEMPLATE % video_id, video_id)

    formats = []
    m3u8_url = api_data['refs'].get('m3uUrl')
    if m3u8_url:
        formats.extend(self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False))
    f4m_url = api_data['refs'].get('f4mUrl')
    if f4m_url:
        formats.extend(self._extract_f4m_formats(
            f4m_url, video_id, f4m_id='hds', fatal=False))
    for asset in api_data['assets']:
        formats.append({
            'url': asset['url'],
            'tbr': asset.get('actual_bitrate_kbps'),
            'fps': asset.get('frame_rate'),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        })
    self._sort_formats(formats)

    thumbnails = [{
        'url': thumbnail['url']
    } for thumbnail in api_data.get('thumbnails', [])]

    metadata = api_data['metadata']

    return {
        'id': api_data.get('videoId') or video_id,
        # The last fallback used to be ``metadata.get['title']``, which
        # subscripts the bound method and raises TypeError whenever the
        # first two keys are empty; it must be a call.
        'title': metadata.get('longTitle') or metadata.get('name') or metadata.get('title'),
        'description': metadata.get('description'),
        'timestamp': parse_iso8601(metadata.get('publishDate')),
        'duration': int_or_none(metadata.get('duration')),
        'display_id': metadata.get('slug') or video_id,
        'uploader_id': metadata.get('creator'),
        'thumbnails': thumbnails,
        'formats': formats,
    }
|
|
||||||
|
|
||||||
|
|
||||||
class OneUPIE(IGNIE):
    """Extractor for gamevideos.1up.com pages, built on the IGN logic."""

    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
    IE_NAME = '1up.com'

    _TESTS = [{
        'url': 'http://gamevideos.1up.com/video/id/34976.html',
        'md5': 'c9cc69e07acb675c31a16719f909e347',
        'info_dict': {
            'id': '34976',
            'ext': 'mp4',
            'title': 'Sniper Elite V2 - Trailer',
            'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
            'timestamp': 1313099220,
            'upload_date': '20110811',
            'uploader_id': 'IGN',
        }
    }]

    def _real_extract(self, url):
        """Run the inherited extraction, then report the id from the URL."""
        info = super(OneUPIE, self)._real_extract(url)
        # Overwrite whatever id the parent produced with the one in the URL.
        info['id'] = re.match(self._VALID_URL, url).group('name_or_id')
        return info
|
|
||||||
|
|
||||||
|
|
||||||
class PCMagIE(IGNIE):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
|
|
||||||
IE_NAME = 'pcmag'
|
|
||||||
|
|
||||||
_EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
|
|
||||||
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
|
|
||||||
'md5': '212d6154fd0361a2781075f1febbe9ad',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'ee10d774b508c9b8ec07e763b9125b91',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
|
|
||||||
'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
|
|
||||||
'timestamp': 1420571160,
|
|
||||||
'upload_date': '20150106',
|
|
||||||
'uploader_id': 'cozzipix@gmail.com',
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
|
# Youtube embed
|
||||||
'md5': '94130c1ca07ba0adb6088350681f16c1',
|
'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '042e560ba94823d43afcb12ddf7142ca',
|
}, {
|
||||||
'ext': 'mp4',
|
# IMDB embed
|
||||||
'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
|
'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
|
||||||
'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
|
'only_matching': True,
|
||||||
'timestamp': 1412953920,
|
}, {
|
||||||
'upload_date': '20141010',
|
# Facebook embed
|
||||||
'uploader_id': 'chris_snyder@pcmag.com',
|
'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
|
||||||
}
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Brightcove embed
|
||||||
|
'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    """Return a playlist of every video referenced by an IGN article.

    The article record comes from ``_call_api``. Entries are produced
    lazily by a generator: first the article's lead media URL (if any),
    then every ``[ignvideo url=...]``, ``[youtube clip_id=...]`` or
    ``<iframe src=...>`` reference found in the content chunks.
    """
    display_id = self._match_id(url)
    article = self._call_api(display_id)

    def entries():
        lead_url = try_get(
            article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
        if lead_url:
            yield self.url_result(lead_url, IGNIE.ie_key())
        for chunk in (article.get('content') or []):
            embedded_urls = re.findall(
                r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"',
                chunk)
            for embedded_url in embedded_urls:
                yield self.url_result(embedded_url)

    playlist_id = article.get('articleId')
    headline = strip_or_none(
        try_get(article, lambda x: x['metadata']['headline']))
    return self.playlist_result(entries(), playlist_id, headline)
|
||||||
|
|
97
haruhi_dl/extractor/iheart.py
Normal file
97
haruhi_dl/extractor/iheart.py
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
clean_podcast_url,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class IHeartRadioBaseIE(InfoExtractor):
    """Helpers shared by the iHeartRadio episode and podcast extractors."""

    def _call_api(self, path, video_id, fatal=True, query=None):
        """Download JSON from the podcast API endpoint at ``path``."""
        api_url = 'https://api.iheart.com/api/v3/podcast/' + path
        return self._download_json(
            api_url, video_id, fatal=fatal, query=query)

    def _extract_episode(self, episode):
        """Build the metadata fields common to every episode entry."""
        thumbnail = episode.get('imageUrl')
        description = clean_html(episode.get('description'))
        # startDate is passed through int_or_none with a scale of 1000.
        timestamp = int_or_none(episode.get('startDate'), 1000)
        duration = int_or_none(episode.get('duration'))
        return {
            'thumbnail': thumbnail,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class IHeartRadioIE(IHeartRadioBaseIE):
    """Extractor for a single iHeartRadio podcast episode.

    Accepts both public episode page URLs and the internal
    ``iheartradio:<id>`` form emitted by the podcast playlist extractor.
    """

    # Was misspelled ``IENAME``, which left the intended extractor name
    # unused; the sibling podcast extractor uses ``IE_NAME``.
    IE_NAME = 'iheartradio'
    _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
        'md5': 'c8609c92c8688dcb69d8541042b8abca',
        'info_dict': {
            'id': '70346499',
            'ext': 'mp3',
            'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
            'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
            'timestamp': 1597741200,
            'upload_date': '20200818',
        }
    }

    def _real_extract(self, url):
        """Fetch the episode record and return its info dict."""
        episode_id = self._match_id(url)
        episode = self._call_api(
            'episodes/' + episode_id, episode_id)['episode']

        info = self._extract_episode(episode)
        info.update({
            'id': episode_id,
            'title': episode['title'],
            'url': clean_podcast_url(episode['mediaUrl']),
        })
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class IHeartRadioPodcastIE(IHeartRadioBaseIE):
    """Extractor that turns an iHeartRadio podcast page into a playlist."""

    IE_NAME = 'iheartradio:podcast'
    _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
        'info_dict': {
            'id': '30717896',
            'title': 'It Could Happen Here',
            'description': 'md5:5842117412a967eb0b01f8088eb663e2',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """List a podcast's episodes as ``iheartradio:<id>`` url entries."""
        podcast_id = self._match_id(url)
        api_path = 'podcasts/' + podcast_id
        # A very large limit is requested so the listing comes back in one call.
        episode_list = self._call_api(
            api_path + '/episodes', podcast_id,
            query={'limit': 1000000000})['data']

        entries = []
        for item in episode_list:
            item_id = str_or_none(item.get('id'))
            if item_id:
                entry = self._extract_episode(item)
                entry.update({
                    '_type': 'url',
                    'id': item_id,
                    'title': item.get('title'),
                    'url': 'iheartradio:' + item_id,
                    'ie_key': IHeartRadioIE.ie_key(),
                })
                entries.append(entry)

        # Podcast-level metadata is optional: the call is non-fatal and an
        # empty dict stands in when it yields nothing.
        podcast = self._call_api(api_path, podcast_id, False) or {}
        playlist_title = podcast.get('title')
        playlist_description = podcast.get('description')
        return self.playlist_result(
            entries, podcast_id, playlist_title, playlist_description)
|
|
@ -12,7 +12,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class InaIE(InfoExtractor):
|
class InaIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
_VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
|
||||||
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
|
||||||
|
@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
'url': 'https://www.ina.fr/video/P16173408-video.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://m.ina.fr/video/I12055569',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
|
|
||||||
def _extract_rtmp_video(self, webpage):
|
def _extract_rtmp_video(self, webpage):
|
||||||
# The server URL is hardcoded
|
# The server URL is hardcoded
|
||||||
video_url = 'rtmpe://video.infoq.com/cfx/st/'
|
video_url = 'rtmpe://videof.infoq.com/cfx/st/'
|
||||||
|
|
||||||
# Extract video URL
|
# Extract video URL
|
||||||
encoded_id = self._search_regex(
|
encoded_id = self._search_regex(
|
||||||
|
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
|
||||||
return [{
|
return [{
|
||||||
'format_id': 'http_video',
|
'format_id': 'http_video',
|
||||||
'url': http_video_url,
|
'url': http_video_url,
|
||||||
|
'http_headers': {'Referer': 'https://www.infoq.com/'},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_http_audio(self, webpage, video_id):
|
def _extract_http_audio(self, webpage, video_id):
|
||||||
fields = self._hidden_inputs(webpage)
|
fields = self._form_hidden_inputs('mp3Form', webpage)
|
||||||
http_audio_url = fields.get('filename')
|
http_audio_url = fields.get('filename')
|
||||||
if not http_audio_url:
|
if not http_audio_url:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# base URL is found in the Location header in the response returned by
|
# base URL is found in the Location header in the response returned by
|
||||||
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
# GET https://www.infoq.com/mp3download.action?filename=... when logged in.
|
||||||
http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
|
http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
|
||||||
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
|
||||||
|
|
||||||
# audio file seem to be missing some times even if there is a download link
|
# audio file seem to be missing some times even if there is a download link
|
||||||
|
|
|
@ -22,7 +22,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class InstagramIE(InfoExtractor):
|
class InstagramIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
|
_VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
|
||||||
'md5': '0d2da106a9d2631273e192b372806516',
|
'md5': '0d2da106a9d2631273e192b372806516',
|
||||||
|
@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
|
||||||
'timestamp': 1371748545,
|
'timestamp': 1371748545,
|
||||||
'upload_date': '20130620',
|
'upload_date': '20130620',
|
||||||
'uploader_id': 'naomipq',
|
'uploader_id': 'naomipq',
|
||||||
'uploader': 'Naomi Leonor Phan-Quang',
|
'uploader': 'B E A U T Y F O R A S H E S',
|
||||||
'like_count': int,
|
'like_count': int,
|
||||||
'comment_count': int,
|
'comment_count': int,
|
||||||
'comments': list,
|
'comments': list,
|
||||||
|
@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
'url': 'https://www.instagram.com/tv/aye83DjauH/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
(video_url, description, thumbnail, timestamp, uploader,
|
(media, video_url, description, thumbnail, timestamp, uploader,
|
||||||
uploader_id, like_count, comment_count, comments, height,
|
uploader_id, like_count, comment_count, comments, height,
|
||||||
width) = [None] * 11
|
width) = [None] * 12
|
||||||
|
|
||||||
shared_data = self._parse_json(
|
shared_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
|
@ -137,6 +140,18 @@ class InstagramIE(InfoExtractor):
|
||||||
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
(lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
|
||||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||||
dict)
|
dict)
|
||||||
|
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||||
|
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||||
|
if not media:
|
||||||
|
additional_data = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
|
||||||
|
webpage, 'additional data', default='{}'),
|
||||||
|
video_id, fatal=False)
|
||||||
|
if additional_data:
|
||||||
|
media = try_get(
|
||||||
|
additional_data, lambda x: x['graphql']['shortcode_media'],
|
||||||
|
dict)
|
||||||
if media:
|
if media:
|
||||||
video_url = media.get('video_url')
|
video_url = media.get('video_url')
|
||||||
height = int_or_none(media.get('dimensions', {}).get('height'))
|
height = int_or_none(media.get('dimensions', {}).get('height'))
|
||||||
|
@ -144,17 +159,23 @@ class InstagramIE(InfoExtractor):
|
||||||
description = try_get(
|
description = try_get(
|
||||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||||
compat_str) or media.get('caption')
|
compat_str) or media.get('caption')
|
||||||
thumbnail = media.get('display_src')
|
thumbnail = media.get('display_src') or media.get('display_url')
|
||||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||||
uploader = media.get('owner', {}).get('full_name')
|
uploader = media.get('owner', {}).get('full_name')
|
||||||
uploader_id = media.get('owner', {}).get('username')
|
uploader_id = media.get('owner', {}).get('username')
|
||||||
|
|
||||||
def get_count(key, kind):
|
def get_count(keys, kind):
|
||||||
return int_or_none(try_get(
|
if not isinstance(keys, (list, tuple)):
|
||||||
|
keys = [keys]
|
||||||
|
for key in keys:
|
||||||
|
count = int_or_none(try_get(
|
||||||
media, (lambda x: x['edge_media_%s' % key]['count'],
|
media, (lambda x: x['edge_media_%s' % key]['count'],
|
||||||
lambda x: x['%ss' % kind]['count'])))
|
lambda x: x['%ss' % kind]['count'])))
|
||||||
|
if count is not None:
|
||||||
|
return count
|
||||||
like_count = get_count('preview_like', 'like')
|
like_count = get_count('preview_like', 'like')
|
||||||
comment_count = get_count('to_comment', 'comment')
|
comment_count = get_count(
|
||||||
|
('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
|
||||||
|
|
||||||
comments = [{
|
comments = [{
|
||||||
'author': comment.get('user', {}).get('username'),
|
'author': comment.get('user', {}).get('username'),
|
||||||
|
|
|
@ -1,29 +1,21 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import uuid
|
|
||||||
import xml.etree.ElementTree as etree
|
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .brightcove import BrightcoveNewIE
|
from .brightcove import BrightcoveNewIE
|
||||||
from ..compat import (
|
|
||||||
compat_str,
|
|
||||||
compat_etree_register_namespace,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
int_or_none,
|
get_element_by_class,
|
||||||
|
JSON_LD_RE,
|
||||||
merge_dicts,
|
merge_dicts,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
xpath_with_ns,
|
|
||||||
xpath_element,
|
|
||||||
xpath_text,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,14 +23,18 @@ class ITVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
|
||||||
_GEO_COUNTRIES = ['GB']
|
_GEO_COUNTRIES = ['GB']
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
|
'url': 'https://www.itv.com/hub/liar/2a4547a0012',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2a2936a0053',
|
'id': '2a4547a0012',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Home Movie',
|
'title': 'Liar - Series 2 - Episode 6',
|
||||||
|
'description': 'md5:d0f91536569dec79ea184f0a44cca089',
|
||||||
|
'series': 'Liar',
|
||||||
|
'season_number': 2,
|
||||||
|
'episode_number': 6,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
# rtmp download
|
# m3u8 download
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
|
@ -61,139 +57,8 @@ class ITVIE(InfoExtractor):
|
||||||
params = extract_attributes(self._search_regex(
|
params = extract_attributes(self._search_regex(
|
||||||
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
|
||||||
|
|
||||||
ns_map = {
|
ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
|
||||||
'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
|
hmac = params['data-video-hmac']
|
||||||
'tem': 'http://tempuri.org/',
|
|
||||||
'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
|
|
||||||
'com': 'http://schemas.itv.com/2009/05/Common',
|
|
||||||
}
|
|
||||||
for ns, full_ns in ns_map.items():
|
|
||||||
compat_etree_register_namespace(ns, full_ns)
|
|
||||||
|
|
||||||
def _add_ns(name):
|
|
||||||
return xpath_with_ns(name, ns_map)
|
|
||||||
|
|
||||||
def _add_sub_element(element, name):
|
|
||||||
return etree.SubElement(element, _add_ns(name))
|
|
||||||
|
|
||||||
production_id = (
|
|
||||||
params.get('data-video-autoplay-id')
|
|
||||||
or '%s#001' % (
|
|
||||||
params.get('data-video-episode-id')
|
|
||||||
or video_id.replace('a', '/')))
|
|
||||||
|
|
||||||
req_env = etree.Element(_add_ns('soapenv:Envelope'))
|
|
||||||
_add_sub_element(req_env, 'soapenv:Header')
|
|
||||||
body = _add_sub_element(req_env, 'soapenv:Body')
|
|
||||||
get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
|
|
||||||
request = _add_sub_element(get_playlist, 'tem:request')
|
|
||||||
_add_sub_element(request, 'itv:ProductionId').text = production_id
|
|
||||||
_add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
|
|
||||||
vodcrid = _add_sub_element(request, 'itv:Vodcrid')
|
|
||||||
_add_sub_element(vodcrid, 'com:Id')
|
|
||||||
_add_sub_element(request, 'itv:Partition')
|
|
||||||
user_info = _add_sub_element(get_playlist, 'tem:userInfo')
|
|
||||||
_add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
|
|
||||||
_add_sub_element(user_info, 'itv:DM')
|
|
||||||
_add_sub_element(user_info, 'itv:RevenueScienceValue')
|
|
||||||
_add_sub_element(user_info, 'itv:SessionId')
|
|
||||||
_add_sub_element(user_info, 'itv:SsoToken')
|
|
||||||
_add_sub_element(user_info, 'itv:UserToken')
|
|
||||||
site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
|
|
||||||
_add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
|
|
||||||
_add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
|
|
||||||
_add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
|
|
||||||
_add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
|
|
||||||
_add_sub_element(site_info, 'itv:Category')
|
|
||||||
_add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
|
|
||||||
_add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
|
|
||||||
device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
|
|
||||||
_add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
|
|
||||||
player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
|
|
||||||
_add_sub_element(player_info, 'itv:Version').text = '2'
|
|
||||||
|
|
||||||
headers = self.geo_verification_headers()
|
|
||||||
headers.update({
|
|
||||||
'Content-Type': 'text/xml; charset=utf-8',
|
|
||||||
'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
|
|
||||||
})
|
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
|
||||||
formats = []
|
|
||||||
subtitles = {}
|
|
||||||
|
|
||||||
def extract_subtitle(sub_url):
|
|
||||||
ext = determine_ext(sub_url, 'ttml')
|
|
||||||
subtitles.setdefault('en', []).append({
|
|
||||||
'url': sub_url,
|
|
||||||
'ext': 'ttml' if ext == 'xml' else ext,
|
|
||||||
})
|
|
||||||
|
|
||||||
resp_env = self._download_xml(
|
|
||||||
params['data-playlist-url'], video_id,
|
|
||||||
headers=headers, data=etree.tostring(req_env), fatal=False)
|
|
||||||
if resp_env:
|
|
||||||
playlist = xpath_element(resp_env, './/Playlist')
|
|
||||||
if playlist is None:
|
|
||||||
fault_code = xpath_text(resp_env, './/faultcode')
|
|
||||||
fault_string = xpath_text(resp_env, './/faultstring')
|
|
||||||
if fault_code == 'InvalidGeoRegion':
|
|
||||||
self.raise_geo_restricted(
|
|
||||||
msg=fault_string, countries=self._GEO_COUNTRIES)
|
|
||||||
elif fault_code not in (
|
|
||||||
'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s said: %s' % (self.IE_NAME, fault_string), expected=True)
|
|
||||||
info.update({
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'episode_title': params.get('data-video-episode'),
|
|
||||||
'series': params.get('data-video-title'),
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
title = xpath_text(playlist, 'EpisodeTitle', default=None)
|
|
||||||
info.update({
|
|
||||||
'title': title,
|
|
||||||
'episode_title': title,
|
|
||||||
'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
|
|
||||||
'series': xpath_text(playlist, 'ProgrammeTitle'),
|
|
||||||
'duration': parse_duration(xpath_text(playlist, 'Duration')),
|
|
||||||
})
|
|
||||||
video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
|
|
||||||
media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
|
|
||||||
rtmp_url = media_files.attrib['base']
|
|
||||||
|
|
||||||
for media_file in media_files.findall('MediaFile'):
|
|
||||||
play_path = xpath_text(media_file, 'URL')
|
|
||||||
if not play_path:
|
|
||||||
continue
|
|
||||||
tbr = int_or_none(media_file.get('bitrate'), 1000)
|
|
||||||
f = {
|
|
||||||
'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
|
|
||||||
'play_path': play_path,
|
|
||||||
# Providing this swfVfy allows to avoid truncated downloads
|
|
||||||
'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
|
|
||||||
'page_url': url,
|
|
||||||
'tbr': tbr,
|
|
||||||
'ext': 'flv',
|
|
||||||
}
|
|
||||||
app = self._search_regex(
|
|
||||||
'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
|
|
||||||
if app:
|
|
||||||
f.update({
|
|
||||||
'url': rtmp_url.split('?', 1)[0],
|
|
||||||
'app': app,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
f['url'] = rtmp_url
|
|
||||||
formats.append(f)
|
|
||||||
|
|
||||||
for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
|
|
||||||
if caption_url.text:
|
|
||||||
extract_subtitle(caption_url.text)
|
|
||||||
|
|
||||||
ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
|
|
||||||
hmac = params.get('data-video-hmac')
|
|
||||||
if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
|
|
||||||
headers = self.geo_verification_headers()
|
headers = self.geo_verification_headers()
|
||||||
headers.update({
|
headers.update({
|
||||||
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
'Accept': 'application/vnd.itv.vod.playlist.v2+json',
|
||||||
|
@ -227,11 +92,12 @@ class ITVIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'platformTag': 'dotcom'
|
'platformTag': 'dotcom'
|
||||||
}
|
}
|
||||||
}).encode(), headers=headers, fatal=False)
|
}).encode(), headers=headers)
|
||||||
if ios_playlist:
|
video_data = ios_playlist['Playlist']['Video']
|
||||||
video_data = ios_playlist.get('Playlist', {}).get('Video', {})
|
|
||||||
ios_base_url = video_data.get('Base')
|
ios_base_url = video_data.get('Base')
|
||||||
for media_file in video_data.get('MediaFiles', []):
|
|
||||||
|
formats = []
|
||||||
|
for media_file in (video_data.get('MediaFiles') or []):
|
||||||
href = media_file.get('Href')
|
href = media_file.get('Href')
|
||||||
if not href:
|
if not href:
|
||||||
continue
|
continue
|
||||||
|
@ -246,35 +112,42 @@ class ITVIE(InfoExtractor):
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': href,
|
'url': href,
|
||||||
})
|
})
|
||||||
subs = video_data.get('Subtitles')
|
self._sort_formats(formats)
|
||||||
if isinstance(subs, list):
|
|
||||||
|
subtitles = {}
|
||||||
|
subs = video_data.get('Subtitles') or []
|
||||||
for sub in subs:
|
for sub in subs:
|
||||||
if not isinstance(sub, dict):
|
if not isinstance(sub, dict):
|
||||||
continue
|
continue
|
||||||
href = url_or_none(sub.get('Href'))
|
href = url_or_none(sub.get('Href'))
|
||||||
if href:
|
if not href:
|
||||||
extract_subtitle(href)
|
continue
|
||||||
if not info.get('duration'):
|
subtitles.setdefault('en', []).append({
|
||||||
info['duration'] = parse_duration(video_data.get('Duration'))
|
'url': href,
|
||||||
|
'ext': determine_ext(href, 'vtt'),
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'id': video_id,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
webpage_info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
if not webpage_info.get('title'):
|
if not info:
|
||||||
webpage_info['title'] = self._html_search_regex(
|
json_ld = self._parse_json(self._search_regex(
|
||||||
r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
|
JSON_LD_RE, webpage, 'JSON-LD', '{}',
|
||||||
webpage, 'title', default=None) or self._og_search_title(
|
group='json_ld'), video_id, fatal=False)
|
||||||
webpage, default=None) or self._html_search_meta(
|
if json_ld and json_ld.get('@type') == 'BreadcrumbList':
|
||||||
'twitter:title', webpage, 'title',
|
for ile in (json_ld.get('itemListElement:') or []):
|
||||||
default=None) or webpage_info['episode']
|
item = ile.get('item:') or {}
|
||||||
|
if item.get('@type') == 'TVEpisode':
|
||||||
|
item['@context'] = 'http://schema.org'
|
||||||
|
info = self._json_ld(item, video_id, fatal=False) or {}
|
||||||
|
break
|
||||||
|
|
||||||
return merge_dicts(info, webpage_info)
|
return merge_dicts({
|
||||||
|
'id': video_id,
|
||||||
|
'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': parse_duration(video_data.get('Duration')),
|
||||||
|
'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
|
||||||
|
}, info)
|
||||||
|
|
||||||
|
|
||||||
class ITVBTCCIE(InfoExtractor):
|
class ITVBTCCIE(InfoExtractor):
|
||||||
|
|
|
@ -3,10 +3,13 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_HTTPError
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
update_url_query,
|
update_url_query,
|
||||||
)
|
)
|
||||||
|
@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
|
||||||
'id': '301965083',
|
'id': '301965083',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
|
||||||
'uploader_id': 2671005,
|
'uploader_id': '2671005',
|
||||||
'uploader': '그랑그랑이',
|
'uploader': '그랑그랑이',
|
||||||
'timestamp': 1488160199,
|
'timestamp': 1488160199,
|
||||||
'upload_date': '20170227',
|
'upload_date': '20170227',
|
||||||
|
@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
|
||||||
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
|
||||||
'uploader_id': 2653210,
|
'uploader_id': '2653210',
|
||||||
'uploader': '쇼! 음악중심',
|
'uploader': '쇼! 음악중심',
|
||||||
'timestamp': 1485684628,
|
'timestamp': 1485684628,
|
||||||
'upload_date': '20170129',
|
'upload_date': '20170129',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# geo restricted
|
||||||
|
'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
|
||||||
'fields': ','.join([
|
'fields': ','.join([
|
||||||
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
'-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
|
||||||
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
'description', 'channelId', 'createTime', 'duration', 'playCount',
|
||||||
'likeCount', 'commentCount', 'tagList', 'channel', 'name',
|
'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
|
||||||
'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
|
|
||||||
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
|
||||||
|
|
||||||
title = clip.get('title') or clip_link.get('displayTitle')
|
title = clip.get('title') or clip_link.get('displayTitle')
|
||||||
|
|
||||||
query['tid'] = impress.get('tid', '')
|
query.update({
|
||||||
|
'fields': '-*,code,message,url',
|
||||||
|
'tid': impress.get('tid') or '',
|
||||||
|
})
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for fmt in clip.get('videoOutputList', []):
|
for fmt in (clip.get('videoOutputList') or []):
|
||||||
try:
|
try:
|
||||||
profile_name = fmt['profile']
|
profile_name = fmt['profile']
|
||||||
if profile_name == 'AUDIO':
|
if profile_name == 'AUDIO':
|
||||||
continue
|
continue
|
||||||
query.update({
|
query['profile'] = profile_name
|
||||||
'profile': profile_name,
|
try:
|
||||||
'fields': '-*,url',
|
|
||||||
})
|
|
||||||
fmt_url_json = self._download_json(
|
fmt_url_json = self._download_json(
|
||||||
api_base + 'raw/videolocation', display_id,
|
api_base + 'raw/videolocation', display_id,
|
||||||
'Downloading video URL for profile %s' % profile_name,
|
'Downloading video URL for profile %s' % profile_name,
|
||||||
query=query, headers=player_header, fatal=False)
|
query=query, headers=player_header)
|
||||||
|
except ExtractorError as e:
|
||||||
if fmt_url_json is None:
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
|
||||||
|
resp = self._parse_json(e.cause.read().decode(), video_id)
|
||||||
|
if resp.get('code') == 'GeoBlocked':
|
||||||
|
self.raise_geo_restricted()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
fmt_url = fmt_url_json['url']
|
fmt_url = fmt_url_json['url']
|
||||||
|
@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
|
||||||
pass
|
pass
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbs = []
|
|
||||||
for thumb in clip.get('clipChapterThumbnailList', []):
|
|
||||||
thumbs.append({
|
|
||||||
'url': thumb.get('thumbnailUrl'),
|
|
||||||
'id': compat_str(thumb.get('timeInSec')),
|
|
||||||
'preference': -1 if thumb.get('isDefault') else 0
|
|
||||||
})
|
|
||||||
top_thumbnail = clip.get('thumbnailUrl')
|
|
||||||
if top_thumbnail:
|
|
||||||
thumbs.append({
|
|
||||||
'url': top_thumbnail,
|
|
||||||
'preference': 10,
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': display_id,
|
'id': display_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': strip_or_none(clip.get('description')),
|
'description': strip_or_none(clip.get('description')),
|
||||||
'uploader': clip_link.get('channel', {}).get('name'),
|
'uploader': try_get(clip_link, lambda x: x['channel']['name']),
|
||||||
'uploader_id': clip_link.get('channelId'),
|
'uploader_id': str_or_none(clip_link.get('channelId')),
|
||||||
'thumbnails': thumbs,
|
'thumbnail': clip.get('thumbnailUrl'),
|
||||||
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
'timestamp': unified_timestamp(clip_link.get('createTime')),
|
||||||
'duration': int_or_none(clip.get('duration')),
|
'duration': int_or_none(clip.get('duration')),
|
||||||
'view_count': int_or_none(clip.get('playCount')),
|
'view_count': int_or_none(clip.get('playCount')),
|
||||||
|
|
|
@ -1,97 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
|
||||||
srt_subtitles_timecode,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class KanalPlayIE(InfoExtractor):
|
|
||||||
IE_DESC = 'Kanal 5/9/11 Play'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '3270012277',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Saknar både dusch och avlopp',
|
|
||||||
'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
|
|
||||||
'duration': 2636.36,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# rtmp download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _fix_subtitles(self, subs):
|
|
||||||
return '\r\n\r\n'.join(
|
|
||||||
'%s\r\n%s --> %s\r\n%s'
|
|
||||||
% (
|
|
||||||
num,
|
|
||||||
srt_subtitles_timecode(item['startMillis'] / 1000.0),
|
|
||||||
srt_subtitles_timecode(item['endMillis'] / 1000.0),
|
|
||||||
item['text'],
|
|
||||||
) for num, item in enumerate(subs, 1))
|
|
||||||
|
|
||||||
def _get_subtitles(self, channel_id, video_id):
|
|
||||||
subs = self._download_json(
|
|
||||||
'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
|
|
||||||
video_id, 'Downloading subtitles JSON', fatal=False)
|
|
||||||
return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
video_id = mobj.group('id')
|
|
||||||
channel_id = mobj.group('channel_id')
|
|
||||||
|
|
||||||
video = self._download_json(
|
|
||||||
'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
reasons_for_no_streams = video.get('reasonsForNoStreams')
|
|
||||||
if reasons_for_no_streams:
|
|
||||||
raise ExtractorError(
|
|
||||||
'%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
|
|
||||||
expected=True)
|
|
||||||
|
|
||||||
title = video['title']
|
|
||||||
description = video.get('description')
|
|
||||||
duration = float_or_none(video.get('length'), 1000)
|
|
||||||
thumbnail = video.get('posterUrl')
|
|
||||||
|
|
||||||
stream_base_url = video['streamBaseUrl']
|
|
||||||
|
|
||||||
formats = [{
|
|
||||||
'url': stream_base_url,
|
|
||||||
'play_path': stream['source'],
|
|
||||||
'ext': 'flv',
|
|
||||||
'tbr': float_or_none(stream.get('bitrate'), 1000),
|
|
||||||
'rtmp_real_time': True,
|
|
||||||
} for stream in video['streams']]
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = {}
|
|
||||||
if video.get('hasSubtitle'):
|
|
||||||
subtitles = self.extract_subtitles(channel_id, video_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': description,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': duration,
|
|
||||||
'formats': formats,
|
|
||||||
'subtitles': subtitles,
|
|
||||||
}
|
|
|
@ -2,92 +2,71 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from .canvas import CanvasIE
|
from .canvas import CanvasIE
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_urllib_parse_unquote
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class KetnetIE(InfoExtractor):
|
class KetnetIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
|
'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
|
||||||
'md5': '6bdeb65998930251bbd1c510750edba9',
|
'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'zomerse-filmpjes',
|
'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
|
'title': 'Nachtwacht - Reeks 3: Aflevering 1',
|
||||||
'description': 'Gluur mee met Ghost Rockers op de filmset',
|
'description': 'De Nachtwacht krijgt te maken met een parasiet',
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
}
|
'duration': 1468.02,
|
||||||
}, {
|
'timestamp': 1609225200,
|
||||||
# mzid in playerConfig instead of sources
|
'upload_date': '20201229',
|
||||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
|
'series': 'Nachtwacht',
|
||||||
'md5': '90139b746a0a9bd7bb631283f6e2a64e',
|
'season': 'Reeks 3',
|
||||||
'info_dict': {
|
'episode': 'De Greystook',
|
||||||
'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
'episode_number': 1,
|
||||||
'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
|
|
||||||
'ext': 'flv',
|
|
||||||
'title': 'Nachtwacht: De Greystook',
|
|
||||||
'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
'duration': 1468.03,
|
|
||||||
},
|
},
|
||||||
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
|
'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# mzsource, geo restricted to Belgium
|
|
||||||
'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
video = self._download_json(
|
||||||
|
'https://senior-bff.ketnet.be/graphql', display_id, query={
|
||||||
|
'query': '''{
|
||||||
|
video(id: "content/ketnet/nl/%s.model.json") {
|
||||||
|
description
|
||||||
|
episodeNr
|
||||||
|
imageUrl
|
||||||
|
mediaReference
|
||||||
|
programTitle
|
||||||
|
publicationDate
|
||||||
|
seasonTitle
|
||||||
|
subtitleVideodetail
|
||||||
|
titleVideodetail
|
||||||
|
}
|
||||||
|
}''' % display_id,
|
||||||
|
})['data']['video']
|
||||||
|
|
||||||
config = self._parse_json(
|
mz_id = compat_urllib_parse_unquote(video['mediaReference'])
|
||||||
self._search_regex(
|
|
||||||
r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
|
|
||||||
'player config'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
mzid = config.get('mzid')
|
|
||||||
if mzid:
|
|
||||||
return self.url_result(
|
|
||||||
'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
|
|
||||||
CanvasIE.ie_key(), video_id=mzid)
|
|
||||||
|
|
||||||
title = config['title']
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for source_key in ('', 'mz'):
|
|
||||||
source = config.get('%ssource' % source_key)
|
|
||||||
if not isinstance(source, dict):
|
|
||||||
continue
|
|
||||||
for format_id, format_url in source.items():
|
|
||||||
if format_id == 'hls':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
format_url, video_id, 'mp4',
|
|
||||||
entry_protocol='m3u8_native', m3u8_id=format_id,
|
|
||||||
fatal=False))
|
|
||||||
elif format_id == 'hds':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
format_url, video_id, f4m_id=format_id, fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': format_url,
|
|
||||||
'format_id': format_id,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'_type': 'url_transparent',
|
||||||
'title': title,
|
'id': mz_id,
|
||||||
'description': config.get('description'),
|
'title': video['titleVideodetail'],
|
||||||
'thumbnail': config.get('image'),
|
'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
|
||||||
'series': config.get('program'),
|
'thumbnail': video.get('imageUrl'),
|
||||||
'episode': config.get('episode'),
|
'description': video.get('description'),
|
||||||
'formats': formats,
|
'timestamp': parse_iso8601(video.get('publicationDate')),
|
||||||
|
'series': video.get('programTitle'),
|
||||||
|
'season': video.get('seasonTitle'),
|
||||||
|
'episode': video.get('subtitleVideodetail'),
|
||||||
|
'episode_number': int_or_none(video.get('episodeNr')),
|
||||||
|
'ie_key': CanvasIE.ie_key(),
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,82 +1,107 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import json
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
unified_strdate,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class KhanAcademyIE(InfoExtractor):
|
class KhanAcademyBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
|
_VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
|
||||||
IE_NAME = 'KhanAcademy'
|
|
||||||
|
|
||||||
_TESTS = [{
|
def _parse_video(self, video):
|
||||||
'url': 'http://www.khanacademy.org/video/one-time-pad',
|
return {
|
||||||
'md5': '7b391cce85e758fb94f763ddc1bbb979',
|
'_type': 'url_transparent',
|
||||||
|
'url': video['youtubeId'],
|
||||||
|
'id': video.get('slug'),
|
||||||
|
'title': video.get('title'),
|
||||||
|
'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'description': video.get('description'),
|
||||||
|
'ie_key': 'Youtube',
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
component_props = self._parse_json(self._download_json(
|
||||||
|
'https://www.khanacademy.org/api/internal/graphql',
|
||||||
|
display_id, query={
|
||||||
|
'hash': 1604303425,
|
||||||
|
'variables': json.dumps({
|
||||||
|
'path': display_id,
|
||||||
|
'queryParams': '',
|
||||||
|
}),
|
||||||
|
})['data']['contentJson'], display_id)['componentProps']
|
||||||
|
return self._parse_component_props(component_props)
|
||||||
|
|
||||||
|
|
||||||
|
class KhanAcademyIE(KhanAcademyBaseIE):
|
||||||
|
IE_NAME = 'khanacademy'
|
||||||
|
_VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
|
||||||
|
'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'one-time-pad',
|
'id': 'FlIG3TvQCBQ',
|
||||||
'ext': 'webm',
|
'ext': 'mp4',
|
||||||
'title': 'The one-time pad',
|
'title': 'The one-time pad',
|
||||||
'description': 'The perfect cipher',
|
'description': 'The perfect cipher',
|
||||||
'duration': 176,
|
'duration': 176,
|
||||||
'uploader': 'Brit Cruise',
|
'uploader': 'Brit Cruise',
|
||||||
'uploader_id': 'khanacademy',
|
'uploader_id': 'khanacademy',
|
||||||
'upload_date': '20120411',
|
'upload_date': '20120411',
|
||||||
|
'timestamp': 1334170113,
|
||||||
|
'license': 'cc-by-nc-sa',
|
||||||
},
|
},
|
||||||
'add_ie': ['Youtube'],
|
'add_ie': ['Youtube'],
|
||||||
}, {
|
}
|
||||||
'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
|
|
||||||
|
def _parse_component_props(self, component_props):
|
||||||
|
video = component_props['tutorialPageData']['contentModel']
|
||||||
|
info = self._parse_video(video)
|
||||||
|
author_names = video.get('authorNames')
|
||||||
|
info.update({
|
||||||
|
'uploader': ', '.join(author_names) if author_names else None,
|
||||||
|
'timestamp': parse_iso8601(video.get('dateAdded')),
|
||||||
|
'license': video.get('kaUserLicense'),
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class KhanAcademyUnitIE(KhanAcademyBaseIE):
|
||||||
|
IE_NAME = 'khanacademy:unit'
|
||||||
|
_VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'cryptography',
|
'id': 'cryptography',
|
||||||
'title': 'Journey into cryptography',
|
'title': 'Cryptography',
|
||||||
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
'description': 'How have humans protected their secret messages through history? What has changed today?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 3,
|
'playlist_mincount': 31,
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
m = re.match(self._VALID_URL, url)
|
|
||||||
video_id = m.group('id')
|
|
||||||
|
|
||||||
if m.group('key') == 'video':
|
|
||||||
data = self._download_json(
|
|
||||||
'http://api.khanacademy.org/api/v1/videos/' + video_id,
|
|
||||||
video_id, 'Downloading video info')
|
|
||||||
|
|
||||||
upload_date = unified_strdate(data['date_added'])
|
|
||||||
uploader = ', '.join(data['author_names'])
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'url': data['url'],
|
|
||||||
'id': video_id,
|
|
||||||
'title': data['title'],
|
|
||||||
'thumbnail': data['image_url'],
|
|
||||||
'duration': data['duration'],
|
|
||||||
'description': data['description'],
|
|
||||||
'uploader': uploader,
|
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
}
|
||||||
else:
|
|
||||||
# topic
|
|
||||||
data = self._download_json(
|
|
||||||
'http://api.khanacademy.org/api/v1/topic/' + video_id,
|
|
||||||
video_id, 'Downloading topic info')
|
|
||||||
|
|
||||||
entries = [
|
def _parse_component_props(self, component_props):
|
||||||
{
|
curation = component_props['curation']
|
||||||
'_type': 'url',
|
|
||||||
'url': c['url'],
|
|
||||||
'id': c['id'],
|
|
||||||
'title': c['title'],
|
|
||||||
}
|
|
||||||
for c in data['children'] if c['kind'] in ('Video', 'Topic')]
|
|
||||||
|
|
||||||
return {
|
entries = []
|
||||||
'_type': 'playlist',
|
tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
|
||||||
'id': video_id,
|
for tutorial_number, tutorial in enumerate(tutorials, 1):
|
||||||
'title': data['title'],
|
chapter_info = {
|
||||||
'description': data['description'],
|
'chapter': tutorial.get('title'),
|
||||||
'entries': entries,
|
'chapter_number': tutorial_number,
|
||||||
|
'chapter_id': tutorial.get('id'),
|
||||||
}
|
}
|
||||||
|
for content_item in (tutorial.get('contentItems') or []):
|
||||||
|
if content_item.get('kind') == 'Video':
|
||||||
|
info = self._parse_video(content_item)
|
||||||
|
info.update(chapter_info)
|
||||||
|
entries.append(info)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, curation.get('unit'), curation.get('title'),
|
||||||
|
curation.get('description'))
|
||||||
|
|
|
@ -8,11 +8,15 @@ from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_b64decode,
|
compat_b64decode,
|
||||||
compat_HTTPError,
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
orderedSet,
|
js_to_json,
|
||||||
unescapeHTML,
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
|
@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
|
'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1498-2',
|
'id': '7971-2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': "Introduction to the Practitioner's Brief",
|
'title': 'What Is Data Science',
|
||||||
|
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
|
||||||
|
'timestamp': 1607387907,
|
||||||
|
'upload_date': '20201208',
|
||||||
|
'duration': 304,
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
|
@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '154',
|
'id': '154',
|
||||||
'title': 'AWS Certified Cloud Practitioner',
|
'title': 'AWS Certified Cloud Practitioner',
|
||||||
'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
|
'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
|
||||||
|
'duration': 28835,
|
||||||
},
|
},
|
||||||
'playlist_count': 41,
|
'playlist_count': 41,
|
||||||
'skip': 'Requires Linux Academy account credentials',
|
'skip': 'Requires Linux Academy account credentials',
|
||||||
|
@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
|
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
|
||||||
'client_id': self._CLIENT_ID,
|
'client_id': self._CLIENT_ID,
|
||||||
'response_type': 'token id_token',
|
'response_type': 'token id_token',
|
||||||
|
'response_mode': 'web_message',
|
||||||
'redirect_uri': self._ORIGIN_URL,
|
'redirect_uri': self._ORIGIN_URL,
|
||||||
'scope': 'openid email user_impersonation profile',
|
'scope': 'openid email user_impersonation profile',
|
||||||
'audience': self._ORIGIN_URL,
|
'audience': self._ORIGIN_URL,
|
||||||
|
@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
|
|
||||||
access_token = self._search_regex(
|
access_token = self._search_regex(
|
||||||
r'access_token=([^=&]+)', urlh.geturl(),
|
r'access_token=([^=&]+)', urlh.geturl(),
|
||||||
'access token')
|
'access token', default=None)
|
||||||
|
if not access_token:
|
||||||
|
access_token = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
|
||||||
|
'authorization response'), None,
|
||||||
|
transform_source=js_to_json)['response']['access_token']
|
||||||
|
|
||||||
self._download_webpage(
|
self._download_webpage(
|
||||||
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
|
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
|
||||||
|
@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
|
||||||
|
|
||||||
# course path
|
# course path
|
||||||
if course_id:
|
if course_id:
|
||||||
entries = [
|
module = self._parse_json(
|
||||||
self.url_result(
|
self._search_regex(
|
||||||
urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
|
r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
|
||||||
for lesson_url in orderedSet(re.findall(
|
item_id)
|
||||||
r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
|
entries = []
|
||||||
webpage))]
|
chapter_number = None
|
||||||
title = unescapeHTML(self._html_search_regex(
|
chapter = None
|
||||||
(r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
|
chapter_id = None
|
||||||
r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
|
for item in module['items']:
|
||||||
webpage, 'title', default=None, group='value'))
|
if not isinstance(item, dict):
|
||||||
description = unescapeHTML(self._html_search_regex(
|
continue
|
||||||
r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'description', default=None, group='value'))
|
def type_field(key):
|
||||||
return self.playlist_result(entries, course_id, title, description)
|
return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
|
||||||
|
type_fields = (type_field('name'), type_field('slug'))
|
||||||
|
# Move to next module section
|
||||||
|
if 'section' in type_fields:
|
||||||
|
chapter = item.get('course_name')
|
||||||
|
chapter_id = item.get('course_module')
|
||||||
|
chapter_number = 1 if not chapter_number else chapter_number + 1
|
||||||
|
continue
|
||||||
|
# Skip non-lessons
|
||||||
|
if 'lesson' not in type_fields:
|
||||||
|
continue
|
||||||
|
lesson_url = urljoin(url, item.get('url'))
|
||||||
|
if not lesson_url:
|
||||||
|
continue
|
||||||
|
title = item.get('title') or item.get('lesson_name')
|
||||||
|
description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': lesson_url,
|
||||||
|
'ie_key': LinuxAcademyIE.ie_key(),
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
|
||||||
|
'duration': parse_duration(item.get('duration')),
|
||||||
|
'chapter': chapter,
|
||||||
|
'chapter_id': chapter_id,
|
||||||
|
'chapter_number': chapter_number,
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'id': course_id,
|
||||||
|
'title': module.get('title'),
|
||||||
|
'description': module.get('md_desc') or clean_html(module.get('desc')),
|
||||||
|
'duration': parse_duration(module.get('duration')),
|
||||||
|
}
|
||||||
|
|
||||||
# single video path
|
# single video path
|
||||||
info = self._extract_jwplayer_data(
|
m3u8_url = self._parse_json(
|
||||||
webpage, item_id, require_title=False, m3u8_id='hls',)
|
self._search_regex(
|
||||||
title = self._search_regex(
|
r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
|
||||||
|
item_id)[0]['file']
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls')
|
||||||
|
self._sort_formats(formats)
|
||||||
|
info = {
|
||||||
|
'id': item_id,
|
||||||
|
'formats': formats,
|
||||||
|
}
|
||||||
|
lesson = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
(r'window\.lesson\s*=\s*({.+?})\s*;',
|
||||||
|
r'player\.lesson\s*=\s*({.+?})\s*;'),
|
||||||
|
webpage, 'lesson', default='{}'), item_id, fatal=False)
|
||||||
|
if lesson:
|
||||||
|
info.update({
|
||||||
|
'title': lesson.get('lesson_name'),
|
||||||
|
'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
|
||||||
|
'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
|
||||||
|
'duration': parse_duration(lesson.get('duration')),
|
||||||
|
})
|
||||||
|
if not info.get('title'):
|
||||||
|
info['title'] = self._search_regex(
|
||||||
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
(r'>Lecture\s*:\s*(?P<value>[^<]+)',
|
||||||
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
|
||||||
'title', group='value')
|
'title', group='value')
|
||||||
info.update({
|
|
||||||
'id': item_id,
|
|
||||||
'title': title,
|
|
||||||
})
|
|
||||||
return info
|
return info
|
||||||
|
|
|
@ -5,28 +5,26 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
clean_html,
|
||||||
int_or_none,
|
merge_dicts,
|
||||||
parse_duration,
|
|
||||||
remove_end,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class LRTIE(InfoExtractor):
|
class LRTIE(InfoExtractor):
|
||||||
IE_NAME = 'lrt.lt'
|
IE_NAME = 'lrt.lt'
|
||||||
_VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# m3u8 download
|
# m3u8 download
|
||||||
'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
|
'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
|
||||||
'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
|
'md5': '85cb2bb530f31d91a9c65b479516ade4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '54391',
|
'id': '2000127261',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Septynios Kauno dienos',
|
'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
|
||||||
'description': 'md5:24d84534c7dc76581e59f5689462411a',
|
'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
|
||||||
'duration': 1783,
|
'duration': 3035,
|
||||||
'view_count': int,
|
'timestamp': 1604079000,
|
||||||
'like_count': int,
|
'upload_date': '20201030',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# direct mp3 download
|
# direct mp3 download
|
||||||
|
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _extract_js_var(self, webpage, var_name, default):
|
||||||
|
return self._search_regex(
|
||||||
|
r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name,
|
||||||
|
webpage, var_name.replace('_', ' '), default, group=2)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
path, video_id = re.match(self._VALID_URL, url).groups()
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
title = remove_end(self._og_search_title(webpage), ' - LRT')
|
media_url = self._extract_js_var(webpage, 'main_url', path)
|
||||||
|
media = self._download_json(self._extract_js_var(
|
||||||
|
webpage, 'media_info_url',
|
||||||
|
'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
|
||||||
|
video_id, query={'url': media_url})
|
||||||
|
jw_data = self._parse_jwplayer_data(
|
||||||
|
media['playlist_item'], video_id, base_url=url)
|
||||||
|
|
||||||
formats = []
|
json_ld_data = self._search_json_ld(webpage, video_id)
|
||||||
for _, file_url in re.findall(
|
|
||||||
r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
|
tags = []
|
||||||
ext = determine_ext(file_url)
|
for tag in (media.get('tags') or []):
|
||||||
if ext not in ('m3u8', 'mp3'):
|
tag_name = tag.get('name')
|
||||||
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
# mp3 served as m3u8 produces stuttered media file
|
tags.append(tag_name)
|
||||||
if ext == 'm3u8' and '.mp3' in file_url:
|
|
||||||
continue
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
file_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
||||||
fatal=False))
|
|
||||||
elif ext == 'mp3':
|
|
||||||
formats.append({
|
|
||||||
'url': file_url,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = self._og_search_thumbnail(webpage)
|
clean_info = {
|
||||||
description = self._og_search_description(webpage)
|
'description': clean_html(media.get('content')),
|
||||||
duration = parse_duration(self._search_regex(
|
'tags': tags,
|
||||||
r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
|
|
||||||
webpage, 'duration', default=None, group='duration'))
|
|
||||||
|
|
||||||
view_count = int_or_none(self._html_search_regex(
|
|
||||||
r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
|
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
|
||||||
like_count = int_or_none(self._search_regex(
|
|
||||||
r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
|
|
||||||
webpage, 'like count', fatal=False, group='count'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'formats': formats,
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'description': description,
|
|
||||||
'duration': duration,
|
|
||||||
'view_count': view_count,
|
|
||||||
'like_count': like_count,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return merge_dicts(clean_info, jw_data, json_ld_data)
|
||||||
|
|
|
@ -1,10 +1,16 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import merge_dicts
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
dict_get,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
parse_duration,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MallTVIE(InfoExtractor):
|
class MallTVIE(InfoExtractor):
|
||||||
|
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
|
||||||
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
|
||||||
'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
|
'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
|
||||||
'duration': 216,
|
'duration': 216,
|
||||||
'timestamp': 1538870400,
|
'timestamp': 1538870400,
|
||||||
'upload_date': '20181007',
|
'upload_date': '20181007',
|
||||||
|
@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor):
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
url, display_id, headers=self.geo_verification_headers())
|
url, display_id, headers=self.geo_verification_headers())
|
||||||
|
|
||||||
SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
|
video = self._parse_json(self._search_regex(
|
||||||
|
r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
|
||||||
|
webpage, 'video object'), display_id)
|
||||||
|
video_source = video['VideoSource']
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
SOURCE_RE, webpage, 'video id', group='id')
|
r'/([\da-z]+)/index\b', video_source, 'video id')
|
||||||
|
|
||||||
media = self._parse_html5_media_entries(
|
formats = self._extract_m3u8_formats(
|
||||||
url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
|
video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
|
||||||
m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
for s in (video.get('Subtitles') or {}):
|
||||||
|
s_url = s.get('Url')
|
||||||
|
if not s_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(s.get('Language') or 'cz', []).append({
|
||||||
|
'url': s_url,
|
||||||
|
})
|
||||||
|
|
||||||
|
entity_counts = video.get('EntityCounts') or {}
|
||||||
|
|
||||||
|
def get_count(k):
|
||||||
|
v = entity_counts.get(k + 's') or {}
|
||||||
|
return int_or_none(dict_get(v, ('Count', 'StrCount')))
|
||||||
|
|
||||||
info = self._search_json_ld(webpage, video_id, default={})
|
info = self._search_json_ld(webpage, video_id, default={})
|
||||||
|
|
||||||
return merge_dicts(media, info, {
|
return merge_dicts({
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': self._og_search_title(webpage, default=None) or display_id,
|
'title': video.get('Title'),
|
||||||
'description': self._og_search_description(webpage, default=None),
|
'description': clean_html(video.get('Description')),
|
||||||
'thumbnail': self._og_search_thumbnail(webpage, default=None),
|
'thumbnail': video.get('ThumbnailUrl'),
|
||||||
})
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
|
||||||
|
'view_count': get_count('View'),
|
||||||
|
'like_count': get_count('Like'),
|
||||||
|
'dislike_count': get_count('Dislike'),
|
||||||
|
'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
|
||||||
|
'comment_count': get_count('Comment'),
|
||||||
|
}, info)
|
||||||
|
|
|
@ -2,12 +2,16 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_urlparse
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
url_or_none,
|
||||||
xpath_text,
|
xpath_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
|
||||||
IE_DESC = 'MDR.DE and KiKA'
|
IE_DESC = 'MDR.DE and KiKA'
|
||||||
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
|
||||||
|
|
||||||
|
_GEO_COUNTRIES = ['DE']
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# MDR regularly deletes its videos
|
# MDR regularly deletes its videos
|
||||||
'url': 'http://www.mdr.de/fakt/video189002.html',
|
'url': 'http://www.mdr.de/fakt/video189002.html',
|
||||||
|
@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
|
||||||
'duration': 3239,
|
'duration': 3239,
|
||||||
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# empty bitrateVideo and bitrateAudio
|
||||||
|
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '128372',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Der kleine Wichtel kehrt zurück',
|
||||||
|
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
|
||||||
|
'duration': 4876,
|
||||||
|
'timestamp': 1607823300,
|
||||||
|
'upload_date': '20201213',
|
||||||
|
'uploader': 'ZDF',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
|
||||||
|
|
||||||
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
|
||||||
|
|
||||||
|
type_ = xpath_text(doc, './type', default=None)
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
processed_urls = []
|
processed_urls = []
|
||||||
for asset in doc.findall('./assets/asset'):
|
for asset in doc.findall('./assets/asset'):
|
||||||
for source in (
|
for source in (
|
||||||
|
'download',
|
||||||
'progressiveDownload',
|
'progressiveDownload',
|
||||||
'dynamicHttpStreamingRedirector',
|
'dynamicHttpStreamingRedirector',
|
||||||
'adaptiveHttpStreamingRedirector'):
|
'adaptiveHttpStreamingRedirector'):
|
||||||
|
@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
|
||||||
if url_el is None:
|
if url_el is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
video_url = url_el.text
|
video_url = url_or_none(url_el.text)
|
||||||
if video_url in processed_urls:
|
if not video_url or video_url in processed_urls:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
processed_urls.append(video_url)
|
processed_urls.append(video_url)
|
||||||
|
|
||||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
ext = determine_ext(video_url)
|
||||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
|
||||||
|
|
||||||
ext = determine_ext(url_el.text)
|
|
||||||
if ext == 'm3u8':
|
if ext == 'm3u8':
|
||||||
url_formats = self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
preference=0, m3u8_id='HLS', fatal=False)
|
preference=0, m3u8_id='HLS', fatal=False))
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
url_formats = self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
|
||||||
preference=0, f4m_id='HDS', fatal=False)
|
preference=0, f4m_id='HDS', fatal=False))
|
||||||
else:
|
else:
|
||||||
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
|
||||||
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
|
||||||
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
|
||||||
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
|
||||||
|
|
||||||
|
format_id = [media_type]
|
||||||
|
if vbr or abr:
|
||||||
|
format_id.append(compat_str(vbr or abr))
|
||||||
|
|
||||||
f = {
|
f = {
|
||||||
'url': video_url,
|
'url': video_url,
|
||||||
'format_id': '%s-%d' % (media_type, vbr or abr),
|
'format_id': '-'.join(format_id),
|
||||||
'filesize': filesize,
|
'filesize': filesize,
|
||||||
'abr': abr,
|
'abr': abr,
|
||||||
'preference': 1,
|
'vbr': vbr,
|
||||||
}
|
}
|
||||||
|
|
||||||
if vbr:
|
if vbr:
|
||||||
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
|
|
||||||
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
|
|
||||||
f.update({
|
f.update({
|
||||||
'vbr': vbr,
|
'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
|
||||||
'width': width,
|
'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
|
||||||
'height': height,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
url_formats = [f]
|
if type_ == 'audio':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
|
|
||||||
if not url_formats:
|
formats.append(f)
|
||||||
continue
|
|
||||||
|
|
||||||
if not vbr:
|
|
||||||
for f in url_formats:
|
|
||||||
abr = f.get('tbr') or abr
|
|
||||||
if 'tbr' in f:
|
|
||||||
del f['tbr']
|
|
||||||
f.update({
|
|
||||||
'abr': abr,
|
|
||||||
'vcodec': 'none',
|
|
||||||
})
|
|
||||||
|
|
||||||
formats.extend(url_formats)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
|
131
haruhi_dl/extractor/medaltv.py
Normal file
131
haruhi_dl/extractor/medaltv.py
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MedalTVIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
||||||
|
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '34934644',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Quad Cold',
|
||||||
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
|
'uploader': 'MowgliSB',
|
||||||
|
'timestamp': 1603165266,
|
||||||
|
'upload_date': '20201020',
|
||||||
|
'uploader_id': 10619174,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/36787208',
|
||||||
|
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '36787208',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'u tk me i tk u bigger',
|
||||||
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
|
'uploader': 'Mimicc',
|
||||||
|
'timestamp': 1605580939,
|
||||||
|
'upload_date': '20201117',
|
||||||
|
'uploader_id': 5156321,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
hydration_data = self._parse_json(self._search_regex(
|
||||||
|
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
|
||||||
|
webpage, 'hydration data', default='{}'), video_id)
|
||||||
|
|
||||||
|
clip = try_get(
|
||||||
|
hydration_data, lambda x: x['clips'][video_id], dict) or {}
|
||||||
|
if not clip:
|
||||||
|
raise ExtractorError(
|
||||||
|
'Could not find video information.', video_id=video_id)
|
||||||
|
|
||||||
|
title = clip['contentTitle']
|
||||||
|
|
||||||
|
source_width = int_or_none(clip.get('sourceWidth'))
|
||||||
|
source_height = int_or_none(clip.get('sourceHeight'))
|
||||||
|
|
||||||
|
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
|
||||||
|
|
||||||
|
def add_item(container, item_url, height, id_key='format_id', item_id=None):
|
||||||
|
item_id = item_id or '%dp' % height
|
||||||
|
if item_id not in item_url:
|
||||||
|
return
|
||||||
|
width = int(round(aspect_ratio * height))
|
||||||
|
container.append({
|
||||||
|
'url': item_url,
|
||||||
|
id_key: item_id,
|
||||||
|
'width': width,
|
||||||
|
'height': height
|
||||||
|
})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
thumbnails = []
|
||||||
|
for k, v in clip.items():
|
||||||
|
if not (v and isinstance(v, compat_str)):
|
||||||
|
continue
|
||||||
|
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
|
||||||
|
if not mobj:
|
||||||
|
continue
|
||||||
|
prefix = mobj.group(1)
|
||||||
|
height = int_or_none(mobj.group(2))
|
||||||
|
if prefix == 'contentUrl':
|
||||||
|
add_item(
|
||||||
|
formats, v, height or source_height,
|
||||||
|
item_id=None if height else 'source')
|
||||||
|
elif prefix == 'thumbnail':
|
||||||
|
add_item(thumbnails, v, height, 'id')
|
||||||
|
|
||||||
|
error = clip.get('error')
|
||||||
|
if not formats and error:
|
||||||
|
if error == 404:
|
||||||
|
raise ExtractorError(
|
||||||
|
'That clip does not exist.',
|
||||||
|
expected=True, video_id=video_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(
|
||||||
|
'An unknown error occurred ({0}).'.format(error),
|
||||||
|
video_id=video_id)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
# Necessary because the id of the author is not known in advance.
|
||||||
|
# Won't raise an issue if no profile can be found as this is optional.
|
||||||
|
author = try_get(
|
||||||
|
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
|
||||||
|
author_id = str_or_none(author.get('id'))
|
||||||
|
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': clip.get('contentDescription'),
|
||||||
|
'uploader': author.get('displayName'),
|
||||||
|
'timestamp': float_or_none(clip.get('created'), 1000),
|
||||||
|
'uploader_id': author_id,
|
||||||
|
'uploader_url': author_url,
|
||||||
|
'duration': int_or_none(clip.get('videoLengthSeconds')),
|
||||||
|
'view_count': int_or_none(clip.get('views')),
|
||||||
|
'like_count': int_or_none(clip.get('likes')),
|
||||||
|
'comment_count': int_or_none(clip.get('comments')),
|
||||||
|
}
|
|
@ -2,268 +2,113 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .gigya import GigyaBaseIE
|
from .common import InfoExtractor
|
||||||
|
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_duration,
|
mimetype2ext,
|
||||||
try_get,
|
parse_iso8601,
|
||||||
unified_timestamp,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MedialaanIE(GigyaBaseIE):
|
class MedialaanIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:www\.|nieuws\.)?
|
|
||||||
(?:
|
(?:
|
||||||
(?P<site_id>vtm|q2|vtmkzoom)\.be/
|
(?:embed\.)?mychannels.video/embed/|
|
||||||
|
embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
|
||||||
|
(?:www\.)?(?:
|
||||||
(?:
|
(?:
|
||||||
video(?:/[^/]+/id/|/?\?.*?\baid=)|
|
7sur7|
|
||||||
(?:[^/]+/)*
|
demorgen|
|
||||||
|
hln|
|
||||||
|
joe|
|
||||||
|
qmusic
|
||||||
|
)\.be|
|
||||||
|
(?:
|
||||||
|
[abe]d|
|
||||||
|
bndestem|
|
||||||
|
destentor|
|
||||||
|
gelderlander|
|
||||||
|
pzc|
|
||||||
|
tubantia|
|
||||||
|
volkskrant
|
||||||
|
)\.nl
|
||||||
|
)/video/(?:[^/]+/)*[^/?&#]+~p
|
||||||
)
|
)
|
||||||
)
|
(?P<id>\d+)
|
||||||
(?P<id>[^/?#&]+)
|
|
||||||
'''
|
'''
|
||||||
_NETRC_MACHINE = 'medialaan'
|
|
||||||
_APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
|
|
||||||
_SITE_TO_APP_ID = {
|
|
||||||
'vtm': 'vtm_watch',
|
|
||||||
'q2': 'q2',
|
|
||||||
'vtmkzoom': 'vtmkzoom',
|
|
||||||
}
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# vod
|
'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
|
||||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'vtm_20170219_VM0678361_vtmwatch',
|
'id': '193993',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Allemaal Chris afl. 6',
|
'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
|
||||||
'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
|
'timestamp': 1611663540,
|
||||||
'timestamp': 1487533280,
|
'upload_date': '20210126',
|
||||||
'upload_date': '20170219',
|
'duration': 238,
|
||||||
'duration': 2562,
|
|
||||||
'series': 'Allemaal Chris',
|
|
||||||
'season': 'Allemaal Chris',
|
|
||||||
'season_number': 1,
|
|
||||||
'season_id': '256936078124527',
|
|
||||||
'episode': 'Allemaal Chris afl. 6',
|
|
||||||
'episode_number': 6,
|
|
||||||
'episode_id': '256936078591527',
|
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'skip': 'Requires account credentials',
|
|
||||||
}, {
|
}, {
|
||||||
# clip
|
'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
|
||||||
'url': 'http://vtm.be/video?aid=168332',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '168332',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '"Veronique liegt!"',
|
|
||||||
'description': 'md5:1385e2b743923afe54ba4adc38476155',
|
|
||||||
'timestamp': 1489002029,
|
|
||||||
'upload_date': '20170308',
|
|
||||||
'duration': 96,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# vod
|
|
||||||
'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# vod
|
'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
|
||||||
'url': 'http://vtm.be/video?aid=163157',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# vod
|
'url': 'https://embed.mychannels.video/script/production/193993',
|
||||||
'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# clip
|
'url': 'https://embed.mychannels.video/production/193993',
|
||||||
'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# http/s redirect
|
'url': 'https://mychannels.video/embed/193993',
|
||||||
'url': 'https://vtmkzoom.be/video?aid=45724',
|
'only_matching': True,
|
||||||
'info_dict': {
|
|
||||||
'id': '257136373657000',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'K3 Dansstudio Ushuaia afl.6',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'skip': 'Requires account credentials',
|
|
||||||
}, {
|
}, {
|
||||||
# nieuws.vtm.be
|
'url': 'https://embed.mychannels.video/embed/193993',
|
||||||
'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_initialize(self):
|
@staticmethod
|
||||||
self._logged_in = False
|
def _extract_urls(webpage, **kw):
|
||||||
|
entries = []
|
||||||
def _login(self):
|
for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
|
||||||
username, password = self._get_login_info()
|
mychannels_id = extract_attributes(element).get('data-mychannels-id')
|
||||||
if username is None:
|
if mychannels_id:
|
||||||
self.raise_login_required()
|
entries.append('https://mychannels.video/embed/' + mychannels_id)
|
||||||
|
return entries
|
||||||
auth_data = {
|
|
||||||
'APIKey': self._APIKEY,
|
|
||||||
'sdk': 'js_6.1',
|
|
||||||
'format': 'json',
|
|
||||||
'loginID': username,
|
|
||||||
'password': password,
|
|
||||||
}
|
|
||||||
|
|
||||||
auth_info = self._gigya_login(auth_data)
|
|
||||||
|
|
||||||
self._uid = auth_info['UID']
|
|
||||||
self._uid_signature = auth_info['UIDSignature']
|
|
||||||
self._signature_timestamp = auth_info['signatureTimestamp']
|
|
||||||
|
|
||||||
self._logged_in = True
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
production_id = self._match_id(url)
|
||||||
video_id, site_id = mobj.group('id', 'site_id')
|
production = self._download_json(
|
||||||
|
'https://embed.mychannels.video/sdk/production/' + production_id,
|
||||||
|
production_id, query={'options': 'UUUU_default'})['productions'][0]
|
||||||
|
title = production['title']
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
formats = []
|
||||||
|
for source in (production.get('sources') or []):
|
||||||
config = self._parse_json(
|
src = source.get('src')
|
||||||
self._search_regex(
|
if not src:
|
||||||
r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
|
continue
|
||||||
webpage, 'config', default='{}'), video_id,
|
ext = mimetype2ext(source.get('type'))
|
||||||
transform_source=lambda s: s.replace(
|
if ext == 'm3u8':
|
||||||
'\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
src, production_id, 'mp4', 'm3u8_native',
|
||||||
vod_id = config.get('vodId') or self._search_regex(
|
m3u8_id='hls', fatal=False))
|
||||||
(r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
|
|
||||||
r'"vodId"\s*:\s*"(.+?)"',
|
|
||||||
r'<[^>]+id=["\']vod-(\d+)'),
|
|
||||||
webpage, 'video_id', default=None)
|
|
||||||
|
|
||||||
# clip, no authentication required
|
|
||||||
if not vod_id:
|
|
||||||
player = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
|
|
||||||
default=''),
|
|
||||||
video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
|
|
||||||
if player:
|
|
||||||
video = player[-1]
|
|
||||||
if video['videoUrl'] in ('http', 'https'):
|
|
||||||
return self.url_result(video['url'], MedialaanIE.ie_key())
|
|
||||||
info = {
|
|
||||||
'id': video_id,
|
|
||||||
'url': video['videoUrl'],
|
|
||||||
'title': video['title'],
|
|
||||||
'thumbnail': video.get('imageUrl'),
|
|
||||||
'timestamp': int_or_none(video.get('createdDate')),
|
|
||||||
'duration': int_or_none(video.get('duration')),
|
|
||||||
}
|
|
||||||
else:
|
else:
|
||||||
info = self._parse_html5_media_entries(
|
formats.append({
|
||||||
url, webpage, video_id, m3u8_id='hls')[0]
|
'ext': ext,
|
||||||
info.update({
|
'url': src,
|
||||||
'id': video_id,
|
|
||||||
'title': self._html_search_meta('description', webpage),
|
|
||||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
|
||||||
})
|
})
|
||||||
# vod, authentication required
|
|
||||||
else:
|
|
||||||
if not self._logged_in:
|
|
||||||
self._login()
|
|
||||||
|
|
||||||
settings = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
|
|
||||||
webpage, 'drupal settings', default='{}'),
|
|
||||||
video_id)
|
|
||||||
|
|
||||||
def get(container, item):
|
|
||||||
return try_get(
|
|
||||||
settings, lambda x: x[container][item],
|
|
||||||
compat_str) or self._search_regex(
|
|
||||||
r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
|
|
||||||
default=None)
|
|
||||||
|
|
||||||
app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
|
|
||||||
sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
|
|
||||||
|
|
||||||
data = self._download_json(
|
|
||||||
'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
|
|
||||||
video_id, query={
|
|
||||||
'app_id': app_id,
|
|
||||||
'user_network': sso,
|
|
||||||
'UID': self._uid,
|
|
||||||
'UIDSignature': self._uid_signature,
|
|
||||||
'signatureTimestamp': self._signature_timestamp,
|
|
||||||
})
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(
|
|
||||||
data['response']['uri'], video_id, entry_protocol='m3u8_native',
|
|
||||||
ext='mp4', m3u8_id='hls')
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
info = {
|
return {
|
||||||
'id': vod_id,
|
'id': production_id,
|
||||||
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'thumbnail': production.get('posterUrl'),
|
||||||
|
'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
|
||||||
|
'duration': int_or_none(production.get('duration')) or None,
|
||||||
}
|
}
|
||||||
|
|
||||||
api_key = get('vod', 'apiKey')
|
|
||||||
channel = get('medialaanGigya', 'channel')
|
|
||||||
|
|
||||||
if api_key:
|
|
||||||
videos = self._download_json(
|
|
||||||
'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
|
|
||||||
query={
|
|
||||||
'channels': channel,
|
|
||||||
'ids': vod_id,
|
|
||||||
'limit': 1,
|
|
||||||
'apikey': api_key,
|
|
||||||
})
|
|
||||||
if videos:
|
|
||||||
video = try_get(
|
|
||||||
videos, lambda x: x['response']['videos'][0], dict)
|
|
||||||
if video:
|
|
||||||
def get(container, item, expected_type=None):
|
|
||||||
return try_get(
|
|
||||||
video, lambda x: x[container][item], expected_type)
|
|
||||||
|
|
||||||
def get_string(container, item):
|
|
||||||
return get(container, item, compat_str)
|
|
||||||
|
|
||||||
info.update({
|
|
||||||
'series': get_string('program', 'title'),
|
|
||||||
'season': get_string('season', 'title'),
|
|
||||||
'season_number': int_or_none(get('season', 'number')),
|
|
||||||
'season_id': get_string('season', 'id'),
|
|
||||||
'episode': get_string('episode', 'title'),
|
|
||||||
'episode_number': int_or_none(get('episode', 'number')),
|
|
||||||
'episode_id': get_string('episode', 'id'),
|
|
||||||
'duration': int_or_none(
|
|
||||||
video.get('duration')) or int_or_none(
|
|
||||||
video.get('durationMillis'), scale=1000),
|
|
||||||
'title': get_string('episode', 'title'),
|
|
||||||
'description': get_string('episode', 'text'),
|
|
||||||
'timestamp': unified_timestamp(get_string(
|
|
||||||
'publication', 'begin')),
|
|
||||||
})
|
|
||||||
|
|
||||||
if not info.get('title'):
|
|
||||||
info['title'] = try_get(
|
|
||||||
config, lambda x: x['videoConfig']['title'],
|
|
||||||
compat_str) or self._html_search_regex(
|
|
||||||
r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
|
|
||||||
default=None) or self._og_search_title(webpage)
|
|
||||||
|
|
||||||
if not info.get('description'):
|
|
||||||
info['description'] = self._html_search_regex(
|
|
||||||
r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
|
|
||||||
webpage, 'description', default=None)
|
|
||||||
|
|
||||||
return info
|
|
||||||
|
|
|
@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
https?://
|
https?://
|
||||||
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
(?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
|
||||||
(?:
|
(?:
|
||||||
(?:video|on-demand)/(?:[^/]+/)+[^/]+_|
|
(?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
|
||||||
player/index\.html\?.*?\bprogramGuid=
|
player/index\.html\?.*?\bprogramGuid=
|
||||||
)
|
)
|
||||||
)(?P<id>[0-9A-Z]{16,})
|
)(?P<id>[0-9A-Z]{16,})
|
||||||
|
@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -17,9 +17,8 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class MGTVIE(InfoExtractor):
|
class MGTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
_VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
|
||||||
IE_DESC = '芒果TV'
|
IE_DESC = '芒果TV'
|
||||||
_GEO_COUNTRIES = ['CN']
|
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
|
||||||
|
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
'url': 'http://www.mgtv.com/b/301817/3826653.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://w.mgtv.com/b/301817/3826653.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
|
||||||
try:
|
try:
|
||||||
api_data = self._download_json(
|
api_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
'https://pcweb.api.mgtv.com/player/video', video_id, query={
|
||||||
'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
|
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
|
||||||
stream_data = self._download_json(
|
stream_data = self._download_json(
|
||||||
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
|
||||||
'pm2': api_data['atc']['pm2'],
|
'pm2': api_data['atc']['pm2'],
|
||||||
|
'tk2': tk2,
|
||||||
'video_id': video_id,
|
'video_id': video_id,
|
||||||
}, headers=self.geo_verification_headers())['data']
|
}, headers=self.geo_verification_headers())['data']
|
||||||
stream_domain = stream_data['stream_domain'][0]
|
stream_domain = stream_data['stream_domain'][0]
|
||||||
|
|
196
haruhi_dl/extractor/minds.py
Normal file
196
haruhi_dl/extractor/minds.py
Normal file
|
@ -0,0 +1,196 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
strip_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# Shared plumbing for all minds.com extractors: the URL prefix and the
# JSON API helper.
class MindsBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download and return the JSON document at /api/<path>.

        `resource` is only used to build the progress note. The XSRF
        cookie stored for the API URL (if there is one) is echoed back in
        the X-XSRF-TOKEN header; otherwise an empty value is sent.
        """
        api_url = 'https://www.minds.com/api/' + path
        xsrf_cookie = self._get_cookies(api_url).get('XSRF-TOKEN')
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_cookie.value if xsrf_cookie else '',
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            api_url, video_id, note, headers=request_headers, query=query)
|
||||||
|
|
||||||
|
|
||||||
|
# Extractor for a single minds.com video addressed by a media, newsfeed or
# archive/view URL.
class MindsIE(MindsBaseIE):
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the entity behind `url` and return its info dict.

        Activities that do not wrap a native video are delegated to
        whatever extractor handles their perma_url.
        """
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']

        # Work out which guid identifies the actual video.
        if entity.get('type') != 'activity':
            assert(entity['subtype'] == 'video')
            video_id = entity_id
        elif entity.get('custom_type') == 'video':
            video_id = entity['entity_guid']
        else:
            return self.url_result(entity['perma_url'])

        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        # Sources without a usable URL are dropped.
        formats = [{
            'format_id': source.get('label'),
            'height': int_or_none(source.get('size')),
            'url': source.get('src'),
        } for source in (video.get('sources') or []) if source.get('src')]
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response when present.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')
        uploader_url = ('https://www.minds.com/' + uploader_id) if uploader_id else None

        # A lone tag may be delivered as a bare string; wrap it in a list.
        tags = entity.get('tags')
        if tags and isinstance(tags, compat_str):
            tags = [tags]

        # The thumbnail is the final URL reached when requesting the poster.
        poster = video.get('poster') or entity.get('thumbnail_src')
        urlh = self._request_webpage(poster, video_id, fatal=False) if poster else None
        thumbnail = urlh.geturl() if urlh else None

        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# Base class for minds.com playlists (channels and groups). Subclasses
# supply _FEED_TYPE (key of the feed object in the API response) and
# _FEED_PATH (API path segment used to look the feed up).
class MindsFeedBaseIE(MindsBaseIE):
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield one url_result per video in the feed's video container.

        Pages are requested until the API stops returning a continuation
        timestamp or a page comes back shorter than _PAGE_SIZE.
        """
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        i = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % i, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # A missing 'load-next' key means there is no further page; use
            # .get() so that case ends pagination instead of raising
            # KeyError after all entries have already been yielded.
            next_timestamp = data.get('load-next')
            if not (next_timestamp and len(entities) == self._PAGE_SIZE):
                break
            query['from_timestamp'] = next_timestamp
            i += 1

    def _real_extract(self, url):
        """Look the feed up by its URL id and return it as a playlist."""
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
|
||||||
|
|
||||||
|
|
||||||
|
# Playlist extractor for a minds.com channel page (https://www.minds.com/<name>).
class MindsChannelIE(MindsFeedBaseIE):
    _FEED_TYPE = 'channel'
    _FEED_PATH = 'channel'
    IE_NAME = 'minds:' + _FEED_TYPE
    # The negative lookahead keeps reserved top-level paths from being
    # mistaken for channel names.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# Playlist extractor for a minds.com group (groups/profile/<numeric id>).
class MindsGroupIE(MindsFeedBaseIE):
    _FEED_TYPE = 'group'
    _FEED_PATH = 'groups/group'
    IE_NAME = 'minds:' + _FEED_TYPE
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }
|
|
@ -1,15 +1,14 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .telecinco import TelecincoIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
smuggle_url,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class MiTeleIE(InfoExtractor):
|
class MiTeleIE(TelecincoIE):
|
||||||
IE_DESC = 'mitele.es'
|
IE_DESC = 'mitele.es'
|
||||||
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
|
||||||
|
|
||||||
|
@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
|
||||||
'timestamp': 1471209401,
|
'timestamp': 1471209401,
|
||||||
'upload_date': '20160814',
|
'upload_date': '20160814',
|
||||||
},
|
},
|
||||||
'add_ie': ['Ooyala'],
|
|
||||||
}, {
|
}, {
|
||||||
# no explicit title
|
# no explicit title
|
||||||
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
|
||||||
|
@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
'add_ie': ['Ooyala'],
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
|
||||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
|
||||||
webpage, 'Pre Player'), display_id)['prePlayer']
|
webpage, 'Pre Player'), display_id)['prePlayer']
|
||||||
title = pre_player['title']
|
title = pre_player['title']
|
||||||
video = pre_player['video']
|
video_info = self._parse_content(pre_player['video'], url)
|
||||||
video_id = video['dataMediaId']
|
|
||||||
content = pre_player.get('content') or {}
|
content = pre_player.get('content') or {}
|
||||||
info = content.get('info') or {}
|
info = content.get('info') or {}
|
||||||
|
|
||||||
return {
|
video_info.update({
|
||||||
'_type': 'url_transparent',
|
|
||||||
# for some reason only HLS is supported
|
|
||||||
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': info.get('synopsis'),
|
'description': info.get('synopsis'),
|
||||||
'series': content.get('title'),
|
'series': content.get('title'),
|
||||||
|
@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
|
||||||
'episode': content.get('subtitle'),
|
'episode': content.get('subtitle'),
|
||||||
'episode_number': int_or_none(info.get('episode_number')),
|
'episode_number': int_or_none(info.get('episode_number')),
|
||||||
'duration': int_or_none(info.get('duration')),
|
'duration': int_or_none(info.get('duration')),
|
||||||
'thumbnail': video.get('dataPoster'),
|
|
||||||
'age_limit': int_or_none(info.get('rating')),
|
'age_limit': int_or_none(info.get('rating')),
|
||||||
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
'timestamp': parse_iso8601(pre_player.get('publishedTime')),
|
||||||
}
|
})
|
||||||
|
return video_info
|
||||||
|
|
|
@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
|
||||||
cloudcast_url = cloudcast.get('url')
|
cloudcast_url = cloudcast.get('url')
|
||||||
if not cloudcast_url:
|
if not cloudcast_url:
|
||||||
continue
|
continue
|
||||||
|
slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
|
||||||
|
owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
|
||||||
|
video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
|
||||||
entries.append(self.url_result(
|
entries.append(self.url_result(
|
||||||
cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
|
cloudcast_url, MixcloudIE.ie_key(), video_id))
|
||||||
|
|
||||||
page_info = items['pageInfo']
|
page_info = items['pageInfo']
|
||||||
has_next_page = page_info['hasNextPage']
|
has_next_page = page_info['hasNextPage']
|
||||||
|
@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
|
||||||
_DESCRIPTION_KEY = 'biog'
|
_DESCRIPTION_KEY = 'biog'
|
||||||
_ROOT_TYPE = 'user'
|
_ROOT_TYPE = 'user'
|
||||||
_NODE_TEMPLATE = '''slug
|
_NODE_TEMPLATE = '''slug
|
||||||
url'''
|
url
|
||||||
|
owner { username }'''
|
||||||
|
|
||||||
def _get_playlist_title(self, title, slug):
|
def _get_playlist_title(self, title, slug):
|
||||||
return '%s (%s)' % (title, slug)
|
return '%s (%s)' % (title, slug)
|
||||||
|
@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
|
||||||
_NODE_TEMPLATE = '''cloudcast {
|
_NODE_TEMPLATE = '''cloudcast {
|
||||||
slug
|
slug
|
||||||
url
|
url
|
||||||
|
owner { username }
|
||||||
}'''
|
}'''
|
||||||
|
|
||||||
def _get_cloudcast(self, node):
|
def _get_cloudcast(self, node):
|
||||||
|
|
|
@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
|
||||||
# no keywords
|
# no keywords
|
||||||
'url': 'http://motherless.com/8B4BBC1',
|
'url': 'http://motherless.com/8B4BBC1',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# see https://motherless.com/videos/recent for recent videos with
|
||||||
|
# uploaded date in "ago" format
|
||||||
|
'url': 'https://motherless.com/3C3E2CF',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3C3E2CF',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'a/ Hot Teens',
|
||||||
|
'categories': list,
|
||||||
|
'upload_date': '20210104',
|
||||||
|
'uploader_id': 'yonbiw',
|
||||||
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
|
'age_limit': 18,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -85,20 +102,28 @@ class MotherlessIE(InfoExtractor):
|
||||||
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
view_count = str_to_int(self._html_search_regex(
|
view_count = str_to_int(self._html_search_regex(
|
||||||
(r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
(r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
||||||
webpage, 'view count', fatal=False))
|
webpage, 'view count', fatal=False))
|
||||||
like_count = str_to_int(self._html_search_regex(
|
like_count = str_to_int(self._html_search_regex(
|
||||||
(r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
|
(r'>([\d,.]+)\s+Favorites<',
|
||||||
|
r'<strong>Favorited</strong>\s+([^<]+)<'),
|
||||||
webpage, 'like count', fatal=False))
|
webpage, 'like count', fatal=False))
|
||||||
|
|
||||||
upload_date = self._html_search_regex(
|
upload_date = unified_strdate(self._search_regex(
|
||||||
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
|
||||||
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
'upload date', default=None))
|
||||||
if 'Ago' in upload_date:
|
if not upload_date:
|
||||||
days = int(re.search(r'([0-9]+)', upload_date).group(1))
|
uploaded_ago = self._search_regex(
|
||||||
upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
|
r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
|
||||||
else:
|
default=None)
|
||||||
upload_date = unified_strdate(upload_date)
|
if uploaded_ago:
|
||||||
|
delta = int(uploaded_ago[:-1])
|
||||||
|
_AGO_UNITS = {
|
||||||
|
'h': 'hours',
|
||||||
|
'd': 'days',
|
||||||
|
}
|
||||||
|
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
||||||
|
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
||||||
|
|
||||||
comment_count = webpage.count('class="media-comment-contents"')
|
comment_count = webpage.count('class="media-comment-contents"')
|
||||||
uploader_id = self._html_search_regex(
|
uploader_id = self._html_search_regex(
|
||||||
|
|
|
@ -253,6 +253,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
|
||||||
|
|
||||||
|
@staticmethod
def _extract_child_with_type(parent, t):
    """Return the first item of parent['children'] whose 'type' equals t.

    Raises KeyError if parent has no 'children' entry and StopIteration
    if no child has the requested type.
    """
    for child in parent['children']:
        if child.get('type') == t:
            return child
    # Same signal next() gives on an exhausted generator.
    raise StopIteration
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
try:
|
try:
|
||||||
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
# the url can be http://media.mtvnservices.com/fb/{mgid}.swf
|
||||||
|
@ -278,6 +282,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
if not mgid:
|
if not mgid:
|
||||||
mgid = self._extract_triforce_mgid(webpage)
|
mgid = self._extract_triforce_mgid(webpage)
|
||||||
|
|
||||||
|
if not mgid:
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
|
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||||
|
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
||||||
|
mgid = video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
return mgid
|
return mgid
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -5,33 +5,137 @@ import re
|
||||||
|
|
||||||
from .turner import TurnerBaseIE
|
from .turner import TurnerBaseIE
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urllib_parse_urlencode,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_str,
|
||||||
|
compat_urllib_parse_unquote,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
OnDemandPagedList,
|
OnDemandPagedList,
|
||||||
remove_start,
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
update_url_query,
|
||||||
|
urljoin,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NBAIE(TurnerBaseIE):
|
class NBACVPBaseIE(TurnerBaseIE):
|
||||||
_VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
|
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||||
|
return self._extract_cvp_info(
|
||||||
|
'http://secure.nba.com/%s' % path, video_id, {
|
||||||
|
'default': {
|
||||||
|
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||||
|
},
|
||||||
|
'm3u8': {
|
||||||
|
'media_src': 'http://nbavod-f.akamaihd.net',
|
||||||
|
},
|
||||||
|
}, fatal=fatal)
|
||||||
|
|
||||||
|
|
||||||
|
# Base class for the watch.nba.com / nba.com/watch extractors: looks a
# program up in the search index and assembles its formats and metadata.
class NBAWatchBaseIE(NBACVPBaseIE):
    _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'

    def _extract_video(self, filter_key, filter_value):
        """Return the info dict of the first program where filter_key == filter_value.

        filter_key is the search field to query (callers in this file pass
        'pid' or 'seoName'); filter_value is the value to look for and also
        serves as the display id while the lookup is running.
        """
        video = self._download_json(
            'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
            filter_value, query={
                'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
                'q': filter_key + ':' + filter_value,
                'wt': 'json',
            })['response']['docs'][0]

        # compat_str rather than str: the id must be a text string on
        # Python 2 as well, as with the other ids built in this module.
        video_id = compat_str(video['pid'])
        title = video['name']

        formats = []
        m3u8_url = (self._download_json(
            'https://watch.nba.com/service/publishpoint', video_id, query={
                'type': 'video',
                'format': 'json',
                'id': video_id,
            }, headers={
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
            }, fatal=False) or {}).get('path')
        if m3u8_url:
            # Strip the device suffix from the manifest name before
            # requesting it.
            m3u8_formats = self._extract_m3u8_formats(
                re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(m3u8_formats)
            # Offer every HLS variant a second time as a plain HTTP format
            # by dropping the '.m3u8' part of its URL.
            for f in m3u8_formats:
                http_f = f.copy()
                http_f.update({
                    'format_id': http_f['format_id'].replace('hls-', 'http-'),
                    'protocol': 'http',
                    'url': http_f['url'].replace('.m3u8', ''),
                })
                formats.append(http_f)

        info = {
            'id': video_id,
            'title': title,
            'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
            'description': video.get('description'),
            'duration': int_or_none(video.get('runtime')),
            'timestamp': parse_iso8601(video.get('releaseDate')),
            'tags': video.get('tags'),
        }

        # Programs whose seoName contains a yyyy/mm/dd/ path are also
        # looked up through the CVP XML feed (non-fatal) for extra formats
        # and metadata.
        seo_name = video.get('seoName')
        if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
            base_path = ''
            if seo_name.startswith('teams/'):
                base_path += seo_name.split('/')[1] + '/'
            base_path += 'video/'
            cvp_info = self._extract_nba_cvp_info(
                base_path + seo_name + '.xml', video_id, False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                info = merge_dicts(info, cvp_info)

        self._sort_formats(formats)
        info['formats'] = formats
        return info
|
||||||
|
|
||||||
|
|
||||||
|
# Extractor for watch.nba.com embed URLs, which carry the numeric program
# id in the `id` query parameter.
class NBAWatchEmbedIE(NBAWatchBaseIE):
    # Spelled IE_NAME like the sibling extractors in this module; the
    # previous 'IENAME' attribute was a typo, so this name never took effect.
    IE_NAME = 'nba:watch:embed'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://watch.nba.com/embed?id=659395',
        'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
        'info_dict': {
            'id': '659395',
            'ext': 'mp4',
            'title': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'description': 'Mix clip: More than 7 points of Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'timestamp': 1492228800,
            'upload_date': '20170415',
        },
    }]

    def _real_extract(self, url):
        """Extract the video whose 'pid' equals the id given in the URL."""
        video_id = self._match_id(url)
        return self._extract_video('pid', video_id)
|
||||||
|
|
||||||
|
|
||||||
|
class NBAWatchIE(NBAWatchBaseIE):
|
||||||
|
IE_NAME = 'nba:watch'
|
||||||
|
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
|
||||||
'md5': '9e7729d3010a9c71506fd1248f74e4f4',
|
'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0021200253-okc-bkn-recap',
|
'id': '70946',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Thunder vs. Nets',
|
'title': 'Thunder vs. Nets',
|
||||||
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
|
||||||
'duration': 181,
|
'duration': 181,
|
||||||
'timestamp': 1354638466,
|
'timestamp': 1354597200,
|
||||||
'upload_date': '20121204',
|
'upload_date': '20121204',
|
||||||
},
|
},
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
|
||||||
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
||||||
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
|
'id': '330865',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Hawks vs. Cavaliers Game 1',
|
'title': 'Hawks vs. Cavaliers Game 1',
|
||||||
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
|
||||||
'duration': 228,
|
'duration': 228,
|
||||||
'timestamp': 1432134543,
|
'timestamp': 1432094400,
|
||||||
'upload_date': '20150520',
|
'upload_date': '20150521',
|
||||||
},
|
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Practice: Doc Rivers - 2/16/16',
|
|
||||||
'description': 'Head Coach Doc Rivers addresses the media following practice.',
|
|
||||||
'upload_date': '20160216',
|
|
||||||
'timestamp': 1455672000,
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'timberwolves',
|
|
||||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
|
||||||
},
|
|
||||||
'playlist_count': 30,
|
|
||||||
'params': {
|
|
||||||
# Download the whole playlist takes too long time
|
|
||||||
'playlist_items': '1-30',
|
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
|
'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
|
}, {
|
||||||
'ext': 'mp4',
|
# only CVP mp4 format available
|
||||||
'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
|
'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
|
||||||
'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
|
'only_matching': True,
|
||||||
'upload_date': '20141212',
|
}, {
|
||||||
'timestamp': 1418418600,
|
'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
|
||||||
},
|
'only_matching': True,
|
||||||
'params': {
|
|
||||||
'noplaylist': True,
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
'expected_warnings': ['Unable to download f4m manifest'],
|
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_PAGE_SIZE = 30
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
def _fetch_page(self, team, video_id, page):
|
collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
|
||||||
search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
|
if collection_id:
|
||||||
'type': 'teamvideo',
|
|
||||||
'start': page * self._PAGE_SIZE + 1,
|
|
||||||
'npp': (page + 1) * self._PAGE_SIZE + 1,
|
|
||||||
'sort': 'recent',
|
|
||||||
'output': 'json',
|
|
||||||
'site': team,
|
|
||||||
})
|
|
||||||
results = self._download_json(
|
|
||||||
search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
|
|
||||||
for item in results:
|
|
||||||
yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
|
|
||||||
|
|
||||||
def _extract_playlist(self, orig_path, video_id, webpage):
|
|
||||||
team = orig_path.split('/')[0]
|
|
||||||
|
|
||||||
if self._downloader.params.get('noplaylist'):
|
if self._downloader.params.get('noplaylist'):
|
||||||
self.to_screen('Downloading just video because of --no-playlist')
|
self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
|
||||||
video_path = self._search_regex(
|
else:
|
||||||
r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
|
self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
|
||||||
video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
|
return self.url_result(
|
||||||
return self.url_result(video_url)
|
'https://www.nba.com/watch/list/collection/' + collection_id,
|
||||||
|
NBAWatchCollectionIE.ie_key(), collection_id)
|
||||||
|
return self._extract_video('seoName', display_id)
|
||||||
|
|
||||||
self.to_screen('Downloading playlist - add --no-playlist to just download video')
|
|
||||||
playlist_title = self._og_search_title(webpage, fatal=False)
|
|
||||||
entries = OnDemandPagedList(
|
|
||||||
functools.partial(self._fetch_page, team, video_id),
|
|
||||||
self._PAGE_SIZE)
|
|
||||||
|
|
||||||
return self.playlist_result(entries, team, playlist_title)
|
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||||
|
IE_NAME = 'nba:watch:collection'
|
||||||
|
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://watch.nba.com/list/collection/season-preview-2020',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'season-preview-2020',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 43,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _fetch_page(self, collection_id, page):
|
||||||
|
page += 1
|
||||||
|
videos = self._download_json(
|
||||||
|
'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
|
||||||
|
collection_id, 'Downloading page %d JSON metadata' % page, query={
|
||||||
|
'count': self._PAGE_SIZE,
|
||||||
|
'page': page,
|
||||||
|
})['results']['videos']
|
||||||
|
for video in videos:
|
||||||
|
program = video.get('program') or {}
|
||||||
|
seo_name = program.get('seoName') or program.get('slug')
|
||||||
|
if not seo_name:
|
||||||
|
continue
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': program.get('id'),
|
||||||
|
'title': program.get('title') or video.get('title'),
|
||||||
|
'url': 'https://www.nba.com/watch/video/' + seo_name,
|
||||||
|
'thumbnail': video.get('image'),
|
||||||
|
'description': program.get('description') or video.get('description'),
|
||||||
|
'duration': parse_duration(program.get('runtimeHours')),
|
||||||
|
'timestamp': parse_iso8601(video.get('releaseDate')),
|
||||||
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
path, video_id = re.match(self._VALID_URL, url).groups()
|
collection_id = self._match_id(url)
|
||||||
orig_path = path
|
entries = OnDemandPagedList(
|
||||||
if path.startswith('nba/'):
|
functools.partial(self._fetch_page, collection_id),
|
||||||
path = path[3:]
|
self._PAGE_SIZE)
|
||||||
|
return self.playlist_result(entries, collection_id)
|
||||||
|
|
||||||
if 'video/' not in path:
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
|
|
||||||
|
|
||||||
if path == '{{id}}':
|
class NBABaseIE(NBACVPBaseIE):
|
||||||
return self._extract_playlist(orig_path, video_id, webpage)
|
_VALID_URL_BASE = r'''(?x)
|
||||||
|
https?://(?:www\.)?nba\.com/
|
||||||
|
(?P<team>
|
||||||
|
blazers|
|
||||||
|
bucks|
|
||||||
|
bulls|
|
||||||
|
cavaliers|
|
||||||
|
celtics|
|
||||||
|
clippers|
|
||||||
|
grizzlies|
|
||||||
|
hawks|
|
||||||
|
heat|
|
||||||
|
hornets|
|
||||||
|
jazz|
|
||||||
|
kings|
|
||||||
|
knicks|
|
||||||
|
lakers|
|
||||||
|
magic|
|
||||||
|
mavericks|
|
||||||
|
nets|
|
||||||
|
nuggets|
|
||||||
|
pacers|
|
||||||
|
pelicans|
|
||||||
|
pistons|
|
||||||
|
raptors|
|
||||||
|
rockets|
|
||||||
|
sixers|
|
||||||
|
spurs|
|
||||||
|
suns|
|
||||||
|
thunder|
|
||||||
|
timberwolves|
|
||||||
|
warriors|
|
||||||
|
wizards
|
||||||
|
)
|
||||||
|
(?:/play\#)?/'''
|
||||||
|
_CHANNEL_PATH_REGEX = r'video/channel|series'
|
||||||
|
|
||||||
# See prepareContentId() of pkgCvp.js
|
def _embed_url_result(self, team, content_id):
|
||||||
if path.startswith('video/teams'):
|
return self.url_result(update_url_query(
|
||||||
path = 'video/channels/proxy/' + path[6:]
|
'https://secure.nba.com/assets/amp/include/video/iframe.html', {
|
||||||
|
'contentId': content_id,
|
||||||
|
'team': team,
|
||||||
|
}), NBAEmbedIE.ie_key())
|
||||||
|
|
||||||
return self._extract_cvp_info(
|
def _call_api(self, team, content_id, query, resource):
|
||||||
'http://www.nba.com/%s.xml' % path, video_id, {
|
return self._download_json(
|
||||||
'default': {
|
'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
|
||||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
content_id, 'Download %s JSON metadata' % resource,
|
||||||
},
|
query=query, headers={
|
||||||
'm3u8': {
|
'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
|
||||||
'media_src': 'http://nbavod-f.akamaihd.net',
|
})['response']['result']
|
||||||
},
|
|
||||||
|
def _extract_video(self, video, team, extract_all=True):
|
||||||
|
video_id = compat_str(video['nid'])
|
||||||
|
team = video['brand']
|
||||||
|
|
||||||
|
info = {
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title') or video.get('headline') or video['shortHeadline'],
|
||||||
|
'description': video.get('description'),
|
||||||
|
'timestamp': parse_iso8601(video.get('published')),
|
||||||
|
}
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
captions = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
|
||||||
|
for caption_url in captions.values():
|
||||||
|
subtitles.setdefault('en', []).append({'url': caption_url})
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
mp4_url = video.get('mp4')
|
||||||
|
if mp4_url:
|
||||||
|
formats.append({
|
||||||
|
'url': mp4_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if extract_all:
|
||||||
|
source_url = video.get('videoSource')
|
||||||
|
if source_url and not source_url.startswith('s3://') and self._is_valid_url(source_url, video_id, 'source'):
|
||||||
|
formats.append({
|
||||||
|
'format_id': 'source',
|
||||||
|
'url': source_url,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
|
||||||
|
m3u8_url = video.get('m3u8')
|
||||||
|
if m3u8_url:
|
||||||
|
if '.akamaihd.net/i/' in m3u8_url:
|
||||||
|
formats.extend(self._extract_akamai_formats(
|
||||||
|
m3u8_url, video_id, {'http': 'pmd.cdn.turner.com'}))
|
||||||
|
else:
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
m3u8_url, video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
|
|
||||||
|
content_xml = video.get('contentXml')
|
||||||
|
if team and content_xml:
|
||||||
|
cvp_info = self._extract_nba_cvp_info(
|
||||||
|
team + content_xml, video_id, fatal=False)
|
||||||
|
if cvp_info:
|
||||||
|
formats.extend(cvp_info['formats'])
|
||||||
|
subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
|
||||||
|
info = merge_dicts(info, cvp_info)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
else:
|
||||||
|
info.update(self._embed_url_result(team, video['videoId']))
|
||||||
|
|
||||||
|
info.update({
|
||||||
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
})
|
||||||
|
|
||||||
|
return info
|
||||||
|
|
||||||
|
def _real_extract(self, url):
    # Work out the content reference for this URL, then hand it to the
    # subclass hook (_extract_url_results) together with the team slug.
    team, content_ref = re.match(self._VALID_URL, url).groups()
    if '/play#/' not in url:
        # Regular page: the reference is embedded in the page source under
        # the subclass-specific key named by _CONTENT_ID_REGEX.
        page = self._download_webpage(url, content_ref)
        content_ref = self._search_regex(
            self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"', page, 'video id')
    else:
        # "/play#/" URLs carry the reference percent-encoded in the URL itself.
        content_ref = compat_urllib_parse_unquote(content_ref)
    return self._extract_url_results(team, content_ref)
|
||||||
|
|
||||||
|
|
||||||
|
class NBAEmbedIE(NBABaseIE):
    # Extractor for the secure.nba.com iframe / topIframe embed player.
    #
    # The extractor name attribute is spelled IE_NAME; this class previously
    # declared it as ``IENAME``, so the intended name was never applied.
    IE_NAME = 'nba:embed'
    _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&Env=',
        'only_matching': True,
    }, {
        'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Both the content id and the (optional) team come from the query
        # string of the embed URL.
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        content_id = qs['contentId'][0]
        team = qs.get('team', [None])[0]
        if not team:
            # Missing or empty ``team`` parameter: delegate to NBAWatchIE
            # through a watch.nba.com URL built from the content id.
            return self.url_result(
                'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
        # Team embed: query the API by video id and use the first result.
        video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
        return self._extract_video(video, team)
|
||||||
|
|
||||||
|
|
||||||
|
class NBAIE(NBABaseIE):
    # Extractor for single videos on nba.com team sites (any path under
    # ``video/`` that is not a channel/series path).
    #
    # The extractor name attribute is spelled IE_NAME; this class previously
    # declared it as ``IENAME``, so the intended name was never applied.
    IE_NAME = 'nba'
    # The negative lookahead excludes the channel/series paths, which
    # NBABaseIE._CHANNEL_PATH_REGEX describes.
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
        'info_dict': {
            'id': '45039',
            'ext': 'mp4',
            'title': 'AND WE BACK.',
            'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
            'duration': 94,
            'timestamp': 1607112000,
            'upload_date': '20201218',
        },
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
        'only_matching': True,
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
        'only_matching': True,
    }]
    # Key looked up in the page source to find the content id.
    _CONTENT_ID_REGEX = r'videoID'

    def _extract_url_results(self, team, content_id):
        # A single video is resolved through the embed-player URL.
        return self._embed_url_result(team, content_id)
|
||||||
|
|
||||||
|
|
||||||
|
class NBAChannelIE(NBABaseIE):
    # Extractor for channel / series listings on nba.com team sites; yields a
    # lazily paged playlist.
    #
    # The extractor name attribute is spelled IE_NAME; this class previously
    # declared it as ``IENAME``, so the intended name was never applied.
    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
        'info_dict': {
            'title': 'Summer League',
        },
        'playlist_mincount': 138,
    }, {
        'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
        'only_matching': True,
    }]
    # Key looked up in the page source to find the channel name.
    _CONTENT_ID_REGEX = r'videoSubCategory'
    # Number of entries requested per API call.
    _PAGE_SIZE = 100

    def _fetch_page(self, team, channel, page):
        # ``page`` is zero-based: it is turned into an item offset for the
        # API and shown one-based in the progress note.
        results = self._call_api(team, channel, {
            'channels': channel,
            'count': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
        }, 'page %d' % (page + 1))
        for video in results:
            # extract_all=False: skip per-video format extraction and emit
            # an embed-URL result for each entry instead.
            yield self._extract_video(video, team, False)

    def _extract_url_results(self, team, content_id):
        # Pages are fetched on demand as the playlist is consumed.
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, team, content_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_title=content_id)
|
||||||
|
|
|
@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
|
||||||
from ..compat import compat_urllib_parse_unquote
|
from ..compat import compat_urllib_parse_unquote
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
|
||||||
parse_duration,
|
parse_duration,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
try_get,
|
try_get,
|
||||||
|
@ -159,7 +158,8 @@ class NBCIE(AdobePassIE):
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsVPlayerIE(InfoExtractor):
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||||
|
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
|
||||||
|
@ -175,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
iframe_m = re.search(
|
iframe_m = re.search(
|
||||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
|
||||||
if iframe_m:
|
if iframe_m:
|
||||||
return iframe_m.group('url')
|
return iframe_m.group('url')
|
||||||
|
|
||||||
|
@ -193,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsIE(InfoExtractor):
|
class NBCSportsIE(InfoExtractor):
|
||||||
# Does not include https because its certificate is invalid
|
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
|
# iframe src
|
||||||
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'PHJSaFWbrTY9',
|
'id': 'PHJSaFWbrTY9',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||||
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||||
'uploader': 'NBCU-SPORTS',
|
'uploader': 'NBCU-SPORTS',
|
||||||
'upload_date': '20150330',
|
'upload_date': '20150330',
|
||||||
'timestamp': 1427726529,
|
'timestamp': 1427726529,
|
||||||
}
|
}
|
||||||
}
|
}, {
|
||||||
|
# data-mpx-src
|
||||||
|
'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# data-src
|
||||||
|
'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -275,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class CSNNEIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'yvBLLUgQ8WU0',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
|
|
||||||
'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
|
|
||||||
'timestamp': 1459369979,
|
|
||||||
'upload_date': '20160330',
|
|
||||||
'uploader': 'NBCU-SPORTS',
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
display_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
return {
|
|
||||||
'_type': 'url_transparent',
|
|
||||||
'ie_key': 'ThePlatform',
|
|
||||||
'url': self._html_search_meta('twitter:player:stream', webpage),
|
|
||||||
'display_id': display_id,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(ThePlatformIE):
|
class NBCNewsIE(ThePlatformIE):
|
||||||
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
_VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
|
||||||
|
|
||||||
|
@ -394,8 +378,8 @@ class NBCNewsIE(ThePlatformIE):
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'window\.__data\s*=\s*({.+});', webpage,
|
r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
|
||||||
'bootstrap json'), video_id, js_to_json)
|
webpage, 'bootstrap json'), video_id)['props']['initialState']
|
||||||
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
video_data = try_get(data, lambda x: x['video']['current'], dict)
|
||||||
if not video_data:
|
if not video_data:
|
||||||
video_data = data['article']['content'][0]['primaryMedia']['video']
|
video_data = data['article']['content'][0]['primaryMedia']['video']
|
||||||
|
|
|
@ -81,6 +81,29 @@ class NDRIE(NDRBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# with subtitles
|
||||||
|
'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'extra18674',
|
||||||
|
'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
|
||||||
|
'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
|
||||||
|
'uploader': 'ndrtv',
|
||||||
|
'upload_date': '20201113',
|
||||||
|
'duration': 1749,
|
||||||
|
'subtitles': {
|
||||||
|
'de': [{
|
||||||
|
'ext': 'ttml',
|
||||||
|
'url': r're:^https://www\.ndr\.de.+',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
'expected_warnings': ['Unable to download f4m manifest'],
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -239,6 +262,20 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'preference': quality_key(thumbnail.get('quality')),
|
'preference': quality_key(thumbnail.get('quality')),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
subtitles = {}
|
||||||
|
tracks = config.get('tracks')
|
||||||
|
if tracks and isinstance(tracks, list):
|
||||||
|
for track in tracks:
|
||||||
|
if not isinstance(track, dict):
|
||||||
|
continue
|
||||||
|
track_url = urljoin(url, track.get('src'))
|
||||||
|
if not track_url:
|
||||||
|
continue
|
||||||
|
subtitles.setdefault(track.get('srclang') or 'de', []).append({
|
||||||
|
'url': track_url,
|
||||||
|
'ext': 'ttml',
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -248,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,19 +4,15 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
|
||||||
compat_urllib_parse_urlparse,
|
|
||||||
)
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
clean_html,
|
||||||
int_or_none,
|
determine_ext,
|
||||||
remove_end,
|
get_element_by_class,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class NFLIE(InfoExtractor):
|
class NFLBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'nfl.com'
|
_VALID_URL_BASE = r'''(?x)
|
||||||
_VALID_URL = r'''(?x)
|
|
||||||
https?://
|
https?://
|
||||||
(?P<host>
|
(?P<host>
|
||||||
(?:www\.)?
|
(?:www\.)?
|
||||||
|
@ -34,15 +30,15 @@ class NFLIE(InfoExtractor):
|
||||||
houstontexans|
|
houstontexans|
|
||||||
colts|
|
colts|
|
||||||
jaguars|
|
jaguars|
|
||||||
titansonline|
|
(?:titansonline|tennesseetitans)|
|
||||||
denverbroncos|
|
denverbroncos|
|
||||||
kcchiefs|
|
(?:kc)?chiefs|
|
||||||
raiders|
|
raiders|
|
||||||
chargers|
|
chargers|
|
||||||
dallascowboys|
|
dallascowboys|
|
||||||
giants|
|
giants|
|
||||||
philadelphiaeagles|
|
philadelphiaeagles|
|
||||||
redskins|
|
(?:redskins|washingtonfootball)|
|
||||||
chicagobears|
|
chicagobears|
|
||||||
detroitlions|
|
detroitlions|
|
||||||
packers|
|
packers|
|
||||||
|
@ -52,180 +48,113 @@ class NFLIE(InfoExtractor):
|
||||||
neworleanssaints|
|
neworleanssaints|
|
||||||
buccaneers|
|
buccaneers|
|
||||||
azcardinals|
|
azcardinals|
|
||||||
stlouisrams|
|
(?:stlouis|the)rams|
|
||||||
49ers|
|
49ers|
|
||||||
seahawks
|
seahawks
|
||||||
)\.com|
|
)\.com|
|
||||||
.+?\.clubs\.nfl\.com
|
.+?\.clubs\.nfl\.com
|
||||||
)
|
)
|
||||||
)/
|
)/
|
||||||
(?:.+?/)*
|
|
||||||
(?P<id>[^/#?&]+)
|
|
||||||
'''
|
'''
|
||||||
|
_VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
|
||||||
|
_WORKING = False
|
||||||
|
|
||||||
|
def _parse_video_config(self, video_config, display_id):
|
||||||
|
video_config = self._parse_json(video_config, display_id)
|
||||||
|
item = video_config['playlist'][0]
|
||||||
|
mcp_id = item.get('mcpID')
|
||||||
|
if mcp_id:
|
||||||
|
info = self.url_result(
|
||||||
|
'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id,
|
||||||
|
'Anvato', mcp_id)
|
||||||
|
else:
|
||||||
|
media_id = item.get('id') or item['entityId']
|
||||||
|
title = item['title']
|
||||||
|
item_url = item['url']
|
||||||
|
info = {'id': media_id}
|
||||||
|
ext = determine_ext(item_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
|
||||||
|
self._sort_formats(info['formats'])
|
||||||
|
else:
|
||||||
|
info['url'] = item_url
|
||||||
|
if item.get('audio') is True:
|
||||||
|
info['vcodec'] = 'none'
|
||||||
|
is_live = video_config.get('live') is True
|
||||||
|
thumbnails = None
|
||||||
|
image_url = item.get(item.get('imageSrc')) or item.get(item.get('posterImage'))
|
||||||
|
if image_url:
|
||||||
|
thumbnails = [{
|
||||||
|
'url': image_url,
|
||||||
|
'ext': determine_ext(image_url, 'jpg'),
|
||||||
|
}]
|
||||||
|
info.update({
|
||||||
|
'title': self._live_title(title) if is_live else title,
|
||||||
|
'is_live': is_live,
|
||||||
|
'description': clean_html(item.get('description')),
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
})
|
||||||
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class NFLIE(NFLBaseIE):
|
||||||
|
IE_NAME = 'nfl.com'
|
||||||
|
_VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
|
'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
|
||||||
'md5': '394ef771ddcd1354f665b471d78ec4c6',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '0ap3000000398478',
|
'id': '899441',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Week 3: Redskins vs. Eagles highlights',
|
'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
|
||||||
'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
|
'description': 'md5:85e05a3cc163f8c344340f220521136d',
|
||||||
'upload_date': '20140921',
|
'upload_date': '20201215',
|
||||||
'timestamp': 1411337580,
|
'timestamp': 1608009755,
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
|
'uploader': 'NFL',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
|
||||||
'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
|
'md5': '6886b32c24b463038c760ceb55a34566',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
|
'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
|
||||||
'ext': 'mp4',
|
'ext': 'mp3',
|
||||||
'title': 'LIVE: Post Game vs. Browns',
|
'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
|
||||||
'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
|
'description': 'md5:12ada8ee70e6762658c30e223e095075',
|
||||||
'upload_date': '20131229',
|
|
||||||
'timestamp': 1388354455,
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
|
'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
|
||||||
'info_dict': {
|
|
||||||
'id': '0ap3000000467607',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Frustrations flare on the field',
|
|
||||||
'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
|
|
||||||
'timestamp': 1422850320,
|
|
||||||
'upload_date': '20150202',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
|
|
||||||
'md5': '4c319e2f625ffd0b481b4382c6fc124c',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'n-238346',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': '10 Days at Gillette',
|
|
||||||
'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
|
|
||||||
'timestamp': 1442618809,
|
|
||||||
'upload_date': '20150918',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
# lowercase data-contentid
|
|
||||||
'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Tomlin looks ahead to Ravens on a short week',
|
|
||||||
'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
|
|
||||||
'timestamp': 1443459651,
|
|
||||||
'upload_date': '20150928',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
|
'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
def _real_extract(self, url):
|
||||||
def prepend_host(host, url):
|
display_id = self._match_id(url)
|
||||||
if not url.startswith('http'):
|
webpage = self._download_webpage(url, display_id)
|
||||||
if not url.startswith('/'):
|
return self._parse_video_config(self._search_regex(
|
||||||
url = '/%s' % url
|
self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
|
||||||
url = 'http://{0:}{1:}'.format(host, url)
|
|
||||||
return url
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def format_from_stream(stream, protocol, host, path_prefix='',
|
class NFLArticleIE(NFLBaseIE):
|
||||||
preference=0, note=None):
|
IE_NAME = 'nfl.com:article'
|
||||||
url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
|
_VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
|
||||||
protocol=protocol,
|
_TEST = {
|
||||||
host=host,
|
'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
|
||||||
prefix=path_prefix,
|
'info_dict': {
|
||||||
path=stream.get('path'),
|
'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
|
||||||
)
|
'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
|
||||||
return {
|
},
|
||||||
'url': url,
|
'playlist_count': 4,
|
||||||
'vbr': int_or_none(stream.get('rate', 0), 1000),
|
|
||||||
'preference': preference,
|
|
||||||
'format_note': note,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
display_id = self._match_id(url)
|
||||||
video_id, host = mobj.group('id'), mobj.group('host')
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
entries = []
|
||||||
webpage = self._download_webpage(url, video_id)
|
for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
|
||||||
|
entries.append(self._parse_video_config(video_config, display_id))
|
||||||
config_url = NFLIE.prepend_host(host, self._search_regex(
|
title = clean_html(get_element_by_class(
|
||||||
r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
|
'nfl-c-article__title', webpage)) or self._html_search_meta(
|
||||||
webpage, 'config URL', default='static/content/static/config/video/config.json',
|
['og:title', 'twitter:title'], webpage)
|
||||||
group='config'))
|
return self.playlist_result(entries, display_id, title)
|
||||||
# For articles, the id in the url is not the video id
|
|
||||||
video_id = self._search_regex(
|
|
||||||
r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
|
|
||||||
webpage, 'video id', default=video_id, group='id')
|
|
||||||
config = self._download_json(config_url, video_id, 'Downloading player config')
|
|
||||||
url_template = NFLIE.prepend_host(
|
|
||||||
host, '{contentURLTemplate:}'.format(**config))
|
|
||||||
video_data = self._download_json(
|
|
||||||
url_template.format(id=video_id), video_id)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
cdn_data = video_data.get('cdnData', {})
|
|
||||||
streams = cdn_data.get('bitrateInfo', [])
|
|
||||||
if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
|
|
||||||
parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
|
|
||||||
protocol, host = parts.scheme, parts.netloc
|
|
||||||
for stream in streams:
|
|
||||||
formats.append(
|
|
||||||
NFLIE.format_from_stream(stream, protocol, host))
|
|
||||||
else:
|
|
||||||
cdns = config.get('cdns')
|
|
||||||
if not cdns:
|
|
||||||
raise ExtractorError('Failed to get CDN data', expected=True)
|
|
||||||
|
|
||||||
for name, cdn in cdns.items():
|
|
||||||
# LimeLight streams don't seem to work
|
|
||||||
if cdn.get('name') == 'LIMELIGHT':
|
|
||||||
continue
|
|
||||||
|
|
||||||
protocol = cdn.get('protocol')
|
|
||||||
host = remove_end(cdn.get('host', ''), '/')
|
|
||||||
if not (protocol and host):
|
|
||||||
continue
|
|
||||||
|
|
||||||
prefix = cdn.get('pathprefix', '')
|
|
||||||
if prefix and not prefix.endswith('/'):
|
|
||||||
prefix = '%s/' % prefix
|
|
||||||
|
|
||||||
preference = 0
|
|
||||||
if protocol == 'rtmp':
|
|
||||||
preference = -2
|
|
||||||
elif 'prog' in name.lower():
|
|
||||||
preference = 1
|
|
||||||
|
|
||||||
for stream in streams:
|
|
||||||
formats.append(
|
|
||||||
NFLIE.format_from_stream(stream, protocol, host,
|
|
||||||
prefix, preference, name))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnail = None
|
|
||||||
for q in ('xl', 'l', 'm', 's', 'xs'):
|
|
||||||
thumbnail = video_data.get('imagePaths', {}).get(q)
|
|
||||||
if thumbnail:
|
|
||||||
break
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': video_data.get('headline'),
|
|
||||||
'formats': formats,
|
|
||||||
'description': video_data.get('caption'),
|
|
||||||
'duration': video_data.get('duration'),
|
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'timestamp': int_or_none(video_data.get('posted'), 1000),
|
|
||||||
}
|
|
||||||
|
|
|
@ -3,51 +3,33 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..utils import urljoin
|
||||||
|
|
||||||
|
|
||||||
class NhkVodIE(InfoExtractor):
|
class NhkBaseIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
|
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
|
||||||
# Content available only for a limited period of time. Visit
|
_BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
|
||||||
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
_TYPE_REGEX = r'/(?P<type>video|audio)/'
|
||||||
_TESTS = [{
|
|
||||||
# clip
|
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
|
||||||
'md5': '256a1be14f48d960a7e61e2532d95ec3',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'a95j5iza',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
|
|
||||||
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
|
||||||
'timestamp': 1565965194,
|
|
||||||
'upload_date': '20190816',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
_API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
|
||||||
lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
|
return self._download_json(
|
||||||
|
self._API_URL_TEMPLATE % (
|
||||||
|
'v' if is_video else 'r',
|
||||||
|
'clip' if is_clip else 'esd',
|
||||||
|
'episode' if is_episode else 'program',
|
||||||
|
m_id, lang, '/all' if is_video else ''),
|
||||||
|
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
|
||||||
|
|
||||||
|
def _extract_episode_info(self, url, episode=None):
|
||||||
|
fetch_episode = episode is None
|
||||||
|
lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
|
||||||
if episode_id.isdigit():
|
if episode_id.isdigit():
|
||||||
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
episode_id = episode_id[:4] + '-' + episode_id[4:]
|
||||||
|
|
||||||
is_video = m_type == 'video'
|
is_video = m_type == 'video'
|
||||||
episode = self._download_json(
|
if fetch_episode:
|
||||||
self._API_URL_TEMPLATE % (
|
episode = self._call_api(
|
||||||
'v' if is_video else 'r',
|
episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
|
||||||
'clip' if episode_id[:4] == '9999' else 'esd',
|
|
||||||
episode_id, lang, '/all' if is_video else ''),
|
|
||||||
episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
|
|
||||||
title = episode.get('sub_title_clean') or episode['sub_title']
|
title = episode.get('sub_title_clean') or episode['sub_title']
|
||||||
|
|
||||||
def get_clean_field(key):
|
def get_clean_field(key):
|
||||||
|
@ -76,18 +58,121 @@ class NhkVodIE(InfoExtractor):
|
||||||
'episode': title,
|
'episode': title,
|
||||||
}
|
}
|
||||||
if is_video:
|
if is_video:
|
||||||
|
vod_id = episode['vod_id']
|
||||||
info.update({
|
info.update({
|
||||||
'_type': 'url_transparent',
|
'_type': 'url_transparent',
|
||||||
'ie_key': 'Piksel',
|
'ie_key': 'Piksel',
|
||||||
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
|
'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
|
||||||
|
'id': vod_id,
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
audio = episode['audio']
|
if fetch_episode:
|
||||||
audio_path = audio['audio']
|
audio_path = episode['audio']['audio']
|
||||||
info['formats'] = self._extract_m3u8_formats(
|
info['formats'] = self._extract_m3u8_formats(
|
||||||
'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
|
||||||
episode_id, 'm4a', entry_protocol='m3u8_native',
|
episode_id, 'm4a', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False)
|
m3u8_id='hls', fatal=False)
|
||||||
for f in info['formats']:
|
for f in info['formats']:
|
||||||
f['language'] = lang
|
f['language'] = lang
|
||||||
|
else:
|
||||||
|
info.update({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'ie_key': NhkVodIE.ie_key(),
|
||||||
|
'url': url,
|
||||||
|
})
|
||||||
return info
|
return info
|
||||||
|
|
||||||
|
|
||||||
|
class NhkVodIE(NhkBaseIE):
|
||||||
|
_VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||||
|
# Content available only for a limited period of time. Visit
|
||||||
|
# https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
|
||||||
|
_TESTS = [{
|
||||||
|
# video clip
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
|
||||||
|
'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a95j5iza',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
|
||||||
|
'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
|
||||||
|
'timestamp': 1565965194,
|
||||||
|
'upload_date': '20190816',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# audio clip
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'r_inventions-20201104-1-en',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': "Japan's Top Inventions - Miniature Video Cameras",
|
||||||
|
'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
return self._extract_episode_info(url)
|
||||||
|
|
||||||
|
|
||||||
|
class NhkVodProgramIE(NhkBaseIE):
|
||||||
|
_VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
|
||||||
|
_TESTS = [{
|
||||||
|
# video program episodes
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'japanrailway',
|
||||||
|
'title': 'Japan Railway Journal',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
}, {
|
||||||
|
# video program clips
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'japanrailway',
|
||||||
|
'title': 'Japan Railway Journal',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# audio program
|
||||||
|
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
lang, m_type, program_id, episode_type = re.match(self._VALID_URL, url).groups()
|
||||||
|
|
||||||
|
episodes = self._call_api(
|
||||||
|
program_id, lang, m_type == 'video', False, episode_type == 'clip')
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for episode in episodes:
|
||||||
|
episode_path = episode.get('url')
|
||||||
|
if not episode_path:
|
||||||
|
continue
|
||||||
|
entries.append(self._extract_episode_info(
|
||||||
|
urljoin(url, episode_path), episode))
|
||||||
|
|
||||||
|
program_title = None
|
||||||
|
if entries:
|
||||||
|
program_title = entries[0].get('series')
|
||||||
|
|
||||||
|
return self.playlist_result(entries, program_id, program_title)
|
||||||
|
|
|
@ -1,20 +1,23 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import json
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import functools
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
InAdvancePagedList,
|
||||||
|
int_or_none,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
remove_start,
|
remove_start,
|
||||||
|
@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
if urlh is False:
|
if urlh is False:
|
||||||
login_ok = False
|
login_ok = False
|
||||||
else:
|
else:
|
||||||
parts = compat_urlparse.urlparse(urlh.geturl())
|
parts = compat_urllib_parse_urlparse(urlh.geturl())
|
||||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||||
login_ok = False
|
login_ok = False
|
||||||
if not login_ok:
|
if not login_ok:
|
||||||
|
@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
||||||
video_id, 'Downloading flv info')
|
video_id, 'Downloading flv info')
|
||||||
|
|
||||||
flv_info = compat_urlparse.parse_qs(flv_info_webpage)
|
flv_info = compat_parse_qs(flv_info_webpage)
|
||||||
if 'url' not in flv_info:
|
if 'url' not in flv_info:
|
||||||
if 'deleted' in flv_info:
|
if 'deleted' in flv_info:
|
||||||
raise ExtractorError('The video has been deleted.',
|
raise ExtractorError('The video has been deleted.',
|
||||||
|
@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '27411728',
|
'id': '27411728',
|
||||||
'title': 'AKB48のオールナイトニッポン',
|
'title': 'AKB48のオールナイトニッポン',
|
||||||
|
'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
|
||||||
|
'uploader': 'のっく',
|
||||||
|
'uploader_id': '805442',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 225,
|
'playlist_mincount': 225,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _call_api(self, list_id, resource, query):
|
||||||
|
return self._download_json(
|
||||||
|
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
|
'Downloading %s JSON metatdata' % resource, query=query,
|
||||||
|
headers={'X-Frontend-Id': 6})['data']['mylist']
|
||||||
|
|
||||||
|
def _parse_owner(self, item):
|
||||||
|
owner = item.get('owner') or {}
|
||||||
|
if owner:
|
||||||
|
return {
|
||||||
|
'uploader': owner.get('name'),
|
||||||
|
'uploader_id': owner.get('id'),
|
||||||
}
|
}
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _fetch_page(self, list_id, page):
|
||||||
|
page += 1
|
||||||
|
items = self._call_api(list_id, 'page %d' % page, {
|
||||||
|
'page': page,
|
||||||
|
'pageSize': self._PAGE_SIZE,
|
||||||
|
})['items']
|
||||||
|
for item in items:
|
||||||
|
video = item.get('video') or {}
|
||||||
|
video_id = video.get('id')
|
||||||
|
if not video_id:
|
||||||
|
continue
|
||||||
|
count = video.get('count') or {}
|
||||||
|
get_count = lambda x: int_or_none(count.get(x))
|
||||||
|
info = {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': video_id,
|
||||||
|
'title': video.get('title'),
|
||||||
|
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
'description': video.get('shortDescription'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'view_count': get_count('view'),
|
||||||
|
'comment_count': get_count('comment'),
|
||||||
|
'ie_key': NiconicoIE.ie_key(),
|
||||||
|
}
|
||||||
|
info.update(self._parse_owner(video))
|
||||||
|
yield info
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, list_id)
|
mylist = self._call_api(list_id, 'list', {
|
||||||
|
'pageSize': 1,
|
||||||
entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
|
})
|
||||||
webpage, 'entries')
|
entries = InAdvancePagedList(
|
||||||
entries = json.loads(entries_json)
|
functools.partial(self._fetch_page, list_id),
|
||||||
entries = [{
|
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
||||||
'_type': 'url',
|
self._PAGE_SIZE)
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
result = self.playlist_result(
|
||||||
'url': ('http://www.nicovideo.jp/watch/%s' %
|
entries, list_id, mylist.get('name'), mylist.get('description'))
|
||||||
entry['item_data']['video_id']),
|
result.update(self._parse_owner(mylist))
|
||||||
} for entry in entries]
|
return result
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
|
|
||||||
'id': list_id,
|
|
||||||
'entries': entries,
|
|
||||||
}
|
|
||||||
|
|
|
@ -5,10 +5,11 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
parse_iso8601,
|
|
||||||
float_or_none,
|
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||||
'$include': '[HasClosedCaptions]',
|
'$include': '[HasClosedCaptions]',
|
||||||
})
|
})
|
||||||
|
|
||||||
if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
|
if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
|
||||||
raise ExtractorError('This video is DRM protected.', expected=True)
|
raise ExtractorError('This video is DRM protected.', expected=True)
|
||||||
|
|
||||||
manifest_base_url = content_package_url + 'manifest.'
|
manifest_base_url = content_package_url + 'manifest.'
|
||||||
|
@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for image in content.get('Images', []):
|
for image in (content.get('Images') or []):
|
||||||
image_url = image.get('Url')
|
image_url = image.get('Url')
|
||||||
if not image_url:
|
if not image_url:
|
||||||
continue
|
continue
|
||||||
|
@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
|
||||||
continue
|
continue
|
||||||
container.append(e_name)
|
container.append(e_name)
|
||||||
|
|
||||||
season = content.get('Season', {})
|
season = content.get('Season') or {}
|
||||||
|
|
||||||
info = {
|
info = {
|
||||||
'id': content_id,
|
'id': content_id,
|
||||||
|
@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
|
||||||
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
|
||||||
'episode_number': int_or_none(content.get('Episode')),
|
'episode_number': int_or_none(content.get('Episode')),
|
||||||
'season': season.get('Name'),
|
'season': season.get('Name'),
|
||||||
'season_number': season.get('Number'),
|
'season_number': int_or_none(season.get('Number')),
|
||||||
'season_id': season.get('Id'),
|
'season_id': season.get('Id'),
|
||||||
'series': content.get('Media', {}).get('Name'),
|
'series': try_get(content, lambda x: x['Media']['Name']),
|
||||||
'tags': tags,
|
'tags': tags,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
'duration': float_or_none(content_package.get('Duration')),
|
'duration': float_or_none(content_package.get('Duration')),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
}
|
}
|
||||||
|
|
||||||
if content_package.get('HasClosedCaptions'):
|
if content_package.get('HasClosedCaptions'):
|
||||||
|
|
|
@ -1,104 +1,130 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import str_to_int
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class NineGagIE(InfoExtractor):
|
class NineGagIE(InfoExtractor):
|
||||||
IE_NAME = '9gag'
|
IE_NAME = '9gag'
|
||||||
_VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
|
_VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
|
'url': 'https://9gag.com/gag/ae5Ag7B',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'kXzwOKyGlSA',
|
'id': 'ae5Ag7B',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
|
'title': 'Capybara Agility Training',
|
||||||
'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
|
'upload_date': '20191108',
|
||||||
'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
|
'timestamp': 1573237208,
|
||||||
'uploader': 'CompilationChannel',
|
'categories': ['Awesome'],
|
||||||
'upload_date': '20131110',
|
'tags': ['Weimaraner', 'American Pit Bull Terrier'],
|
||||||
'view_count': int,
|
'duration': 44,
|
||||||
},
|
'like_count': int,
|
||||||
'add_ie': ['Youtube'],
|
'dislike_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://9gag.com/tv/p/aKolP3',
|
# HTML escaped title
|
||||||
'info_dict': {
|
'url': 'https://9gag.com/gag/av5nvyb',
|
||||||
'id': 'aKolP3',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
|
|
||||||
'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
|
|
||||||
'uploader_id': 'rickmereki',
|
|
||||||
'uploader': 'Rick Mereki',
|
|
||||||
'upload_date': '20110803',
|
|
||||||
'view_count': int,
|
|
||||||
},
|
|
||||||
'add_ie': ['Vimeo'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.com/tv/p/KklwM',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.tv/p/Kk2X5',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
'url': 'http://9gag.com/tv/embed/a5Dmvl',
|
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_EXTERNAL_VIDEO_PROVIDER = {
|
|
||||||
'1': {
|
|
||||||
'url': '%s',
|
|
||||||
'ie_key': 'Youtube',
|
|
||||||
},
|
|
||||||
'2': {
|
|
||||||
'url': 'http://player.vimeo.com/video/%s',
|
|
||||||
'ie_key': 'Vimeo',
|
|
||||||
},
|
|
||||||
'3': {
|
|
||||||
'url': 'http://instagram.com/p/%s',
|
|
||||||
'ie_key': 'Instagram',
|
|
||||||
},
|
|
||||||
'4': {
|
|
||||||
'url': 'http://vine.co/v/%s',
|
|
||||||
'ie_key': 'Vine',
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
post_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
post = self._download_json(
|
||||||
display_id = mobj.group('display_id') or video_id
|
'https://9gag.com/v1/post', post_id, query={
|
||||||
|
'id': post_id
|
||||||
|
})['data']['post']
|
||||||
|
|
||||||
webpage = self._download_webpage(url, display_id)
|
if post.get('type') != 'Animated':
|
||||||
|
raise ExtractorError(
|
||||||
|
'The given url does not contain a video',
|
||||||
|
expected=True)
|
||||||
|
|
||||||
post_view = self._parse_json(
|
title = unescapeHTML(post['title'])
|
||||||
self._search_regex(
|
|
||||||
r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
|
|
||||||
webpage, 'post view'),
|
|
||||||
display_id)
|
|
||||||
|
|
||||||
ie_key = None
|
duration = None
|
||||||
source_url = post_view.get('sourceUrl')
|
formats = []
|
||||||
if not source_url:
|
thumbnails = []
|
||||||
external_video_id = post_view['videoExternalId']
|
for key, image in (post.get('images') or {}).items():
|
||||||
external_video_provider = post_view['videoExternalProvider']
|
image_url = url_or_none(image.get('url'))
|
||||||
source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
|
if not image_url:
|
||||||
ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
|
continue
|
||||||
title = post_view['title']
|
ext = determine_ext(image_url)
|
||||||
description = post_view.get('description')
|
image_id = key.strip('image')
|
||||||
view_count = str_to_int(post_view.get('externalView'))
|
common = {
|
||||||
thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
|
'url': image_url,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
}
|
||||||
|
if ext in ('jpg', 'png'):
|
||||||
|
webp_url = image.get('webpUrl')
|
||||||
|
if webp_url:
|
||||||
|
t = common.copy()
|
||||||
|
t.update({
|
||||||
|
'id': image_id + '-webp',
|
||||||
|
'url': webp_url,
|
||||||
|
})
|
||||||
|
thumbnails.append(t)
|
||||||
|
common.update({
|
||||||
|
'id': image_id,
|
||||||
|
'ext': ext,
|
||||||
|
})
|
||||||
|
thumbnails.append(common)
|
||||||
|
elif ext in ('webm', 'mp4'):
|
||||||
|
if not duration:
|
||||||
|
duration = int_or_none(image.get('duration'))
|
||||||
|
common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
|
||||||
|
for vcodec in ('vp8', 'vp9', 'h265'):
|
||||||
|
c_url = image.get(vcodec + 'Url')
|
||||||
|
if not c_url:
|
||||||
|
continue
|
||||||
|
c_f = common.copy()
|
||||||
|
c_f.update({
|
||||||
|
'format_id': image_id + '-' + vcodec,
|
||||||
|
'url': c_url,
|
||||||
|
'vcodec': vcodec,
|
||||||
|
})
|
||||||
|
formats.append(c_f)
|
||||||
|
common.update({
|
||||||
|
'ext': ext,
|
||||||
|
'format_id': image_id,
|
||||||
|
})
|
||||||
|
formats.append(common)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
section = try_get(post, lambda x: x['postSection']['name'])
|
||||||
|
|
||||||
|
tags = None
|
||||||
|
post_tags = post.get('tags')
|
||||||
|
if post_tags:
|
||||||
|
tags = []
|
||||||
|
for tag in post_tags:
|
||||||
|
tag_key = tag.get('key')
|
||||||
|
if not tag_key:
|
||||||
|
continue
|
||||||
|
tags.append(tag_key)
|
||||||
|
|
||||||
|
get_count = lambda x: int_or_none(post.get(x + 'Count'))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'url_transparent',
|
'id': post_id,
|
||||||
'url': source_url,
|
|
||||||
'ie_key': ie_key,
|
|
||||||
'id': video_id,
|
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'timestamp': int_or_none(post.get('creationTs')),
|
||||||
'view_count': view_count,
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'like_count': get_count('upVote'),
|
||||||
|
'dislike_count': get_count('downVote'),
|
||||||
|
'comment_count': get_count('comments'),
|
||||||
|
'age_limit': 18 if post.get('nsfw') == 1 else None,
|
||||||
|
'categories': [section] if section else None,
|
||||||
|
'tags': tags,
|
||||||
}
|
}
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue