Merge branch 'ytdl-backports' into 'master'

youtube-dl backports. Closes #26, #23, and #1. See merge request laudompat/haruhi-dl!3
2021-02-26 19:28:28 +00:00 · 2021-02-26 19:28:28 +00:00 · 7b16bb6509
parent 37d16d8dbf 67692545da
commit 7b16bb6509
194 changed files with 11750 additions and 6551 deletions
--- a/devscripts/copykitku-patch-hook.js
+++ b/devscripts/copykitku-patch-hook.js
@ -4,16 +4,18 @@
 module.exports = function patchHook(patchContent) {
    [
        [/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
        [/youtube_dl/g, 'haruhi_dl'],
        [/youtube-dl/g, 'haruhi-dl'],
        [/youtubedl/g, 'haruhidl'],
        [/YoutubeDL/g, 'HaruhiDL'],
        [/ytdl/g, 'hdl'],
        [/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
        [/yt-dl/g, 'h-dl'],
        [/ydl/g, 'hdl'],
        // prevent linking to a non-existent repository
        [/github\.com\/ytdl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
        [/github\.com\/rg3\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
        // prevent changing the smuggle URLs (for compatibility with ytdl)
        [/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
    ].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@ -77,7 +77,7 @@ def build_lazy_ie(ie, name):
    return s
-# find the correct sorting and add the required base classes so that sublcasses
+# find the correct sorting and add the required base classes so that subclasses
 # can be correctly created
 classes = _ALL_CLASSES[:-1]
 ordered_cls = []
--- a/haruhi_dl/HaruhiDL.py
+++ b/haruhi_dl/HaruhiDL.py
@ -163,6 +163,7 @@ class HaruhiDL(object):
    simulate:          Do not download the video files.
    format:            Video format code. See options.py for more information.
    outtmpl:           Template for output names.
    outtmpl_na_placeholder: Placeholder for unavailable meta fields.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    force_generic_extractor: Force downloader to use the generic extractor
@ -338,6 +339,8 @@ class HaruhiDL(object):
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
    def __init__(self, params=None, auto_init=True):
@ -660,7 +663,7 @@ class HaruhiDL(object):
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
-            template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
@ -680,8 +683,8 @@ class HaruhiDL(object):
            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
-            # string 'NA' is returned for missing fields. We will patch output
+            # string NA placeholder is returned for missing fields. We will patch
-            # template for missing fields to meet string presentation type.
+            # output template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
@ -797,10 +800,37 @@ class HaruhiDL(object):
                self.report_warning('The program functionality for this site has been marked as broken, '
                                    'and will probably not work.')
            return self.__extract_info(url, ie, download, extra_info, process)
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
    def __handle_extraction_exceptions(func):
        """Decorator: report extraction errors through self.report_error().

        The wrapped callable is invoked as func(self, *args, **kwargs) and its
        return value is passed through unchanged. On failure:

        * GeoRestrictedError -- reported with the error message, the list of
          countries (when e.countries is truthy) and a VPN/proxy hint.
        * ExtractorError -- reported together with its formatted traceback.
        * MaxDownloadsReached -- always re-raised.
        * any other Exception -- reported when the 'ignoreerrors' param is
          truthy, re-raised otherwise.

        If report_error() returns, the wrapper falls off the end and so
        returns None.
        """
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as geo_err:
                countries_note = ''
                if geo_err.countries:
                    countries_note = '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, geo_err.countries))
                self.report_error(
                    geo_err.msg
                    + countries_note
                    + '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.')
            except ExtractorError as extractor_err:  # An error we somewhat expected
                self.report_error(compat_str(extractor_err), extractor_err.format_traceback())
            except MaxDownloadsReached:
                # Must stay ahead of the generic handler so it is never swallowed
                raise
            except Exception as unexpected_err:
                if not self.params.get('ignoreerrors', False):
                    raise
                self.report_error(
                    error_to_compat_str(unexpected_err),
                    tb=encode_compat_str(traceback.format_exc()))
        return wrapper
    @__handle_extraction_exceptions
    def __extract_info(self, url, ie, download, extra_info, process):
        ie_result = ie.extract(url)
        if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
-                    break
+            return
        if isinstance(ie_result, list):
            # Backwards compatibility: old IE result format
            ie_result = {
@ -812,27 +842,6 @@ class HaruhiDL(object):
            return self.process_ie_result(ie_result, download, extra_info)
        else:
            return ie_result
            except GeoRestrictedError as e:
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
                break
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
                break
            except MaxDownloadsReached:
                raise
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            self.report_error('no suitable InfoExtractor for URL %s' % url)
    def add_default_extra_info(self, ie_result, ie, url):
        self.add_extra_info(ie_result, {
@ -904,8 +913,51 @@ class HaruhiDL(object):
            return self.process_ie_result(
                new_result, download=download, extra_info=extra_info)
        elif result_type in ('playlist', 'multi_video'):
            # Protect from infinite recursion due to recursively nested playlists
            # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
            webpage_url = ie_result['webpage_url']
            if webpage_url in self._playlist_urls:
                self.to_screen(
                    '[download] Skipping already downloaded playlist: %s'
                    % ie_result.get('title') or ie_result.get('id'))
                return
            self._playlist_level += 1
            self._playlist_urls.add(webpage_url)
            try:
                return self.__process_playlist(ie_result, download)
            finally:
                self._playlist_level -= 1
                if not self._playlist_level:
                    self._playlist_urls.clear()
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
            def _fixup(r):
                self.add_extra_info(
                    r,
                    {
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
    def __process_playlist(self, ie_result, download):
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)
        playlist_results = []
@ -1007,36 +1059,17 @@ class HaruhiDL(object):
                self.to_screen('[download] ' + reason)
                continue
-                entry_result = self.process_ie_result(entry,
+            entry_result = self.__process_iterable_entry(entry, download, extra)
-                                                      download=download,
+            # TODO: skip failed (empty) entries?
                                                      extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
        elif result_type == 'compat_list':
            self.report_warning(
                'Extractor %s returned a compat_list result. '
                'It needs to be updated.' % ie_result.get('extractor'))
-            def _fixup(r):
+    @__handle_extraction_exceptions
-                self.add_extra_info(
+    def __process_iterable_entry(self, entry, download, extra_info):
-                    r,
+        return self.process_ie_result(
-                    {
+            entry, download=download, extra_info=extra_info)
                        'extractor': ie_result['extractor'],
                        'webpage_url': ie_result['webpage_url'],
                        'webpage_url_basename': url_basename(ie_result['webpage_url']),
                        'extractor_key': ie_result['extractor_key'],
                    }
                )
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download, extra_info)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)
    def _build_format_filter(self, filter_spec):
        " Returns a function to filter the formats according to the filter_spec "
@ -1077,7 +1110,7 @@ class HaruhiDL(object):
                '*=': lambda attr, value: value in attr,
            }
            str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
                \s*(?P<value>[a-zA-Z0-9._-]+)
                \s*$
@ -1220,6 +1253,8 @@ class HaruhiDL(object):
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if inside_merge:
                            raise syntax_error('Unexpected "+"', start)
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
@ -1604,7 +1639,7 @@ class HaruhiDL(object):
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
-                self.to_stdout('[debug] Default format spec: %s' % req_format)
+                self._write_string('[debug] Default format spec: %s\n' % req_format)
        format_selector = self.build_format_selector(req_format)
@ -1771,6 +1806,8 @@ class HaruhiDL(object):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                if isinstance(err, OSError) and err.errno == errno.EEXIST:
                    return True
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False
@ -1865,7 +1902,7 @@ class HaruhiDL(object):
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
-                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+                        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
@ -2404,7 +2441,7 @@ class HaruhiDL(object):
            thumb_ext = determine_ext(t['url'], 'jpg')
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
--- a/haruhi_dl/init.py
+++ b/haruhi_dl/init.py
@ -340,6 +340,7 @@ def _real_main(argv=None):
        'format': opts.format,
        'listformats': opts.listformats,
        'outtmpl': outtmpl,
        'outtmpl_na_placeholder': opts.outtmpl_na_placeholder,
        'autonumber_size': opts.autonumber_size,
        'autonumber_start': opts.autonumber_start,
        'restrictfilenames': opts.restrictfilenames,
--- a/haruhi_dl/downloader/fragment.py
+++ b/haruhi_dl/downloader/fragment.py
@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
-        success = ctx['dl'].download(fragment_filename, {
+        fragment_info_dict = {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
-        })
+        }
        success = ctx['dl'].download(fragment_filename, fragment_info_dict)
        if not success:
            return False, None
        if fragment_info_dict.get('filetime'):
            ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        frag_content = down.read()
@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            if self.params.get('updatetime', True):
                filetime = ctx.get('fragment_filetime')
                if filetime:
                    try:
                        os.utime(ctx['filename'], (time.time(), filetime))
                    except Exception:
                        pass
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
        self._hook_progress({
--- a/haruhi_dl/downloader/hls.py
+++ b/haruhi_dl/downloader/hls.py
@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        )
        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
@ -170,6 +172,10 @@ class HlsFD(FragmentFD):
                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.hdl.urlopen(
                            self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
                        # Don't decrypt the content in tests since the data is explicitly truncated to a length that is not a valid
                        # block size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data
                        # was downloaded, not what it decrypts to.
                        if not test:
                            frag_content = AES.new(
                                decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                    self._append_fragment(ctx, frag_content)
--- a/haruhi_dl/downloader/http.py
+++ b/haruhi_dl/downloader/http.py
@ -109,7 +109,9 @@ class HttpFD(FileDownloader):
                try:
                    ctx.data = self.hdl.urlopen(request)
                except (compat_urllib_error.URLError, ) as err:
-                    if isinstance(err.reason, socket.timeout):
+                    # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
                    reason = getattr(err, 'reason', None)
                    if isinstance(reason, socket.timeout):
                        raise RetryDownload(err)
                    raise err
                # When trying to resume, Content-Range HTTP header of response has to be checked
--- a/haruhi_dl/extractor/abcnews.py
+++ b/haruhi_dl/extractor/abcnews.py
@ -1,14 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import re
 import time
 from .amp import AMPIE
 from .common import InfoExtractor
-from .youtube import YoutubeIE
+from ..utils import (
-from ..compat import compat_urlparse
+    parse_duration,
    parse_iso8601,
    try_get,
 )
 class AbcNewsVideoIE(AMPIE):
@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
                        (?:
                            abcnews\.go\.com/
                            (?:
-                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
+                                (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
-                                video/embed\?.*?\bid=
+                                video/(?:embed|itemfeed)\?.*?\bid=
                            )|
                            fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                        )
@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
            'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
            'duration': 180,
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1380454200,
            'upload_date': '20130929',
        },
        'params': {
            # m3u8 download
@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
    }, {
        'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
        'only_matching': True,
    }, {
        'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
        'only_matching': True,
    }, {
        'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
    _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
    _TESTS = [{
-        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        # Youtube Embeds
        'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
        'info_dict': {
-            'id': '10505354',
+            'id': '51286501',
-            'ext': 'flv',
+            'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
-            'display_id': 'dramatic-video-rare-death-job-america',
+            'description': 'Billingsley went from a child actor to Hollywood power player.',
            'title': 'Occupational Hazards',
            'description': 'Nightline investigates the dangers that lurk at various jobs.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20100428',
            'timestamp': 1272412800,
        },
-        'add_ie': ['AbcNewsVideo'],
+        'playlist_count': 5,
    }, {
        'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
        'info_dict': {
            'id': '38897857',
            'ext': 'mp4',
            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
            'title': 'Justin Timberlake Drops Hints For Secret Single',
            'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
-            'upload_date': '20160515',
+            'upload_date': '20160505',
-            'timestamp': 1463329500,
+            'timestamp': 1462442280,
        },
        'params': {
            # m3u8 download
@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
    }, {
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }, {
        # inline.type == 'video'
        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        story_id = self._match_id(url)
-        display_id = mobj.group('display_id')
+        webpage = self._download_webpage(url, story_id)
-        video_id = mobj.group('id')
+        story = self._parse_json(self._search_regex(
            r"window\['__abcnews__'\]\s*=\s*({.+?});",
            webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
        article_contents = story.get('articleContents') or {}
-        webpage = self._download_webpage(url, video_id)
+        def entries():
-        video_url = self._search_regex(
+            featured_video = story.get('featuredVideo') or {}
-            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
+            feed = try_get(featured_video, lambda x: x['video']['feed'])
-        full_video_url = compat_urlparse.urljoin(url, video_url)
+            if feed:
-
+                yield {
-        youtube_url = YoutubeIE._extract_url(webpage)
+                    '_type': 'url',
-
+                    'id': featured_video.get('id'),
-        timestamp = None
+                    'title': featured_video.get('name'),
-        date_str = self._html_search_regex(
+                    'url': feed,
-            r'<span[^>]+class="timestamp">([^<]+)</span>',
+                    'thumbnail': featured_video.get('images'),
-            webpage, 'timestamp', fatal=False)
+                    'description': featured_video.get('description'),
-        if date_str:
+                    'timestamp': parse_iso8601(featured_video.get('uploadDate')),
-            tz_offset = 0
+                    'duration': parse_duration(featured_video.get('duration')),
            if date_str.endswith(' ET'):  # Eastern Time
                tz_offset = -5
                date_str = date_str[:-3]
            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
            for date_format in date_formats:
                try:
                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
                except ValueError:
                    continue
            if timestamp is not None:
                timestamp -= tz_offset * 3600
        entry = {
            '_type': 'url_transparent',
                    'ie_key': AbcNewsVideoIE.ie_key(),
            'url': full_video_url,
            'id': video_id,
            'display_id': display_id,
            'timestamp': timestamp,
                }
-        if youtube_url:
+            for inline in (article_contents.get('inlines') or []):
-            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
+                inline_type = inline.get('type')
-            return self.playlist_result(entries)
+                if inline_type == 'iframe':
                    iframe_url = try_get(inline, lambda x: x['attrs']['src'])
                    if iframe_url:
                        yield self.url_result(iframe_url)
                elif inline_type == 'video':
                    video_id = inline.get('id')
                    if video_id:
                        yield {
                            '_type': 'url',
                            'id': video_id,
                            'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
                            'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
                            'description': inline.get('description'),
                            'duration': parse_duration(inline.get('duration')),
                            'ie_key': AbcNewsVideoIE.ie_key(),
                        }
-        return entry
+        return self.playlist_result(
            entries(), story_id, article_contents.get('headline'),
            article_contents.get('subHead'))
--- a/haruhi_dl/extractor/acast.py
+++ b/haruhi_dl/extractor/acast.py
@ -2,21 +2,48 @@
 from __future__ import unicode_literals
 import re
 import functools
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    clean_html,
-    float_or_none,
+    clean_podcast_url,
    int_or_none,
-    try_get,
+    parse_iso8601,
    unified_timestamp,
    OnDemandPagedList,
 )
-class ACastIE(InfoExtractor):
+class ACastBaseIE(InfoExtractor):
    def _extract_episode(self, episode, show_info):
        """Build the info dict for a single episode.

        episode must contain 'title', 'id' and 'url' (a missing key raises
        KeyError); all other fields are read with .get() and may be absent.
        The entries of show_info are merged in last, so they take precedence
        over same-named episode fields.
        """
        episode_title = episode['title']
        # The episode title doubles as the 'episode' field.
        result = {
            'id': episode['id'],
            'display_id': episode.get('episodeUrl'),
            'url': clean_podcast_url(episode['url']),
            'title': episode_title,
            'description': clean_html(episode.get('description') or episode.get('summary')),
            'thumbnail': episode.get('image'),
            'timestamp': parse_iso8601(episode.get('publishDate')),
            'duration': int_or_none(episode.get('duration')),
            'filesize': int_or_none(episode.get('contentLength')),
            'season_number': int_or_none(episode.get('season')),
            'episode': episode_title,
            'episode_number': int_or_none(episode.get('episode')),
        }
        result.update(show_info)
        return result
    def _extract_show_info(self, show):
        return {
            'creator': show.get('author'),
            'series': show.get('title'),
        }
    def _call_api(self, path, video_id, query=None):
        """Download JSON from the feeder.acast.com shows API.

        path is appended verbatim to the shows endpoint; video_id and query
        are forwarded to self._download_json().
        """
        api_url = 'https://feeder.acast.com/api/v1/shows/' + path
        return self._download_json(api_url, video_id, query=query)
 class ACastIE(ACastBaseIE):
    IE_NAME = 'acast'
    _VALID_URL = r'''(?x)
                    https?://
@ -28,15 +55,15 @@ class ACastIE(InfoExtractor):
                    '''
    _TESTS = [{
        'url': 'https://www.acast.com/sparpodcast/2.raggarmordet-rosterurdetforflutna',
-        'md5': '16d936099ec5ca2d5869e3a813ee8dc4',
+        'md5': 'f5598f3ad1e4776fed12ec1407153e4b',
        'info_dict': {
            'id': '2a92b283-1a75-4ad8-8396-499c641de0d9',
            'ext': 'mp3',
            'title': '2. Raggarmordet - Röster ur det förflutna',
-            'description': 'md5:4f81f6d8cf2e12ee21a321d8bca32db4',
+            'description': 'md5:a992ae67f4d98f1c0141598f7bebbf67',
            'timestamp': 1477346700,
            'upload_date': '20161024',
-            'duration': 2766.602563,
+            'duration': 2766,
            'creator': 'Anton Berg & Martin Johnson',
            'series': 'Spår',
            'episode': '2. Raggarmordet - Röster ur det förflutna',
@ -45,7 +72,7 @@ class ACastIE(InfoExtractor):
        'url': 'http://embed.acast.com/adambuxton/ep.12-adam-joeschristmaspodcast2015',
        'only_matching': True,
    }, {
-        'url': 'https://play.acast.com/s/rattegangspodden/s04e09-styckmordet-i-helenelund-del-22',
+        'url': 'https://play.acast.com/s/rattegangspodden/s04e09styckmordetihelenelund-del2-2',
        'only_matching': True,
    }, {
        'url': 'https://play.acast.com/s/sparpodcast/2a92b283-1a75-4ad8-8396-499c641de0d9',
@ -54,40 +81,14 @@ class ACastIE(InfoExtractor):
    def _real_extract(self, url):
        channel, display_id = re.match(self._VALID_URL, url).groups()
-        s = self._download_json(
+        episode = self._call_api(
-            'https://feeder.acast.com/api/v1/shows/%s/episodes/%s' % (channel, display_id),
+            '%s/episodes/%s' % (channel, display_id),
-            display_id)
+            display_id, {'showInfo': 'true'})
-        media_url = s['url']
+        return self._extract_episode(
-        if re.search(r'[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}', display_id):
+            episode, self._extract_show_info(episode.get('show') or {}))
            episode_url = s.get('episodeUrl')
            if episode_url:
                display_id = episode_url
            else:
                channel, display_id = re.match(self._VALID_URL, s['link']).groups()
        cast_data = self._download_json(
            'https://play-api.acast.com/splash/%s/%s' % (channel, display_id),
            display_id)['result']
        e = cast_data['episode']
        title = e.get('name') or s['title']
        return {
            'id': compat_str(e['id']),
            'display_id': display_id,
            'url': media_url,
            'title': title,
            'description': e.get('summary') or clean_html(e.get('description') or s.get('description')),
            'thumbnail': e.get('image'),
            'timestamp': unified_timestamp(e.get('publishingDate') or s.get('publishDate')),
            'duration': float_or_none(e.get('duration') or s.get('duration')),
            'filesize': int_or_none(e.get('contentLength')),
            'creator': try_get(cast_data, lambda x: x['show']['author'], compat_str),
            'series': try_get(cast_data, lambda x: x['show']['name'], compat_str),
            'season_number': int_or_none(e.get('seasonNumber')),
            'episode': title,
            'episode_number': int_or_none(e.get('episodeNumber')),
        }
-class ACastChannelIE(InfoExtractor):
+class ACastChannelIE(ACastBaseIE):
    IE_NAME = 'acast:channel'
    _VALID_URL = r'''(?x)
                    https?://
@ -102,34 +103,24 @@ class ACastChannelIE(InfoExtractor):
        'info_dict': {
            'id': '4efc5294-5385-4847-98bd-519799ce5786',
            'title': 'Today in Focus',
-            'description': 'md5:9ba5564de5ce897faeb12963f4537a64',
+            'description': 'md5:c09ce28c91002ce4ffce71d6504abaae',
        },
-        'playlist_mincount': 35,
+        'playlist_mincount': 200,
    }, {
        'url': 'http://play.acast.com/s/ft-banking-weekly',
        'only_matching': True,
    }]
    _API_BASE_URL = 'https://play.acast.com/api/'
    _PAGE_SIZE = 10
    @classmethod
    def suitable(cls, url):
        return False if ACastIE.suitable(url) else super(ACastChannelIE, cls).suitable(url)
    def _fetch_page(self, channel_slug, page):
        casts = self._download_json(
            self._API_BASE_URL + 'channels/%s/acasts?page=%s' % (channel_slug, page),
            channel_slug, note='Download page %d of channel data' % page)
        for cast in casts:
            yield self.url_result(
                'https://play.acast.com/s/%s/%s' % (channel_slug, cast['url']),
                'ACast', cast['id'])
    def _real_extract(self, url):
-        channel_slug = self._match_id(url)
+        show_slug = self._match_id(url)
-        channel_data = self._download_json(
+        show = self._call_api(show_slug, show_slug)
-            self._API_BASE_URL + 'channels/%s' % channel_slug, channel_slug)
+        show_info = self._extract_show_info(show)
-        entries = OnDemandPagedList(functools.partial(
+        entries = []
-            self._fetch_page, channel_slug), self._PAGE_SIZE)
+        for episode in (show.get('episodes') or []):
-        return self.playlist_result(entries, compat_str(
+            entries.append(self._extract_episode(episode, show_info))
-            channel_data['id']), channel_data['name'], channel_data.get('description'))
+        return self.playlist_result(
            entries, show.get('id'), show.get('title'), show.get('description'))
--- a/haruhi_dl/extractor/adn.py
+++ b/haruhi_dl/extractor/adn.py
@ -10,6 +10,7 @@ import random
 from .common import InfoExtractor
 from ..aes import aes_cbc_decrypt
 from ..compat import (
    compat_HTTPError,
    compat_b64decode,
    compat_ord,
 )
@ -18,11 +19,14 @@ from ..utils import (
    bytes_to_long,
    ExtractorError,
    float_or_none,
    int_or_none,
    intlist_to_bytes,
    long_to_bytes,
    pkcs1pad,
    strip_or_none,
-    urljoin,
+    try_get,
    unified_strdate,
    urlencode_postdata,
 )
@ -31,16 +35,30 @@ class ADNIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?animedigitalnetwork\.fr/video/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://animedigitalnetwork.fr/video/blue-exorcist-kyoto-saga/7778-episode-1-debut-des-hostilites',
-        'md5': 'e497370d847fd79d9d4c74be55575c7a',
+        'md5': '0319c99885ff5547565cacb4f3f9348d',
        'info_dict': {
            'id': '7778',
            'ext': 'mp4',
-            'title': 'Blue Exorcist - Kyôto Saga - Épisode 1',
+            'title': 'Blue Exorcist - Kyôto Saga - Episode 1',
            'description': 'md5:2f7b5aa76edbc1a7a92cedcda8a528d5',
            'series': 'Blue Exorcist - Kyôto Saga',
            'duration': 1467,
            'release_date': '20170106',
            'comment_count': int,
            'average_rating': float,
            'season_number': 2,
            'episode': 'Début des hostilités',
            'episode_number': 1,
        }
    }
    _NETRC_MACHINE = 'animedigitalnetwork'
    _BASE_URL = 'http://animedigitalnetwork.fr'
-    _RSA_KEY = (0xc35ae1e4356b65a73b551493da94b8cb443491c0aa092a357a5aee57ffc14dda85326f42d716e539a34542a0d3f363adf16c5ec222d713d5997194030ee2e4f0d1fb328c01a81cf6868c090d50de8e169c6b13d1675b9eeed1cbc51e1fffca9b38af07f37abd790924cd3bee59d0257cfda4fe5f3f0534877e21ce5821447d1b, 65537)
+    _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
    _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
    _HEADERS = {}
    _LOGIN_ERR_MESSAGE = 'Unable to log in'
    _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
    _POS_ALIGN_MAP = {
        'start': 1,
        'end': 3,
@ -54,26 +72,24 @@ class ADNIE(InfoExtractor):
    def _ass_subtitles_timecode(seconds):
        return '%01d:%02d:%02d.%02d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 100)
-    def _get_subtitles(self, sub_path, video_id):
+    def _get_subtitles(self, sub_url, video_id):
-        if not sub_path:
+        if not sub_url:
            return None
        enc_subtitles = self._download_webpage(
-            urljoin(self._BASE_URL, sub_path),
+            sub_url, video_id, 'Downloading subtitles location', fatal=False) or '{}'
            video_id, 'Downloading subtitles location', fatal=False) or '{}'
        subtitle_location = (self._parse_json(enc_subtitles, video_id, fatal=False) or {}).get('location')
        if subtitle_location:
            enc_subtitles = self._download_webpage(
-                urljoin(self._BASE_URL, subtitle_location),
+                subtitle_location, video_id, 'Downloading subtitles data',
-                video_id, 'Downloading subtitles data', fatal=False,
+                fatal=False, headers={'Origin': 'https://animedigitalnetwork.fr'})
                headers={'Origin': 'https://animedigitalnetwork.fr'})
        if not enc_subtitles:
            return None
        # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js
        dec_subtitles = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(compat_b64decode(enc_subtitles[24:])),
-            bytes_to_intlist(binascii.unhexlify(self._K + '4b8ef13ec1872730')),
+            bytes_to_intlist(binascii.unhexlify(self._K + 'ab9f52f5baae7c72')),
            bytes_to_intlist(compat_b64decode(enc_subtitles[:24]))
        ))
        subtitles_json = self._parse_json(
@ -117,61 +133,100 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
            }])
        return subtitles
    def _real_initialize(self):
        username, password = self._get_login_info()
        if not username:
            return
        try:
            access_token = (self._download_json(
                self._API_BASE_URL + 'authentication/login', None,
                'Logging in', self._LOGIN_ERR_MESSAGE, fatal=False,
                data=urlencode_postdata({
                    'password': password,
                    'rememberMe': False,
                    'source': 'Web',
                    'username': username,
                })) or {}).get('accessToken')
            if access_token:
                self._HEADERS = {'authorization': 'Bearer ' + access_token}
        except ExtractorError as e:
            message = None
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                resp = self._parse_json(
                    e.cause.read().decode(), None, fatal=False) or {}
                message = resp.get('message') or resp.get('code')
            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
    def _real_extract(self, url):
        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
-        player_config = self._parse_json(self._search_regex(
+        player = self._download_json(
-            r'playerConfig\s*=\s*({.+});', webpage,
+            video_base_url + 'configuration', video_id,
-            'player config', default='{}'), video_id, fatal=False)
+            'Downloading player config JSON metadata',
-        if not player_config:
+            headers=self._HEADERS)['player']
-            config_url = urljoin(self._BASE_URL, self._search_regex(
+        options = player['options']
                r'(?:id="player"|class="[^"]*adn-player-container[^"]*")[^>]+data-url="([^"]+)"',
                webpage, 'config url'))
            player_config = self._download_json(
                config_url, video_id,
                'Downloading player config JSON metadata')['player']
-        video_info = {}
+        user = options['user']
-        video_info_str = self._search_regex(
+        if not user.get('hasAccess'):
-            r'videoInfo\s*=\s*({.+});', webpage,
+            self.raise_login_required()
            'video info', fatal=False)
        if video_info_str:
            video_info = self._parse_json(
                video_info_str, video_id, fatal=False) or {}
-        options = player_config.get('options') or {}
+        token = self._download_json(
-        metas = options.get('metas') or {}
+            user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
-        links = player_config.get('links') or {}
+            video_id, 'Downloading access token', headers={
-        sub_path = player_config.get('subtitles')
+                'x-player-refresh-token': user['refreshToken']
-        error = None
+            }, data=b'')['token']
-        if not links:
+
-            links_url = player_config.get('linksurl') or options['videoUrl']
+        links_url = try_get(options, lambda x: x['video']['url']) or (video_base_url + 'link')
            token = options['token']
        self._K = ''.join([random.choice('0123456789abcdef') for _ in range(16)])
        message = bytes_to_intlist(json.dumps({
            'k': self._K,
                'e': 60,
            't': token,
        }))
        # Sometimes authentication fails for no good reason, retry with
        # a different random padding
        links_data = None
        for _ in range(3):
            padded_message = intlist_to_bytes(pkcs1pad(message, 128))
            n, e = self._RSA_KEY
            encrypted_message = long_to_bytes(pow(bytes_to_long(padded_message), e, n))
            authorization = base64.b64encode(encrypted_message).decode()
            try:
                links_data = self._download_json(
-                urljoin(self._BASE_URL, links_url), video_id,
+                    links_url, video_id, 'Downloading links JSON metadata', headers={
-                'Downloading links JSON metadata', headers={
+                        'X-Player-Token': authorization
-                    'Authorization': 'Bearer ' + authorization,
+                    }, query={
                        'freeWithAds': 'true',
                        'adaptive': 'false',
                        'withMetadata': 'true',
                        'source': 'Web'
                    })
                break
            except ExtractorError as e:
                if not isinstance(e.cause, compat_HTTPError):
                    raise e
                if e.cause.code == 401:
                    # This usually goes away with a different random pkcs1pad, so retry
                    continue
                error = self._parse_json(e.cause.read(), video_id)
                message = error.get('message')
                if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                    self.raise_geo_restricted(msg=message)
                raise ExtractorError(message)
        else:
            raise ExtractorError('Giving up retrying')
        links = links_data.get('links') or {}
-            metas = metas or links_data.get('meta') or {}
+        metas = links_data.get('metadata') or {}
-            sub_path = sub_path or links_data.get('subtitles') or \
+        sub_url = (links.get('subtitles') or {}).get('all')
-                'index.php?option=com_vodapi&task=subtitles.getJSON&format=json&id=' + video_id
+        video_info = links_data.get('video') or {}
-            sub_path += '&token=' + token
+        title = metas['title']
            error = links_data.get('error')
        title = metas.get('title') or video_info['title']
        formats = []
-        for format_id, qualities in links.items():
+        for format_id, qualities in (links.get('streaming') or {}).items():
            if not isinstance(qualities, dict):
                continue
            for quality, load_balancer_url in qualities.items():
@ -189,19 +244,26 @@ Format: Marked,Start,End,Style,Name,MarginL,MarginR,MarginV,Effect,Text'''
                    for f in m3u8_formats:
                        f['language'] = 'fr'
                formats.extend(m3u8_formats)
        if not error:
            error = options.get('error')
        if not formats and error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
        self._sort_formats(formats)
        video = (self._download_json(
            self._API_BASE_URL + 'video/%s' % video_id, video_id,
            'Downloading additional video metadata', fatal=False) or {}).get('video') or {}
        show = video.get('show') or {}
        return {
            'id': video_id,
            'title': title,
-            'description': strip_or_none(metas.get('summary') or video_info.get('resume')),
+            'description': strip_or_none(metas.get('summary') or video.get('summary')),
-            'thumbnail': video_info.get('image'),
+            'thumbnail': video_info.get('image') or player.get('image'),
            'formats': formats,
-            'subtitles': self.extract_subtitles(sub_path, video_id),
+            'subtitles': self.extract_subtitles(sub_url, video_id),
-            'episode': metas.get('subtitle') or video_info.get('videoTitle'),
+            'episode': metas.get('subtitle') or video.get('name'),
-            'series': video_info.get('playlistTitle'),
+            'episode_number': int_or_none(video.get('shortNumber')),
            'series': show.get('title'),
            'season_number': int_or_none(video.get('season')),
            'duration': int_or_none(video_info.get('duration') or video.get('duration')),
            'release_date': unified_strdate(video.get('releaseDate')),
            'average_rating': float_or_none(video.get('rating') or metas.get('rating')),
            'comment_count': int_or_none(video.get('commentsCount')),
        }
--- a/haruhi_dl/extractor/aenetworks.py
+++ b/haruhi_dl/extractor/aenetworks.py
@ -5,20 +5,32 @@ import re
 from .theplatform import ThePlatformIE
 from ..utils import (
    extract_attributes,
    ExtractorError,
    GeoRestrictedError,
    int_or_none,
    smuggle_url,
    update_url_query,
-)
+    urlencode_postdata,
 from ..compat import (
    compat_urlparse,
 )
 class AENetworksBaseIE(ThePlatformIE):
    _BASE_URL_REGEX = r'''(?x)https?://
        (?:(?:www|play|watch)\.)?
        (?P<domain>
            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/'''
    _THEPLATFORM_KEY = 'crazyjava'
    _THEPLATFORM_SECRET = 's3cr3t'
    _DOMAIN_MAP = {
        'history.com': ('HISTORY', 'history'),
        'aetv.com': ('AETV', 'aetv'),
        'mylifetime.com': ('LIFETIME', 'lifetime'),
        'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
        'fyi.tv': ('FYI', 'fyi'),
        'historyvault.com': (None, 'historyvault'),
        'biography.com': (None, 'biography'),
    }
    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        query = {'mbr': 'true'}
@ -31,7 +43,7 @@ class AENetworksBaseIE(ThePlatformIE):
            'assetTypes': 'high_video_s3'
        }, {
            'assetTypes': 'high_video_s3',
-            'switch': 'hls_ingest_fastly'
+            'switch': 'hls_high_fastly',
        }]
        formats = []
        subtitles = {}
@ -44,6 +56,8 @@ class AENetworksBaseIE(ThePlatformIE):
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
            except ExtractorError as e:
                if isinstance(e, GeoRestrictedError):
                    raise
                last_e = e
                continue
            formats.extend(tp_formats)
@ -57,24 +71,45 @@ class AENetworksBaseIE(ThePlatformIE):
            'subtitles': subtitles,
        }
    def _extract_aetn_info(self, domain, filter_key, filter_value, url):
        requestor_id, brand = self._DOMAIN_MAP[domain]
        result = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
            filter_value, query={'filter[%s]' % filter_key: filter_value})['results'][0]
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        auth = None
        if theplatform_metadata.get('AETN$isBehindWall'):
            resource = self._get_mvpd_resource(
                requestor_id, theplatform_metadata['title'],
                theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
                theplatform_metadata['ratings'][0]['rating'])
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        info.update(self._extract_aen_smil(media_url, video_id, auth))
        info.update({
            'title': title,
            'series': result.get('seriesName'),
            'season_number': int_or_none(result.get('tvSeasonNumber')),
            'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
        })
        return info
 class AENetworksIE(AENetworksBaseIE):
    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
-    _VALID_URL = r'''(?x)
+    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
-                    https?://
+        shows/[^/]+/season-\d+/episode-\d+|
                        (?:www\.)?
                        (?P<domain>
                            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
                            fyi\.tv
                        )/
        (?:
-                            shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|
+            (?:movie|special)s/[^/]+|
-                            movies/(?P<movie_display_id>[^/]+)(?:/full-movie)?|
+            (?:shows/[^/]+/)?videos
-                            specials/(?P<special_display_id>[^/]+)/(?:full-special|preview-)|
+        )/[^/?#&]+
-                            collections/[^/]+/(?P<collection_display_id>[^/]+)
+    )'''
                        )
                    '''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
@ -91,22 +126,23 @@ class AENetworksIE(AENetworksBaseIE):
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
-    }, {
+        'skip': 'This video is only available for users of participating TV providers.',
        'url': 'http://www.history.com/shows/ancient-aliens/season-1',
        'info_dict': {
            'id': '71889446852',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'http://www.mylifetime.com/shows/atlanta-plastic',
        'info_dict': {
            'id': 'SERIES4317',
            'title': 'Atlanta Plastic',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
-        'only_matching': True
+        'info_dict': {
            'id': '600587331957',
            'ext': 'mp4',
            'title': 'Inlawful Entry',
            'description': 'md5:57c12115a2b384d883fe64ca50529e08',
            'timestamp': 1452634428,
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True
@ -117,78 +153,125 @@ class AENetworksIE(AENetworksBaseIE):
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True
    }, {
-        'url': 'https://www.lifetimemovieclub.com/movies/a-killer-among-us',
+        'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
        'only_matching': True
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True
    }, {
        'url': 'https://www.historyvault.com/collections/america-the-story-of-us/westward',
        'only_matching': True
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True
    }, {
        'url': 'http://www.history.com/videos/history-of-valentines-day',
        'only_matching': True
    }, {
        'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
        'only_matching': True
    }]
    _DOMAIN_TO_REQUESTOR_ID = {
        'history.com': 'HISTORY',
        'aetv.com': 'AETV',
        'mylifetime.com': 'LIFETIME',
        'lifetimemovieclub.com': 'LIFETIMEMOVIECLUB',
        'fyi.tv': 'FYI',
    }
    def _real_extract(self, url):
-        domain, show_path, movie_display_id, special_display_id, collection_display_id = re.match(self._VALID_URL, url).groups()
+        domain, canonical = re.match(self._VALID_URL, url).groups()
-        display_id = show_path or movie_display_id or special_display_id or collection_display_id
+        return self._extract_aetn_info(domain, 'canonical', '/' + canonical, url)
        webpage = self._download_webpage(url, display_id, headers=self.geo_verification_headers())
        if show_path:
            url_parts = show_path.split('/')
            url_parts_len = len(url_parts)
            if url_parts_len == 1:
                entries = []
                for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage):
                    entries.append(self.url_result(
                        compat_urlparse.urljoin(url, season_url_path), 'AENetworks'))
                if entries:
                    return self.playlist_result(
                        entries, self._html_search_meta('aetn:SeriesId', webpage),
                        self._html_search_meta('aetn:SeriesTitle', webpage))
                else:
                    # single season
                    url_parts_len = 2
            if url_parts_len == 2:
                entries = []
                for episode_item in re.findall(r'(?s)<[^>]+class="[^"]*(?:episode|program)-item[^"]*"[^>]*>', webpage):
                    episode_attributes = extract_attributes(episode_item)
                    episode_url = compat_urlparse.urljoin(
                        url, episode_attributes['data-canonical'])
                    entries.append(self.url_result(
                        episode_url, 'AENetworks',
                        episode_attributes.get('data-videoid') or episode_attributes.get('data-video-id')))
                return self.playlist_result(
                    entries, self._html_search_meta('aetn:SeasonId', webpage))
-        video_id = self._html_search_meta('aetn:VideoID', webpage)
+
-        media_url = self._search_regex(
+class AENetworksListBaseIE(AENetworksBaseIE):
-            [r"media_url\s*=\s*'(?P<url>[^']+)'",
+    def _call_api(self, resource, slug, brand, fields):
-             r'data-media-url=(?P<url>(?:https?:)?//[^\s>]+)',
+        return self._download_json(
-             r'data-media-url=(["\'])(?P<url>(?:(?!\1).)+?)\1'],
+            'https://yoga.appsvcs.aetnd.com/graphql',
-            webpage, 'video url', group='url')
+            slug, query={'brand': brand}, data=urlencode_postdata({
-        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
+                'query': '''{
-            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
+  %s(slug: "%s") {
-        info = self._parse_theplatform_metadata(theplatform_metadata)
+    %s
-        auth = None
+  }
-        if theplatform_metadata.get('AETN$isBehindWall'):
+}''' % (resource, slug, fields),
-            requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain]
+            }))['data'][resource]
-            resource = self._get_mvpd_resource(
+
-                requestor_id, theplatform_metadata['title'],
+    def _real_extract(self, url):
-                theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
+        domain, slug = re.match(self._VALID_URL, url).groups()
-                theplatform_metadata['ratings'][0]['rating'])
+        _, brand = self._DOMAIN_MAP[domain]
-            auth = self._extract_mvpd_auth(
+        playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
-                url, video_id, requestor_id, resource)
+        base_url = 'http://watch.%s' % domain
-        info.update(self._search_json_ld(webpage, video_id, fatal=False))
+
-        info.update(self._extract_aen_smil(media_url, video_id, auth))
+        entries = []
-        return info
+        for item in (playlist.get(self._ITEMS_KEY) or []):
            doc = self._get_doc(item)
            canonical = doc.get('canonical')
            if not canonical:
                continue
            entries.append(self.url_result(
                base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))
        description = None
        if self._PLAYLIST_DESCRIPTION_KEY:
            description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)
        return self.playlist_result(
            entries, playlist.get('id'),
            playlist.get(self._PLAYLIST_TITLE_KEY), description)
 class AENetworksCollectionIE(AENetworksListBaseIE):
    IE_NAME = 'aenetworks:collection'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
        'info_dict': {
            'id': '282',
            'title': 'America The Story of Us',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
        'only_matching': True
    }, {
        'url': 'https://www.historyvault.com/collections/mysteryquest',
        'only_matching': True
    }]
    _RESOURCE = 'list'
    _ITEMS_KEY = 'items'
    _PLAYLIST_TITLE_KEY = 'display_title'
    _PLAYLIST_DESCRIPTION_KEY = None
    _FIELDS = '''id
    display_title
    items {
      ... on ListVideoItem {
        doc {
          canonical
          id
        }
      }
    }'''
    def _get_doc(self, item):
        return item.get('doc') or {}
 class AENetworksShowIE(AENetworksListBaseIE):
    IE_NAME = 'aenetworks:show'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
        'playlist_mincount': 150,
    }]
    _RESOURCE = 'series'
    _ITEMS_KEY = 'episodes'
    _PLAYLIST_TITLE_KEY = 'title'
    _PLAYLIST_DESCRIPTION_KEY = 'description'
    _FIELDS = '''description
    id
    title
    episodes {
      canonical
      id
    }'''
    def _get_doc(self, item):
        return item
 class HistoryTopicIE(AENetworksBaseIE):
@ -204,6 +287,7 @@ class HistoryTopicIE(AENetworksBaseIE):
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
@ -212,36 +296,47 @@ class HistoryTopicIE(AENetworksBaseIE):
        'add_ie': ['ThePlatform'],
    }]
-    def theplatform_url_result(self, theplatform_url, video_id, query):
+    def _real_extract(self, url):
-        return {
+        display_id = self._match_id(url)
-            '_type': 'url_transparent',
+        return self.url_result(
-            'id': video_id,
+            'http://www.history.com/videos/' + display_id,
-            'url': smuggle_url(
+            AENetworksIE.ie_key())
-                update_url_query(theplatform_url, query),
+
-                {
+
-                    'sig': {
+class HistoryPlayerIE(AENetworksBaseIE):
-                        'key': self._THEPLATFORM_KEY,
+    IE_NAME = 'history:player'
-                        'secret': self._THEPLATFORM_SECRET,
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []
    def _real_extract(self, url):
        domain, video_id = re.match(self._VALID_URL, url).groups()
        return self._extract_aetn_info(domain, 'id', video_id, url)
 class BiographyIE(AENetworksBaseIE):
    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
-                    'force_smil_url': True
+        'params': {
-                }),
+            # m3u8 download
-            'ie_key': 'ThePlatform',
+            'skip_download': True,
-        }
+        },
        'add_ie': ['ThePlatform'],
    }]
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
-        video_id = self._search_regex(
+        player_url = self._search_regex(
-            r'<phoenix-iframe[^>]+src="[^"]+\btpid=(\d+)', webpage, 'tpid')
+            r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
-        result = self._download_json(
+            webpage, 'player URL')
-            'https://feeds.video.aetnd.com/api/v2/history/videos',
+        return self.url_result(player_url, HistoryPlayerIE.ie_key())
            video_id, query={'filter[id]': video_id})['results'][0]
        title = result['title']
        info = self._extract_aen_smil(result['publicUrl'], video_id)
        info.update({
            'title': title,
            'description': result.get('description'),
            'duration': int_or_none(result.get('duration')),
            'timestamp': int_or_none(result.get('added'), 1000),
        })
        return info
--- a/haruhi_dl/extractor/aljazeera.py
+++ b/haruhi_dl/extractor/aljazeera.py
@ -1,13 +1,16 @@
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 class AlJazeeraIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?:programmes|video)/.*?/(?P<id>[^/]+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?P<type>program/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P<id>[^/?&#]+)'
    _TESTS = [{
-        'url': 'http://www.aljazeera.com/programmes/the-slum/2014/08/deliverance-201482883754237240.html',
+        'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance',
        'info_dict': {
            'id': '3792260579001',
            'ext': 'mp4',
@ -20,14 +23,34 @@ class AlJazeeraIE(InfoExtractor):
        'add_ie': ['BrightcoveNew'],
        'skip': 'Not accessible from Travis CI server',
    }, {
-        'url': 'http://www.aljazeera.com/video/news/2017/05/sierra-leone-709-carat-diamond-auctioned-170511100111930.html',
+        'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off',
        'only_matching': True,
    }, {
        'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art',
        'only_matching': True,
    }]
-    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/665003303001/default_default/index.html?videoId=%s'
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
    def _real_extract(self, url):
-        program_name = self._match_id(url)
+        post_type, name = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, program_name)
+        post_type = {
-        brightcove_id = self._search_regex(
+            'features': 'post',
-            r'RenderPagesVideo\(\'(.+?)\'', webpage, 'brightcove id')
+            'program': 'episode',
-        return self.url_result(self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, 'BrightcoveNew', brightcove_id)
+            'videos': 'video',
        }[post_type.split('/')[0]]
        video = self._download_json(
            'https://www.aljazeera.com/graphql', name, query={
                'operationName': 'SingleArticleQuery',
                'variables': json.dumps({
                    'name': name,
                    'postType': post_type,
                }),
            }, headers={
                'wp-site': 'aje',
            })['data']['article']['video']
        video_id = video['id']
        account_id = video.get('accountId') or '665003303001'
        player_id = video.get('playerId') or 'BkeSH5BDb'
        return self.url_result(
            self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id),
            'BrightcoveNew', video_id)
--- a/haruhi_dl/extractor/amara.py
+++ b/haruhi_dl/extractor/amara.py
@ -0,0 +1,103 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from .vimeo import VimeoIE
 from ..utils import (
    int_or_none,
    parse_iso8601,
    update_url_query,
 )
 # Extractor for amara.org video pages. Metadata and subtitle tracks come from
 # Amara's JSON API; videos hosted on YouTube or Vimeo are delegated to the
 # matching extractor, anything else is returned as a direct URL.
 class AmaraIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:\w+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        }
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill'
        }
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        }
    }]
    def _real_extract(self, url):
        # Fetch the video record from the Amara API, gather its published
        # subtitle tracks and build the info dict.
        video_id = self._match_id(url)
        meta = self._download_json(
            'https://amara.org/api/videos/%s/' % video_id,
            video_id, query={'format': 'json'})
        # Both fields are mandatory: a missing key (or empty URL list) raises.
        title = meta['title']
        video_url = meta['all_urls'][0]
        subtitles = {}
        for track in (meta.get('languages') or ()):
            track_uri = track.get('subtitles_uri')
            if track_uri and track.get('published'):
                # Tracks without a language code are filed under 'en'; every
                # track is offered in each of the three subtitle formats.
                subtitles.setdefault(track.get('code') or 'en', []).extend(
                    {
                        'ext': subtitle_ext,
                        'url': update_url_query(
                            track_uri, {'format': subtitle_ext}),
                    } for subtitle_ext in ('json', 'srt', 'vtt'))
        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }
        # The first extractor that accepts the URL (YouTube is tried before
        # Vimeo) takes over; otherwise the URL is returned as-is.
        hosting_ie = next(
            (ie for ie in (YoutubeIE, VimeoIE) if ie.suitable(video_url)),
            None)
        if hosting_ie is not None:
            info.update(
                _type='url_transparent',
                ie_key=hosting_ie.ie_key())
        return info
--- a/haruhi_dl/extractor/amcnetworks.py
+++ b/haruhi_dl/extractor/amcnetworks.py
@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .theplatform import ThePlatformIE
 from ..utils import (
    int_or_none,
@ -11,25 +13,22 @@ from ..utils import (
 class AMCNetworksIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?(?:amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?:movies|shows(?:/[^/]+)+)/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>amc|bbcamerica|ifc|(?:we|sundance)tv)\.com/(?P<id>(?:movies|shows(?:/[^/]+)+)/[^/?#&]+)'
    _TESTS = [{
-        'url': 'http://www.ifc.com/shows/maron/season-04/episode-01/step-1',
+        'url': 'https://www.bbcamerica.com/shows/the-graham-norton-show/videos/tina-feys-adorable-airline-themed-family-dinner--51631',
        'md5': '',
        'info_dict': {
-            'id': 's3MX01Nl4vPH',
+            'id': '4Lq1dzOnZGt0',
            'ext': 'mp4',
-            'title': 'Maron - Season 4 - Step 1',
+            'title': "The Graham Norton Show - Season 28 - Tina Fey's Adorable Airline-Themed Family Dinner",
-            'description': 'In denial about his current situation, Marc is reluctantly convinced by his friends to enter rehab. Starring Marc Maron and Constance Zimmer.',
+            'description': "It turns out child stewardesses are very generous with the wine! All-new episodes of 'The Graham Norton Show' premiere Fridays at 11/10c on BBC America.",
-            'age_limit': 17,
+            'upload_date': '20201120',
-            'upload_date': '20160505',
+            'timestamp': 1605904350,
            'timestamp': 1462468831,
            'uploader': 'AMCN',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'Requires TV provider accounts',
    }, {
        'url': 'http://www.bbcamerica.com/shows/the-hunt/full-episodes/season-1/episode-01-the-hardest-challenge',
        'only_matching': True,
@ -55,32 +54,34 @@ class AMCNetworksIE(ThePlatformIE):
        'url': 'https://www.sundancetv.com/shows/riviera/full-episodes/season-1/episode-01-episode-1',
        'only_matching': True,
    }]
    _REQUESTOR_ID_MAP = {
        'amc': 'AMC',
        'bbcamerica': 'BBCA',
        'ifc': 'IFC',
        'sundancetv': 'SUNDANCE',
        'wetv': 'WETV',
    }
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, display_id)
+        requestor_id = self._REQUESTOR_ID_MAP[site]
        properties = self._download_json(
            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
            display_id)['data']['properties']
        query = {
            'mbr': 'true',
            'manifest': 'm3u',
        }
-        media_url = self._search_regex(
+        tp_path = 'M_UwQC/media/' + properties['videoPid']
-            r'window\.platformLinkURL\s*=\s*[\'"]([^\'"]+)',
+        media_url = 'https://link.theplatform.com/s/' + tp_path
-            webpage, 'media url')
+        theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'link\.theplatform\.com/s/([^?]+)',
            media_url, 'theplatform_path'), display_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        video_id = theplatform_metadata['pid']
        title = theplatform_metadata['title']
        rating = try_get(
            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
-        auth_required = self._search_regex(
+        video_category = properties.get('videoCategory')
-            r'window\.authRequired\s*=\s*(true|false);',
+        if video_category and video_category.endswith('-Auth'):
            webpage, 'auth required')
        if auth_required == 'true':
            requestor_id = self._search_regex(
                r'window\.requestor_id\s*=\s*[\'"]([^\'"]+)',
                webpage, 'requestor id')
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(
--- a/haruhi_dl/extractor/americastestkitchen.py
+++ b/haruhi_dl/extractor/americastestkitchen.py
@ -1,82 +1,159 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    int_or_none,
    js_to_json,
    try_get,
    unified_strdate,
    unified_timestamp,
 )
 class AmericasTestKitchenIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?(?:americastestkitchen|cooks(?:country|illustrated))\.com/(?P<resource_type>episode|videos)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
        'md5': 'b861c3e365ac38ad319cfd509c30577f',
        'info_dict': {
            'id': '5b400b9ee338f922cb06450c',
-            'title': 'Weeknight Japanese Suppers',
+            'title': 'Japanese Suppers',
            'ext': 'mp4',
-            'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
+            'description': 'md5:64e606bfee910627efc4b5f050de92b3',
            'thumbnail': r're:^https?://',
-            'timestamp': 1523664000,
+            'timestamp': 1523318400,
-            'upload_date': '20180414',
+            'upload_date': '20180410',
-            'release_date': '20180414',
+            'release_date': '20180410',
            'series': "America's Test Kitchen",
            'season_number': 18,
-            'episode': 'Weeknight Japanese Suppers',
+            'episode': 'Japanese Suppers',
            'episode_number': 15,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Metadata parsing behaves differently for newer episodes (705) as opposed to older episodes (582 above)
        'url': 'https://www.americastestkitchen.com/episode/705-simple-chicken-dinner',
        'md5': '06451608c57651e985a498e69cec17e5',
        'info_dict': {
            'id': '5fbe8c61bda2010001c6763b',
            'title': 'Simple Chicken Dinner',
            'ext': 'mp4',
            'description': 'md5:eb68737cc2fd4c26ca7db30139d109e7',
            'thumbnail': r're:^https?://',
            'timestamp': 1610755200,
            'upload_date': '20210116',
            'release_date': '20210116',
            'series': "America's Test Kitchen",
            'season_number': 21,
            'episode': 'Simple Chicken Dinner',
            'episode_number': 3,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
        'only_matching': True,
    }, {
        'url': 'https://www.cookscountry.com/episode/564-when-only-chocolate-will-do',
        'only_matching': True,
    }, {
        'url': 'https://www.cooksillustrated.com/videos/4478-beef-wellington',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        resource_type, video_id = re.match(self._VALID_URL, url).groups()
        is_episode = resource_type == 'episode'
        if is_episode:
            resource_type = 'episodes'
-        webpage = self._download_webpage(url, video_id)
+        resource = self._download_json(
-
+            'https://www.americastestkitchen.com/api/v6/%s/%s' % (resource_type, video_id), video_id)
-        video_data = self._parse_json(
+        video = resource['video'] if is_episode else resource
-            self._search_regex(
+        episode = resource if is_episode else resource.get('episode') or {}
                r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
                webpage, 'initial context'),
            video_id, js_to_json)
        ep_data = try_get(
            video_data,
            (lambda x: x['episodeDetail']['content']['data'],
             lambda x: x['videoDetail']['content']['data']), dict)
        ep_meta = ep_data.get('full_video', {})
        zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
        title = ep_data.get('title') or ep_meta.get('title')
        description = clean_html(ep_meta.get('episode_description') or ep_data.get(
            'description') or ep_meta.get('description'))
        thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
        release_date = unified_strdate(ep_data.get('aired_at'))
        season_number = int_or_none(ep_meta.get('season_number'))
        episode = ep_meta.get('title')
        episode_number = int_or_none(ep_meta.get('episode_number'))
        return {
            '_type': 'url_transparent',
-            'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
+            'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % video['zypeId'],
            'ie_key': 'Zype',
-            'title': title,
+            'description': clean_html(video.get('description')),
-            'description': description,
+            'timestamp': unified_timestamp(video.get('publishDate')),
-            'thumbnail': thumbnail,
+            'release_date': unified_strdate(video.get('publishDate')),
-            'release_date': release_date,
+            'episode_number': int_or_none(episode.get('number')),
-            'series': "America's Test Kitchen",
+            'season_number': int_or_none(episode.get('season')),
-            'season_number': season_number,
+            'series': try_get(episode, lambda x: x['show']['title']),
-            'episode': episode,
+            'episode': episode.get('title'),
            'episode_number': episode_number,
        }
 # Playlist extractor for the "browse season" pages of America's Test Kitchen
 # and Cook's Country. Episodes are listed through the sites' Algolia search
 # index and each hit is handed to AmericasTestKitchenIE.
 class AmericasTestKitchenSeasonIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?P<show>americastestkitchen|cookscountry)\.com/episodes/browse/season_(?P<id>\d+)'
    _TESTS = [{
        # ATK Season
        'url': 'https://www.americastestkitchen.com/episodes/browse/season_1',
        'info_dict': {
            'id': 'season_1',
            'title': 'Season 1',
        },
        'playlist_count': 13,
    }, {
        # Cooks Country Season
        'url': 'https://www.cookscountry.com/episodes/browse/season_12',
        'info_dict': {
            'id': 'season_12',
            'title': 'Season 12',
        },
        'playlist_count': 13,
    }]
    def _real_extract(self, url):
        # Query the season's episode list and return it as a lazily
        # evaluated playlist.
        mobj = re.match(self._VALID_URL, url)
        show_name = mobj.group('show')
        season_number = int(mobj.group('id'))
        # The search index uses a short slug per show.
        if show_name == 'americastestkitchen':
            slug = 'atk'
        else:
            slug = 'cco'
        season = 'Season %d' % season_number
        search_headers = {
            'Origin': 'https://www.%s.com' % show_name,
            'X-Algolia-API-Key': '8d504d0099ed27c1b73708d22871d805',
            'X-Algolia-Application-Id': 'Y1FNZXUI30',
        }
        search_query = {
            'facetFilters': json.dumps([
                'search_season_list:' + season,
                'search_document_klass:episode',
                'search_show_slug:' + slug,
            ]),
            'attributesToRetrieve': 'description,search_%s_episode_number,search_document_date,search_url,title' % slug,
            'attributesToHighlight': '',
            'hitsPerPage': 1000,
        }
        season_search = self._download_json(
            'https://y1fnzxui30-dsn.algolia.net/1/indexes/everest_search_%s_season_desc_production' % slug,
            season, headers=search_headers, query=search_query)
        def season_entries():
            # One 'url' entry per search hit; hits without a page path are
            # dropped.
            episode_number_field = 'search_%s_episode_number' % slug
            for hit in (season_search.get('hits') or []):
                page_path = hit.get('search_url')
                if page_path:
                    yield {
                        '_type': 'url',
                        'url': 'https://www.%s.com%s' % (show_name, page_path),
                        'id': try_get(hit, lambda e: e['objectID'].split('_')[-1]),
                        'title': hit.get('title'),
                        'description': hit.get('description'),
                        'timestamp': unified_timestamp(hit.get('search_document_date')),
                        'season_number': season_number,
                        'episode_number': int_or_none(hit.get(episode_number_field)),
                        'ie_key': AmericasTestKitchenIE.ie_key(),
                    }
        playlist_id = 'season_%d' % season_number
        return self.playlist_result(season_entries(), playlist_id, season)
--- a/haruhi_dl/extractor/amp.py
+++ b/haruhi_dl/extractor/amp.py
@ -8,6 +8,7 @@ from ..utils import (
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    unified_timestamp,
    url_or_none,
 )
@ -88,7 +89,7 @@ class AMPIE(InfoExtractor):
        self._sort_formats(formats)
-        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
        return {
            'id': video_id,
--- a/haruhi_dl/extractor/animeondemand.py
+++ b/haruhi_dl/extractor/animeondemand.py
@ -116,8 +116,6 @@ class AnimeOnDemandIE(InfoExtractor):
            r'(?s)<div[^>]+itemprop="description"[^>]*>(.+?)</div>',
            webpage, 'anime description', default=None)
        entries = []
        def extract_info(html, video_id, num=None):
            title, description = [None] * 2
            formats = []
@ -233,7 +231,7 @@ class AnimeOnDemandIE(InfoExtractor):
                self._sort_formats(info['formats'])
                f = common_info.copy()
                f.update(info)
-                entries.append(f)
+                yield f
            # Extract teaser/trailer only when full episode is not available
            if not info['formats']:
@ -247,7 +245,7 @@ class AnimeOnDemandIE(InfoExtractor):
                        'title': m.group('title'),
                        'url': urljoin(url, m.group('href')),
                    })
-                    entries.append(f)
+                    yield f
        def extract_episodes(html):
            for num, episode_html in enumerate(re.findall(
@ -275,7 +273,8 @@ class AnimeOnDemandIE(InfoExtractor):
                    'episode_number': episode_number,
                }
-                extract_entries(episode_html, video_id, common_info)
+                for e in extract_entries(episode_html, video_id, common_info):
                    yield e
        def extract_film(html, video_id):
            common_info = {
@ -283,11 +282,18 @@ class AnimeOnDemandIE(InfoExtractor):
                'title': anime_title,
                'description': anime_description,
            }
-            extract_entries(html, video_id, common_info)
+            for e in extract_entries(html, video_id, common_info):
                yield e
-        extract_episodes(webpage)
+        def entries():
            has_episodes = False
            for e in extract_episodes(webpage):
                has_episodes = True
                yield e
-        if not entries:
+            if not has_episodes:
-            extract_film(webpage, anime_id)
+                for e in extract_film(webpage, anime_id):
                    yield e
-        return self.playlist_result(entries, anime_id, anime_title, anime_description)
+        return self.playlist_result(
            entries(), anime_id, anime_title, anime_description)
--- a/haruhi_dl/extractor/anvato.py
+++ b/haruhi_dl/extractor/anvato.py
@ -116,7 +116,76 @@ class AnvatoIE(InfoExtractor):
        'anvato_scripps_app_ios_prod_409c41960c60b308db43c3cc1da79cab9f1c3d93': 'WPxj5GraLTkYCyj3M7RozLqIycjrXOEcDGFMIJPn',
        'EZqvRyKBJLrgpClDPDF8I7Xpdp40Vx73': '4OxGd2dEakylntVKjKF0UK9PDPYB6A9W',
        'M2v78QkpleXm9hPp9jUXI63x5vA6BogR': 'ka6K32k7ZALmpINkjJUGUo0OE42Md1BQ',
-        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ'
+        'nbcu_nbcd_desktop_web_prod_93d8ead38ce2024f8f544b78306fbd15895ae5e6_secure': 'NNemUkySjxLyPTKvZRiGntBIjEyK8uqicjMakIaQ',
        'X8POa4zPPaKVZHqmWjuEzfP31b1QM9VN': 'Dn5vOY9ooDw7VSl9qztjZI5o0g08mA0z',
        'M2v78QkBMpNJlSPp9diX5F2PBmBy6Bog': 'ka6K32kyo7nDZfNkjQCGWf1lpApXMd1B',
        'bvJ0dQpav07l0hG5JgfVLF2dv1vARwpP': 'BzoQW24GrJZoJfmNodiJKSPeB9B8NOxj',
        'lxQMLg2XZKuEZaWgsqubBxV9INZ6bryY': 'Vm2Mx6noKds9jB71h6urazwlTG3m9x8l',
        '04EnjvXeoSmkbJ9ckPs7oY0mcxv7PlyN': 'aXERQP9LMfQVlEDsgGs6eEA1SWznAQ8P',
        'mQbO2ge6BFRWVPYCYpU06YvNt80XLvAX': 'E2BV1NGmasN5v7eujECVPJgwflnLPm2A',
        'g43oeBzJrCml7o6fa5fRL1ErCdeD8z4K': 'RX34mZ6zVH4Nr6whbxIGLv9WSbxEKo8V',
        'VQrDJoP7mtdBzkxhXbSPwGB1coeElk4x': 'j2VejQx0VFKQepAF7dI0mJLKtOVJE18z',
        'WxA5NzLRjCrmq0NUgaU5pdMDuZO7RJ4w': 'lyY5ADLKaIOLEgAsGQCveEMAcqnx3rY9',
        'M4lpMXB71ie0PjMCjdFzVXq0SeRVqz49': 'n2zVkOqaLIv3GbLfBjcwW51LcveWOZ2e',
        'dyDZGEqN8u8nkJZcJns0oxYmtP7KbGAn': 'VXOEqQW9BtEVLajfZQSLEqxgS5B7qn2D',
        'E7QNjrVY5u5mGvgu67IoDgV1CjEND8QR': 'rz8AaDmdKIkLmPNhB5ILPJnjS5PnlL8d',
        'a4zrqjoKlfzg0dwHEWtP31VqcLBpjm4g': 'LY9J16gwETdGWa3hjBu5o0RzuoQDjqXQ',
        'dQP5BZroMsMVLO1hbmT5r2Enu86GjxA6': '7XR3oOdbPF6x3PRFLDCq9RkgsRjAo48V',
        'M4lKNBO1NFe0PjMCj1tzVXq0SeRVqzA9': 'n2zoRqGLRUv3GbLfBmTwW51LcveWOZYe',
        'nAZ7MZdpGCGg1pqFEbsoJOz2C60mv143': 'dYJgdqA9aT4yojETqGi7yNgoFADxqmXP',
        '3y1MERYgOuE9NzbFgwhV6Wv2F0YKvbyz': '081xpZDQgC4VadLTavhWQxrku56DAgXV',
        'bmQvmEXr5HWklBMCZOcpE2Z3HBYwqGyl': 'zxXPbVNyMiMAZldhr9FkOmA0fl4aKr2v',
        'wA7oDNYldfr6050Hwxi52lPZiVlB86Ap': 'ZYK16aA7ni0d3l3c34uwpxD7CbReMm8Q',
        'g43MbKMWmFml7o7sJoSRkXxZiXRvJ3QK': 'RX3oBJonvs4Nr6rUWBCGn3matRGqJPXV',
        'mA9VdlqpLS0raGaSDvtoqNrBTzb8XY4q': '0XN4OjBD3fnW7r7IbmtJB4AyfOmlrE2r',
        'mAajOwgkGt17oGoFmEuklMP9H0GnW54d': 'lXbBLPGyzikNGeGujAuAJGjZiwLRxyXR',
        'vy8vjJ9kbUwrRqRu59Cj5dWZfzYErlAb': 'K8l7gpwaGcBpnAnCLNCmPZRdin3eaQX0',
        'xQMWBpR8oHEZaWaSMGUb0avOHjLVYn4Y': 'm2MrN4vEaf9jB7BFy5Srb40jTrN67AYl',
        'xyKEmVO3miRr6D6UVkt7oB8jtD6aJEAv': 'g2ddDebqDfqdgKgswyUKwGjbTWwzq923',
        '7Qk0wa2D9FjKapacoJF27aLvUDKkLGA0': 'b2kgBEkephJaMkMTL7s1PLe4Ua6WyP2P',
        '3QLg6nqmNTJ5VvVTo7f508LPidz1xwyY': 'g2L1GgpraipmAOAUqmIbBnPxHOmw4MYa',
        '3y1B7zZjXTE9NZNSzZSVNPZaTNLjo6Qz': '081b5G6wzH4VagaURmcWbN5mT4JGEe2V',
        'lAqnwvkw6SG6D8DSqmUg6DRLUp0w3G4x': 'O2pbP0xPDFNJjpjIEvcdryOJtpkVM4X5',
        'awA7xd1N0Hr6050Hw2c52lPZiVlB864p': 'GZYKpn4aoT0d3l3c3PiwpxD7CbReMmXQ',
        'jQVqPLl9YHL1WGWtR1HDgWBGT63qRNyV': '6X03ne6vrU4oWyWUN7tQVoajikxJR3Ye',
        'GQRMR8mL7uZK797t7xH3eNzPIP5dOny1': 'm2vqPWGd4U31zWzSyasDRAoMT1PKRp8o',
        'zydq9RdmRhXLkNkfNoTJlMzaF0lWekQB': '3X7LnvE7vH5nkEkSqLiey793Un7dLB8e',
        'VQrDzwkB2IdBzjzu9MHPbEYkSB50gR4x': 'j2VebLzoKUKQeEesmVh0gM1eIp9jKz8z',
        'mAa2wMamBs17oGoFmktklMP9H0GnW54d': 'lXbgP74xZTkNGeGujVUAJGjZiwLRxy8R',
        '7yjB6ZLG6sW8R6RF2xcan1KGfJ5dNoyd': 'wXQkPorvPHZ45N5t4Jf6qwg5Tp4xvw29',
        'a4zPpNeWGuzg0m0iX3tPeanGSkRKWXQg': 'LY9oa3QAyHdGW9Wu3Ri5JGeEik7l1N8Q',
        'k2rneA2M38k25cXDwwSknTJlxPxQLZ6M': '61lyA2aEVDzklfdwmmh31saPxQx2VRjp',
        'bK9Zk4OvPnvxduLgxvi8VUeojnjA02eV': 'o5jANYjbeMb4nfBaQvcLAt1jzLzYx6ze',
        '5VD6EydM3R9orHmNMGInGCJwbxbQvGRw': 'w3zjmX7g4vnxzCxElvUEOiewkokXprkZ',
        '70X35QbVYVYNPUmP9YfbzI06YqYQk2R1': 'vG4Aj2BMjMjoztB7zeFOnCVPJpJ8lMOa',
        '26qYwQVG9p1Bks2GgBckjfDJOXOAMgG1': 'r4ev9X0mv5zqJc0yk5IBDcQOwZw8mnwQ',
        'rvVKpA56MBXWlSxMw3cobT5pdkd4Dm7q': '1J7ZkY53pZ645c93owcLZuveE7E8B3rL',
        'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo': 'qN1zdy1zlYL23IWZGWtDvfV6WeWQWkJo',
        'jdKqRGF16dKsBviMDae7IGDl7oTjEbVV': 'Q09l7vhlNxPFErIOK6BVCe7KnwUW5DVV',
        '3QLkogW1OUJ5VvPsrDH56DY2u7lgZWyY': 'g2LRE1V9espmAOPhE4ubj4ZdUA57yDXa',
        'wyJvWbXGBSdbkEzhv0CW8meou82aqRy8': 'M2wolPvyBIpQGkbT4juedD4ruzQGdK2y',
        '7QkdZrzEkFjKap6IYDU2PB0oCNZORmA0': 'b2kN1l96qhJaMkPs9dt1lpjBfwqZoA8P',
        'pvA05113MHG1w3JTYxc6DVlRCjErVz4O': 'gQXeAbblBUnDJ7vujbHvbRd1cxlz3AXO',
        'mA9blJDZwT0raG1cvkuoeVjLC7ZWd54q': '0XN9jRPwMHnW7rvumgfJZOD9CJgVkWYr',
        '5QwRN5qKJTvGKlDTmnf7xwNZcjRmvEy9': 'R2GP6LWBJU1QlnytwGt0B9pytWwAdDYy',
        'eyn5rPPbkfw2KYxH32fG1q58CbLJzM40': 'p2gyqooZnS56JWeiDgfmOy1VugOQEBXn',
        '3BABn3b5RfPJGDwilbHe7l82uBoR05Am': '7OYZG7KMVhbPdKJS3xcWEN3AuDlLNmXj',
        'xA5zNGXD3HrmqMlF6OS5pdMDuZO7RJ4w': 'yY5DAm6r1IOLE3BCVMFveEMAcqnx3r29',
        'g43PgW3JZfml7o6fDEURL1ErCdeD8zyK': 'RX3aQn1zrS4Nr6whDgCGLv9WSbxEKo2V',
        'lAqp8WbGgiG6D8LTKJcg3O72CDdre1Qx': 'O2pnm6473HNJjpKuVosd3vVeh975yrX5',
        'wyJbYEDxKSdbkJ6S6RhW8meou82aqRy8': 'M2wPm7EgRSpQGlAh70CedD4ruzQGdKYy',
        'M4lgW28nLCe0PVdtaXszVXq0SeRVqzA9': 'n2zmJvg4jHv3G0ETNgiwW51LcveWOZ8e',
        '5Qw3OVvp9FvGKlDTmOC7xwNZcjRmvEQ9': 'R2GzDdml9F1Qlnytw9s0B9pytWwAdD8y',
        'vy8a98X7zCwrRqbHrLUjYzwDiK2b70Qb': 'K8lVwzyjZiBpnAaSGeUmnAgxuGOBxmY0',
        'g4eGjJLLoiqRD3Pf9oT5O03LuNbLRDQp': '6XqD59zzpfN4EwQuaGt67qNpSyRBlnYy',
        'g43OPp9boIml7o6fDOIRL1ErCdeD8z4K': 'RX33alNB4s4Nr6whDPUGLv9WSbxEKoXV',
        'xA2ng9OkBcGKzDbTkKsJlx7dUK8R3dA5': 'z2aPnJvzBfObkwGC3vFaPxeBhxoMqZ8K',
        'xyKEgBajZuRr6DEC0Kt7XpD1cnNW9gAv': 'g2ddlEBvRsqdgKaI4jUK9PrgfMexGZ23',
        'BAogww51jIMa2JnH1BcYpXM5F658RNAL': 'rYWDmm0KptlkGv4FGJFMdZmjs9RDE6XR',
        'BAokpg62VtMa2JnH1mHYpXM5F658RNAL': 'rYWryDnlNslkGv4FG4HMdZmjs9RDE62R',
        'a4z1Px5e2hzg0m0iMMCPeanGSkRKWXAg': 'LY9eorNQGUdGW9WuKKf5JGeEik7l1NYQ',
        'kAx69R58kF9nY5YcdecJdl2pFXP53WyX': 'gXyRxELpbfPvLeLSaRil0mp6UEzbZJ8L',
        'BAoY13nwViMa2J2uo2cY6BlETgmdwryL': 'rYWwKzJmNFlkGvGtNoUM9bzwIJVzB1YR',
    }
    _MCP_TO_ACCESS_KEY_TABLE = {
@ -189,19 +258,17 @@ class AnvatoIE(InfoExtractor):
        video_data_url += '&X-Anvato-Adst-Auth=' + base64.b64encode(auth_secret).decode('ascii')
        anvrid = md5_text(time.time() * 1000 * random.random())[:30]
-        payload = {
+        api = {
            'api': {
            'anvrid': anvrid,
                'anvstk': md5_text('%s|%s|%d|%s' % (
                    access_key, anvrid, server_time,
                    self._ANVACK_TABLE.get(access_key, self._API_KEY))),
            'anvts': server_time,
            },
        }
        api['anvstk'] = md5_text('%s|%s|%d|%s' % (
            access_key, anvrid, server_time,
            self._ANVACK_TABLE.get(access_key, self._API_KEY)))
        return self._download_json(
            video_data_url, video_id, transform_source=strip_jsonp,
-            data=json.dumps(payload).encode('utf-8'))
+            data=json.dumps({'api': api}).encode('utf-8'))
    def _get_anvato_videos(self, access_key, video_id):
        video_data = self._get_video_json(access_key, video_id)
@ -259,7 +326,7 @@ class AnvatoIE(InfoExtractor):
            'description': video_data.get('def_description'),
            'tags': video_data.get('def_tags', '').split(','),
            'categories': video_data.get('categories'),
-            'thumbnail': video_data.get('thumbnail'),
+            'thumbnail': video_data.get('src_image_url') or video_data.get('thumbnail'),
            'timestamp': int_or_none(video_data.get(
                'ts_published') or video_data.get('ts_added')),
            'uploader': video_data.get('mcp_id'),
--- a/haruhi_dl/extractor/aol.py
+++ b/haruhi_dl/extractor/aol.py
@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import re
-from .common import InfoExtractor
+from .yahoo import YahooIE
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
@ -15,9 +15,9 @@ from ..utils import (
 )
-class AolIE(InfoExtractor):
+class AolIE(YahooIE):
    IE_NAME = 'aol.com'
-    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>[0-9a-f]+)'
+    _VALID_URL = r'(?:aol-video:|https?://(?:www\.)?aol\.(?:com|ca|co\.uk|de|jp)/video/(?:[^/]+/)*)(?P<id>\d{9}|[0-9a-f]{24}|[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12})'
    _TESTS = [{
        # video with 5min ID
@ -76,10 +76,16 @@ class AolIE(InfoExtractor):
    }, {
        'url': 'https://www.aol.jp/video/playlist/5a28e936a1334d000137da0c/5a28f3151e642219fde19831/',
        'only_matching': True,
    }, {
        # Yahoo video
        'url': 'https://www.aol.com/video/play/991e6700-ac02-11ea-99ff-357400036f61/24bbc846-3e30-3c46-915e-fe8ccd7fcc46/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        if '-' in video_id:
            return self._extract_yahoo_video(video_id, 'us')
        response = self._download_json(
            'https://feedapi.b2c.on.aol.com/v1.0/app/videos/aolon/%s/details' % video_id,
--- a/haruhi_dl/extractor/apa.py
+++ b/haruhi_dl/extractor/apa.py
@ -6,25 +6,21 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
-    js_to_json,
+    int_or_none,
    url_or_none,
 )
 class APAIE(InfoExtractor):
-    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
    _TESTS = [{
        'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
        'md5': '2b12292faeb0a7d930c778c7a5b4759b',
        'info_dict': {
-            'id': 'jjv85FdZ',
+            'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
            'ext': 'mp4',
-            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+            'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 254,
            'timestamp': 1519211149,
            'upload_date': '20180221',
        },
    }, {
        'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@ -46,9 +42,11 @@ class APAIE(InfoExtractor):
                webpage)]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
        video_id, base_url = mobj.group('id', 'base_url')
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
            '%s/player/%s' % (base_url, video_id), video_id)
        jwplatform_id = self._search_regex(
            r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@ -59,16 +57,18 @@ class APAIE(InfoExtractor):
                'jwplatform:' + jwplatform_id, ie='JWPlatform',
                video_id=video_id)
-        sources = self._parse_json(
+        def extract(field, name=None):
-            self._search_regex(
+            return self._search_regex(
-                r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
+                r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
-            video_id, transform_source=js_to_json)
+                webpage, name or field, default=None, group='value')
        title = extract('title') or video_id
        description = extract('description')
        thumbnail = extract('poster', 'thumbnail')
        formats = []
-        for source in sources:
+        for format_id in ('hls', 'progressive'):
-            if not isinstance(source, dict):
+            source_url = url_or_none(extract(format_id))
                continue
            source_url = url_or_none(source.get('file'))
            if not source_url:
                continue
            ext = determine_ext(source_url)
@ -77,18 +77,19 @@ class APAIE(InfoExtractor):
                    source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                height = int_or_none(self._search_regex(
                    r'(\d+)\.mp4', source_url, 'height', default=None))
                formats.append({
                    'url': source_url,
                    'format_id': format_id,
                    'height': height,
                })
        self._sort_formats(formats)
        thumbnail = self._search_regex(
            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'thumbnail', fatal=False, group='url')
        return {
            'id': video_id,
-            'title': video_id,
+            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }
--- a/haruhi_dl/extractor/aparat.py
+++ b/haruhi_dl/extractor/aparat.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    get_element_by_id,
    int_or_none,
    merge_dicts,
    mimetype2ext,
@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id, fatal=False)
        if not webpage:
            # Note: There is an easier-to-parse configuration at
            # http://www.aparat.com/video/video/config/videohash/%video_id
            # but the URL in there does not work
            webpage = self._download_webpage(
                'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
                video_id)
-        options = self._parse_json(
+        options = self._parse_json(self._search_regex(
-            self._search_regex(
+            r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
                webpage, 'options', group='value'),
            video_id)
        player = options['plugins']['sabaPlayerPlugin']
        formats = []
-        for sources in player['multiSRC']:
+        for sources in (options.get('multiSRC') or []):
            for item in sources:
                if not isinstance(item, dict):
                    continue
@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
        info = self._search_json_ld(webpage, video_id, default={})
        if not info.get('title'):
-            info['title'] = player['title']
+            info['title'] = get_element_by_id('videoTitle', webpage) or \
                self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
        return merge_dicts(info, {
            'id': video_id,
            'thumbnail': url_or_none(options.get('poster')),
-            'duration': int_or_none(player.get('duration')),
+            'duration': int_or_none(options.get('duration')),
            'formats': formats,
        })
--- a/haruhi_dl/extractor/applepodcasts.py
+++ b/haruhi_dl/extractor/applepodcasts.py
@ -0,0 +1,61 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    clean_podcast_url,
    int_or_none,
    parse_iso8601,
    try_get,
 )
 class ApplePodcastsIE(InfoExtractor):
     # Handles single podcast episodes on podcasts.apple.com. The numeric
     # episode id is captured from the ``i=`` parameter of the page URL.
     _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
         'md5': 'df02e6acb11c10e844946a39e7222b08',
         'info_dict': {
             'id': '1000482637777',
             'ext': 'mp3',
             'title': '207 - Whitney Webb Returns',
             'description': 'md5:13a73bade02d2e43737751e3987e1399',
             'upload_date': '20200705',
             'timestamp': 1593921600,
             'duration': 6425,
             'series': 'The Tim Dillon Show',
         }
     }, {
         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
         'only_matching': True,
     }, {
         'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
         'only_matching': True,
     }, {
         'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
         'only_matching': True,
     }]

     def _real_extract(self, url):
         """Build the info dict for one episode from the data embedded in its page."""
         episode_id = self._match_id(url)
         page = self._download_webpage(url, episode_id)

         # The page carries a JSON object inside the element whose id is
         # "shoebox-ember-data-store"; everything below is read from it.
         store_json = self._search_regex(
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             page, 'ember data')
         store = self._parse_json(store_json, episode_id)
         attributes = store['data']['attributes']
         descriptions = attributes.get('description') or {}

         # When several "media/podcast" records are included, the last one
         # supplies the series name (or None if it has no usable name).
         podcasts = [
             record for record in (store.get('included') or [])
             if record.get('type') == 'media/podcast']
         series = None
         if podcasts:
             series = try_get(podcasts[-1], lambda x: x['attributes']['name'])

         return {
             'id': episode_id,
             # 'name' and 'assetUrl' are mandatory: a missing key raises.
             'title': attributes['name'],
             'url': clean_podcast_url(attributes['assetUrl']),
             # Prefer the "standard" description, fall back to the "short" one.
             'description': descriptions.get('standard') or descriptions.get('short'),
             'timestamp': parse_iso8601(attributes.get('releaseDateTime')),
             # NOTE(review): the 1000 presumably scales milliseconds to
             # seconds inside int_or_none -- confirm against utils.
             'duration': int_or_none(attributes.get('durationInMilliseconds'), 1000),
             'series': series,
         }
--- a/haruhi_dl/extractor/archiveorg.py
+++ b/haruhi_dl/extractor/archiveorg.py
@ -2,15 +2,17 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    unified_strdate,
    clean_html,
    extract_attributes,
    unified_strdate,
    unified_timestamp,
 )
 class ArchiveOrgIE(InfoExtractor):
    IE_NAME = 'archive.org'
    IE_DESC = 'archive.org videos'
-    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
        'md5': '8af1d4cf447933ed3c7f4871162602db',
@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
            'ext': 'ogg',
            'title': '1968 Demo - FJCC Conference Presentation Reel #1',
            'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
-            'upload_date': '19681210',
+            'creator': 'SRI International',
-            'uploader': 'SRI International'
+            'release_date': '19681210',
            'uploader': 'SRI International',
            'timestamp': 1268695290,
            'upload_date': '20100315',
        }
    }, {
        'url': 'https://archive.org/details/Cops1922',
@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
            'id': 'Cops1922',
            'ext': 'mp4',
            'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
+            'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
            'timestamp': 1387699629,
            'upload_date': '20131222',
        }
    }, {
        'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
        'only_matching': True,
    }, {
        'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'http://archive.org/embed/' + video_id, video_id)
-        jwplayer_playlist = self._parse_json(self._search_regex(
+
        playlist = None
        play8 = self._search_regex(
            r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
            'playlist', default=None)
        if play8:
            attrs = extract_attributes(play8)
            playlist = attrs.get('value')
        if not playlist:
            # Old jwplayer fallback
            playlist = self._search_regex(
                r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
-            webpage, 'jwplayer playlist'), video_id)
+                webpage, 'jwplayer playlist', default='[]')
        jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
        if jwplayer_playlist:
            info = self._parse_jwplayer_data(
                {'playlist': jwplayer_playlist}, video_id, base_url=url)
        else:
            # HTML5 media fallback
            info = self._parse_html5_media_entries(url, webpage, video_id)[0]
            info['id'] = video_id
        def get_optional(metadata, field):
            return metadata.get(field, [None])[0]
@ -58,8 +84,12 @@ class ArchiveOrgIE(InfoExtractor):
            'description': clean_html(get_optional(metadata, 'description')),
        })
        if info.get('_type') != 'playlist':
            creator = get_optional(metadata, 'creator')
            info.update({
-                'uploader': get_optional(metadata, 'creator'),
+                'creator': creator,
-                'upload_date': unified_strdate(get_optional(metadata, 'date')),
+                'release_date': unified_strdate(get_optional(metadata, 'date')),
                'uploader': get_optional(metadata, 'publisher') or creator,
                'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
                'language': get_optional(metadata, 'language'),
            })
        return info
--- a/haruhi_dl/extractor/arcpublishing.py
+++ b/haruhi_dl/extractor/arcpublishing.py
@ -0,0 +1,174 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    extract_attributes,
    int_or_none,
    parse_iso8601,
    try_get,
 )
 class ArcPublishingIE(InfoExtractor):
     # Resolves internal "arcpublishing:<org>:<uuid>" URLs. _extract_urls()
     # produces such URLs from "powa" <div> elements found in a web page.
     _UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
     _VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
     _TESTS = [{
         # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
         'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
         'only_matching': True,
     }, {
         # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
         'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
         'only_matching': True,
     }, {
         # https://www.actionnewsjax.com/video/live-stream/
         'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
         'only_matching': True,
     }, {
         # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
         'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
         'only_matching': True,
     }, {
         # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
         'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
         'only_matching': True,
     }, {
         # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
         'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
         'only_matching': True,
     }, {
         # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
         'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
         'only_matching': True,
     }, {
         # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
         'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
         'only_matching': True,
     }, {
         # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
         'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
         'only_matching': True,
     }, {
         # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
         'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
         'only_matching': True,
     }, {
         # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
         'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
         'only_matching': True,
     }, {
         # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
         'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
         'only_matching': True,
     }]
     # (organisations, API host template) pairs; organisations listed in
     # neither entry use the fallback template in _real_extract().
     _POWA_DEFAULTS = [
         (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
         ([
             'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
             'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
             'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
         ], 'video-api-cdn.%s.arcpublishing.com/api'),
     ]

     @staticmethod
     def _extract_urls(webpage, **kw):
         """Return "arcpublishing:<org>:<uuid>" URLs for the powa divs in *webpage*."""
         # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
         powa_div_re = r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)' % ArcPublishingIE._UUID_REGEX
         found = []
         for div_tag in re.findall(powa_div_re, webpage):
             attrs = extract_attributes(div_tag) or {}
             org, uuid = attrs.get('data-org'), attrs.get('data-uuid')
             # Both attributes are needed to build a resolvable URL.
             if not (org and uuid):
                 continue
             found.append('arcpublishing:%s:%s' % (org, uuid))
         return found

     def _real_extract(self, url):
         """Look the video up by UUID on the organisation's API and build its info dict."""
         org, uuid = re.match(self._VALID_URL, url).groups()

         # The template is chosen with the original org name; only afterwards
         # is 'wapo' replaced by 'washpost' for the actual host name.
         base_api_tmpl = next(
             (tmpl for orgs, tmpl in self._POWA_DEFAULTS if org in orgs),
             '%s-prod-cdn.video-api.arcpublishing.com/api')
         if org == 'wapo':
             org = 'washpost'

         api_url = 'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl % org)
         # The endpoint answers with a list; the first element is the video.
         video = self._download_json(api_url, uuid, query={'uuid': uuid})[0]
         title = video['headlines']['basic']
         is_live = video.get('status') == 'live'

         seen_urls = []
         formats = []
         for stream in video.get('streams', []):
             stream_url = stream.get('url')
             # Skip streams without a URL and URLs that were already handled.
             if not stream_url or stream_url in seen_urls:
                 continue
             seen_urls.append(stream_url)
             stream_type = stream.get('stream_type')

             if stream_type == 'smil':
                 smil_formats = self._extract_smil_formats(
                     stream_url, uuid, fatal=False)
                 for fmt in smil_formats:
                     if not fmt['url'].endswith('/cfx/st'):
                         continue
                     # Formats served from a ".../cfx/st" URL get an explicit
                     # app name and an "mp4:"-prefixed play path.
                     fmt['app'] = 'cfx/st'
                     play_path = fmt['play_path']
                     if not play_path.startswith('mp4:'):
                         fmt['play_path'] = 'mp4:' + play_path
                     if isinstance(fmt['tbr'], float):
                         # Move the float tbr into vbr (times 1000) and name
                         # the format after the resulting value.
                         fmt['vbr'] = fmt.pop('tbr') * 1000
                         fmt['format_id'] = 'rtmp-%d' % fmt['vbr']
                 formats.extend(smil_formats)
                 continue

             if stream_type in ('ts', 'hls'):
                 entry_protocol = 'm3u8' if is_live else 'm3u8_native'
                 hls_formats = self._extract_m3u8_formats(
                     stream_url, uuid, 'mp4', entry_protocol,
                     m3u8_id='hls', fatal=False)
                 # Drop the whole playlist when no format reports audio
                 # (this also covers an empty result).
                 audio_less = [
                     fmt.get('acodec') == 'none' for fmt in hls_formats]
                 if all(audio_less):
                     continue
                 for fmt in hls_formats:
                     if fmt.get('acodec') == 'none':
                         fmt['preference'] = -40
                     elif fmt.get('vcodec') == 'none':
                         fmt['preference'] = -50
                     height = fmt.get('height')
                     if height:
                         # Take the number that follows the height in the
                         # format URL as the video bitrate, when present.
                         bitrate_str = self._search_regex(
                             r'[_x]%d[_-](\d+)' % height, fmt['url'], 'vbr', default=None)
                         if bitrate_str:
                             fmt['vbr'] = int(bitrate_str)
                 formats.extend(hls_formats)
                 continue

             # Any other stream type is used as a direct format.
             bitrate = int_or_none(stream.get('bitrate'))
             if bitrate:
                 format_id = '%s-%d' % (stream_type, bitrate)
             else:
                 format_id = stream_type
             formats.append({
                 'format_id': format_id,
                 'vbr': bitrate,
                 'width': int_or_none(stream.get('width')),
                 'height': int_or_none(stream.get('height')),
                 'filesize': int_or_none(stream.get('filesize')),
                 'url': stream_url,
                 'preference': -1,
             })
         self._sort_formats(
             formats, ('preference', 'width', 'height', 'vbr', 'filesize', 'tbr', 'ext', 'format_id'))

         # Every subtitle URL found is filed under 'en'.
         subtitle_items = try_get(
             video, lambda x: x['subtitles']['urls'], list) or []
         subtitle_urls = [item.get('url') for item in subtitle_items]
         en_subtitles = [{'url': sub_url} for sub_url in subtitle_urls if sub_url]
         subtitles = {'en': en_subtitles} if en_subtitles else {}

         return {
             'id': uuid,
             'title': self._live_title(title) if is_live else title,
             'thumbnail': try_get(video, lambda x: x['promo_image']['url']),
             'description': try_get(video, lambda x: x['subheadlines']['basic']),
             'formats': formats,
             'duration': int_or_none(video.get('duration'), 100),
             'timestamp': parse_iso8601(video.get('created_date')),
             'subtitles': subtitles,
             'is_live': is_live,
         }
--- a/haruhi_dl/extractor/ard.py
+++ b/haruhi_dl/extractor/ard.py
@ -187,13 +187,13 @@ class ARDMediathekIE(ARDMediathekBaseIE):
            if doc.tag == 'rss':
                return GenericIE()._extract_rss(url, video_id, doc)
-        title = self._html_search_regex(
+        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
             r'<meta name="dcterms\.title" content="(.*?)"/>',
             r'<h4 class="headline">(.*?)</h4>',
             r'<title[^>]*>(.*?)</title>'],
            webpage, 'title')
-        description = self._html_search_meta(
+        description = self._og_search_description(webpage, default=None) or self._html_search_meta(
            'dcterms.abstract', webpage, 'description', default=None)
        if description is None:
            description = self._html_search_meta(
@ -249,18 +249,18 @@ class ARDMediathekIE(ARDMediathekBaseIE):
 class ARDIE(InfoExtractor):
-    _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
    _TESTS = [{
-        # available till 14.02.2019
+        # available till 7.01.2022
-        'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
+        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
-        'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+        'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
        'info_dict': {
-            'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
+            'display_id': 'maischberger-die-woche',
-            'id': '102',
+            'id': '100',
            'ext': 'mp4',
-            'duration': 4435.0,
+            'duration': 3687.0,
-            'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
+            'title': 'maischberger. die woche vom 7. Januar 2021',
-            'upload_date': '20180214',
+            'upload_date': '20210107',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
@ -284,20 +284,42 @@ class ARDIE(InfoExtractor):
        formats = []
        for a in video_node.findall('.//asset'):
            file_name = xpath_text(a, './fileName', default=None)
            if not file_name:
                continue
            format_type = a.attrib.get('type')
            format_url = url_or_none(file_name)
            if format_url:
                ext = determine_ext(file_name)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, display_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_type or 'hls', fatal=False))
                    continue
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        update_url_query(format_url, {'hdcore': '3.7.0'}),
                        display_id, f4m_id=format_type or 'hds', fatal=False))
                    continue
            f = {
-                'format_id': a.attrib['type'],
+                'format_id': format_type,
-                'width': int_or_none(a.find('./frameWidth').text),
+                'width': int_or_none(xpath_text(a, './frameWidth')),
-                'height': int_or_none(a.find('./frameHeight').text),
+                'height': int_or_none(xpath_text(a, './frameHeight')),
-                'vbr': int_or_none(a.find('./bitrateVideo').text),
+                'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
-                'abr': int_or_none(a.find('./bitrateAudio').text),
+                'abr': int_or_none(xpath_text(a, './bitrateAudio')),
-                'vcodec': a.find('./codecVideo').text,
+                'vcodec': xpath_text(a, './codecVideo'),
-                'tbr': int_or_none(a.find('./totalBitrate').text),
+                'tbr': int_or_none(xpath_text(a, './totalBitrate')),
            }
-            if a.find('./serverPrefix').text:
+            server_prefix = xpath_text(a, './serverPrefix', default=None)
-                f['url'] = a.find('./serverPrefix').text
+            if server_prefix:
-                f['playpath'] = a.find('./fileName').text
+                f.update({
                    'url': server_prefix,
                    'playpath': file_name,
                })
            else:
-                f['url'] = a.find('./fileName').text
+                if not format_url:
                    continue
                f['url'] = format_url
            formats.append(f)
        self._sort_formats(formats)
@ -315,17 +337,17 @@ class ARDIE(InfoExtractor):
 class ARDBetaMediathekIE(ARDMediathekBaseIE):
    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
    _TESTS = [{
-        'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
-        'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+        'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
        'info_dict': {
            'display_id': 'die-robuste-roswita',
-            'id': '70153354',
+            'id': '78566716',
            'title': 'Die robuste Roswita',
-            'description': r're:^Der Mord.*trüber ist als die Ilm.',
+            'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
            'duration': 5316,
-            'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
+            'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
-            'timestamp': 1577047500,
+            'timestamp': 1596658200,
-            'upload_date': '20191222',
+            'upload_date': '20200805',
            'ext': 'mp4',
        },
    }, {
--- a/haruhi_dl/extractor/arte.py
+++ b/haruhi_dl/extractor/arte.py
@ -4,23 +4,57 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import (
    compat_str,
    compat_urlparse,
 )
 from ..utils import (
    ExtractorError,
    int_or_none,
    qualities,
    try_get,
    unified_strdate,
    url_or_none,
 )
 # There are different sources of video in arte.tv, the extraction process
 # is different for each one. The videos usually expire in 7 days, so we can't
 # add tests.
 class ArteTVBaseIE(InfoExtractor):
-    def _extract_from_json_url(self, json_url, video_id, lang, title=None):
+    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
-        info = self._download_json(json_url, video_id)
+    _API_BASE = 'https://api.arte.tv/api/player/v1'
 class ArteTVIE(ArteTVBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
                        /(?P<id>\d{6}-\d{3}-[AF])
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            'id': '088501-000-A',
            'ext': 'mp4',
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
        'only_matching': True,
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        lang = mobj.group('lang') or mobj.group('lang_2')
        info = self._download_json(
            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
        player_info = info['videoJsonPlayer']
        vsr = try_get(player_info, lambda x: x['VSR'], dict)
@ -37,18 +71,11 @@ class ArteTVBaseIE(InfoExtractor):
        if not upload_date_str:
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
-        title = (player_info.get('VTI') or title or player_info['VID']).strip()
+        title = (player_info.get('VTI') or player_info['VID']).strip()
        subtitle = player_info.get('VSU', '').strip()
        if subtitle:
            title += ' - %s' % subtitle
        info_dict = {
            'id': player_info['VID'],
            'title': title,
            'description': player_info.get('VDE'),
            'upload_date': unified_strdate(upload_date_str),
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
        }
        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
        LANGS = {
@ -65,6 +92,10 @@ class ArteTVBaseIE(InfoExtractor):
        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            format_url = url_or_none(f.get('url'))
            streamer = f.get('streamer')
            if not format_url and not streamer:
                continue
            versionCode = f.get('versionCode')
            l = re.escape(langcode)
@ -107,6 +138,16 @@ class ArteTVBaseIE(InfoExtractor):
            else:
                lang_pref = -1
            media_type = f.get('mediaType')
            if media_type == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
                for m3u8_format in m3u8_formats:
                    m3u8_format['language_preference'] = lang_pref
                formats.extend(m3u8_formats)
                continue
            format = {
                'format_id': format_id,
                'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
@ -118,7 +159,7 @@ class ArteTVBaseIE(InfoExtractor):
                'quality': qfunc(f.get('quality')),
            }
-            if f.get('mediaType') == 'rtmp':
+            if media_type == 'rtmp':
                format['url'] = f['streamer']
                format['play_path'] = 'mp4:' + f['url']
                format['ext'] = 'flv'
@ -127,56 +168,50 @@ class ArteTVBaseIE(InfoExtractor):
            formats.append(format)
        self._check_formats(formats, video_id)
        self._sort_formats(formats)
-        info_dict['formats'] = formats
+        return {
-        return info_dict
+            'id': player_info.get('VID') or video_id,
            'title': title,
            'description': player_info.get('VDE'),
            'upload_date': unified_strdate(upload_date_str),
            'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
            'formats': formats,
        }
-class ArteTVPlus7IE(ArteTVBaseIE):
+class ArteTVEmbedIE(InfoExtractor):
-    IE_NAME = 'arte.tv:+7'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>\d{6}-\d{3}-[AF])'
    _TESTS = [{
-        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
+        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
        'info_dict': {
-            'id': '088501-000-A',
+            'id': '100605-013-A',
            'ext': 'mp4',
-            'title': 'Mexico: Stealing Petrol to Survive',
+            'title': 'United we Stream November Lockdown Edition #13',
-            'upload_date': '20190628',
+            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
            'upload_date': '20201116',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]
-    def _real_extract(self, url):
+    @staticmethod
-        lang, video_id = re.match(self._VALID_URL, url).groups()
+    def _extract_urls(webpage):
-        return self._extract_from_json_url(
+        return [url for _, url in re.findall(
-            'https://api.arte.tv/api/player/v1/config/%s/%s' % (lang, video_id),
+            r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
-            video_id, lang)
+            webpage)]
 class ArteTVEmbedIE(ArteTVPlus7IE):
    IE_NAME = 'arte.tv:embed'
    _VALID_URL = r'''(?x)
        https://www\.arte\.tv
        /player/v3/index\.php\?json_url=
        (?P<json_url>
            https?://api\.arte\.tv/api/player/v1/config/
            (?P<lang>[^/]+)/(?P<id>\d{6}-\d{3}-[AF])
        )
    '''
    _TESTS = []
    def _real_extract(self, url):
-        json_url, lang, video_id = re.match(self._VALID_URL, url).groups()
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
-        return self._extract_from_json_url(json_url, video_id, lang)
+        json_url = qs['json_url'][0]
        video_id = ArteTVIE._match_id(json_url)
        return self.url_result(
            json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
 class ArteTVPlaylistIE(ArteTVBaseIE):
-    IE_NAME = 'arte.tv:playlist'
+    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>fr|de|en|es|it|pl)/videos/(?P<id>RC-\d{6})'
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
@ -185,17 +220,35 @@ class ArteTVPlaylistIE(ArteTVBaseIE):
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        lang, playlist_id = re.match(self._VALID_URL, url).groups()
        collection = self._download_json(
-            'https://api.arte.tv/api/player/v1/collectionData/%s/%s?source=videos'
+            '%s/collectionData/%s/%s?source=videos'
-            % (lang, playlist_id), playlist_id)
+            % (self._API_BASE, lang, playlist_id), playlist_id)
        entries = []
        for video in collection['videos']:
            if not isinstance(video, dict):
                continue
            video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
            if not video_url:
                continue
            video_id = video.get('programId')
            entries.append({
                '_type': 'url_transparent',
                'url': video_url,
                'id': video_id,
                'title': video.get('title'),
                'alt_title': video.get('subtitle'),
                'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
                'duration': int_or_none(video.get('durationSeconds')),
                'view_count': int_or_none(video.get('views')),
                'ie_key': ArteTVIE.ie_key(),
            })
        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')
        entries = [
            self._extract_from_json_url(
                video['jsonUrl'], video.get('programId') or playlist_id, lang)
            for video in collection['videos'] if video.get('jsonUrl')]
        return self.playlist_result(entries, playlist_id, title, description)
--- a/haruhi_dl/extractor/asiancrush.py
+++ b/haruhi_dl/extractor/asiancrush.py
@ -1,27 +1,91 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import functools
 import re
 from .common import InfoExtractor
 from .kaltura import KalturaIE
-from ..utils import extract_attributes
+from ..utils import (
    extract_attributes,
    int_or_none,
    OnDemandPagedList,
    parse_age_limit,
    strip_or_none,
    try_get,
 )
-class AsianCrushIE(InfoExtractor):
+class AsianCrushBaseIE(InfoExtractor):
-    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
+    _VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
-    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
+    _KALTURA_KEYS = [
        'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
        'widescreen_thumbnail_url', 'screencap_widescreen',
    ]
    _API_SUFFIX = {'retrocrush.tv': '-ott'}
    def _call_api(self, host, endpoint, video_id, query, resource):
        return self._download_json(
            'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
            'Downloading %s JSON metadata' % resource, query=query,
            headers=self.geo_verification_headers())['objects']
    def _download_object_data(self, host, object_id, resource):
        return self._call_api(
            host, 'search', object_id, {'id': object_id}, resource)[0]
    def _get_object_description(self, obj):
        """Return the object's description, preferring the long form.

        Falls back to ``short_description`` when ``long_description`` is
        missing or empty; the chosen value is passed through ``strip_or_none``.
        """
        text = obj.get('long_description')
        if not text:
            text = obj.get('short_description')
        return strip_or_none(text)
    def _parse_video_data(self, video):
        """Turn an API video object into a ``url_transparent`` Kaltura result.

        The Kaltura partner and entry ids are taken from the first field
        listed in ``_KALTURA_KEYS`` whose URL contains
        ``/p/<partner>/.../entryId/<entry>/``.

        NOTE: when no field matches, both ids stay ``None`` and the resulting
        URL is ``kaltura:None:None``.
        """
        title = video['name']

        partner_id = entry_id = None
        for key in self._KALTURA_KEYS:
            candidate_url = video.get(key)
            if not candidate_url:
                continue
            match = re.search(
                r'/p/(\d+)/.+?/entryId/([^/]+)/', candidate_url)
            if match is None:
                continue
            partner_id, entry_id = match.groups()
            break

        raw_categories = try_get(
            video, lambda x: x['meta']['categories'], list) or []
        category_names = [c.get('name') for c in raw_categories]
        # Drop categories that carry no (or an empty) name.
        categories = [name for name in category_names if name]

        show_info = video.get('show_info') or {}

        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': KalturaIE.ie_key(),
            'id': entry_id,
            'title': title,
            'description': self._get_object_description(video),
            'age_limit': parse_age_limit(
                video.get('mpaa_rating') or video.get('tv_rating')),
            'categories': categories,
            'series': show_info.get('show_name'),
            'season_number': int_or_none(show_info.get('season_num')),
            'season_id': show_info.get('season_id'),
            'episode_number': int_or_none(show_info.get('episode_num')),
        }
 class AsianCrushIE(AsianCrushBaseIE):
    _VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
-        'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
+        'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
        'md5': 'c3b740e48d0ba002a42c0b72857beae6',
        'info_dict': {
            'id': '1_y4tmjm5r',
            'ext': 'mp4',
            'title': 'Women Who Flirt',
-            'description': 'md5:7e986615808bcfb11756eb503a751487',
+            'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
            'timestamp': 1496936429,
            'upload_date': '20170608',
            'uploader_id': 'craig@crifkin.com',
            'age_limit': 13,
            'categories': 'count:5',
            'duration': 5812,
        },
    }, {
        'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
@ -41,67 +105,35 @@ class AsianCrushIE(InfoExtractor):
    }, {
        'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        host, video_id = re.match(self._VALID_URL, url).groups()
        host = mobj.group('host')
        video_id = mobj.group('id')
        if host == 'cocoro.tv':
            webpage = self._download_webpage(url, video_id)
-
+            embed_vars = self._parse_json(self._search_regex(
        entry_id, partner_id, title = [None] * 3
        vars = self._parse_json(
            self._search_regex(
                r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
-                default='{}'), video_id, fatal=False)
+                default='{}'), video_id, fatal=False) or {}
-        if vars:
+            video_id = embed_vars.get('entry_id') or video_id
            entry_id = vars.get('entry_id')
            partner_id = vars.get('partner_id')
            title = vars.get('vid_label')
-        if not entry_id:
+        video = self._download_object_data(host, video_id, 'video')
-            entry_id = self._search_regex(
+        return self._parse_video_data(video)
                r'\bentry_id["\']\s*:\s*["\'](\d+)', webpage, 'entry id')
        player = self._download_webpage(
            'https://api.%s/embeddedVideoPlayer' % host, video_id,
            query={'id': entry_id})
        kaltura_id = self._search_regex(
            r'entry_id["\']\s*:\s*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
            'kaltura id', group='id')
        if not partner_id:
            partner_id = self._search_regex(
                r'/p(?:artner_id)?/(\d+)', player, 'partner id',
                default='513551')
        description = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']description["\'][^>]*>(.+?)</div>',
            webpage, 'description', fatal=False)
        return {
            '_type': 'url_transparent',
            'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
            'ie_key': KalturaIE.ie_key(),
            'id': video_id,
            'title': title,
            'description': description,
        }
-class AsianCrushPlaylistIE(InfoExtractor):
+class AsianCrushPlaylistIE(AsianCrushBaseIE):
-    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
+    _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
    _TESTS = [{
-        'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
+        'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
        'info_dict': {
-            'id': '12481',
+            'id': '6447',
-            'title': 'Scholar Who Walks the Night',
+            'title': 'Fruity Samurai',
-            'description': 'md5:7addd7c5132a09fd4741152d96cce886',
+            'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
        },
-        'playlist_count': 20,
+        'playlist_count': 13,
    }, {
        'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
        'only_matching': True,
@ -111,11 +143,27 @@ class AsianCrushPlaylistIE(InfoExtractor):
    }, {
        'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
        'only_matching': True,
    }, {
        'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
        'only_matching': True,
    }]
    _PAGE_SIZE = 1000000000
    def _fetch_page(self, domain, parent_id, page):
        videos = self._call_api(
            domain, 'getreferencedobjects', parent_id, {
                'max': self._PAGE_SIZE,
                'object_type': 'video',
                'parent_id': parent_id,
                'start': page * self._PAGE_SIZE,
            }, 'page %d' % (page + 1))
        for video in videos:
            yield self._parse_video_data(video)
    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        host, playlist_id = re.match(self._VALID_URL, url).groups()
        if host == 'cocoro.tv':
            webpage = self._download_webpage(url, playlist_id)
            entries = []
@ -141,5 +189,12 @@ class AsianCrushPlaylistIE(InfoExtractor):
            description = self._og_search_description(
                webpage, default=None) or self._html_search_meta(
                'twitter:description', webpage, 'description', fatal=False)
        else:
            show = self._download_object_data(host, playlist_id, 'show')
            title = show.get('name')
            description = self._get_object_description(show)
            entries = OnDemandPagedList(
                functools.partial(self._fetch_page, host, playlist_id),
                self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_id, title, description)
--- a/haruhi_dl/extractor/awaan.py
+++ b/haruhi_dl/extractor/awaan.py
@ -48,6 +48,7 @@ class AWAANBaseIE(InfoExtractor):
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': parse_iso8601(video_data.get('create_time'), ' '),
            'is_live': is_live,
            'uploader_id': video_data.get('user_id'),
        }
@ -107,6 +108,7 @@ class AWAANLiveIE(AWAANBaseIE):
            'title': 're:Dubai Al Oula [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'upload_date': '20150107',
            'timestamp': 1420588800,
            'uploader_id': '71',
        },
        'params': {
            # m3u8 download
--- a/haruhi_dl/extractor/azmedien.py
+++ b/haruhi_dl/extractor/azmedien.py
@ -47,7 +47,7 @@ class AZMedienIE(InfoExtractor):
        'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
        'only_matching': True
    }]
-    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+    _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/a4016f65fe62b81dc6664dd9f4910e4ab40383be'
    _PARTNER_ID = '1719221'
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/bandcamp.py
+++ b/haruhi_dl/extractor/bandcamp.py
@ -1,3 +1,4 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import random
@ -5,10 +6,7 @@ import re
 import time
 from .common import InfoExtractor
-from ..compat import (
+from ..compat import compat_str
    compat_str,
    compat_urlparse,
 )
 from ..utils import (
    ExtractorError,
    float_or_none,
@ -17,30 +15,32 @@ from ..utils import (
    parse_filesize,
    str_or_none,
    try_get,
    unescapeHTML,
    update_url_query,
    unified_strdate,
    unified_timestamp,
    url_or_none,
    urljoin,
 )
 class BandcampIE(InfoExtractor):
-    _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
+    _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://haruhi-dl.bandcamp.com/track/haruhi-dl-test-song',
        'md5': 'c557841d5e50261777a6585648adf439',
        'info_dict': {
            'id': '1812978515',
            'ext': 'mp3',
-            'title': "haruhi-dl  \"'/\\\u00e4\u21ad - haruhi-dl test song \"'/\\\u00e4\u21ad",
+            'title': "haruhi-dl  \"'/\\ä↭ - haruhi-dl  \"'/\\ä↭ - haruhi-dl test song \"'/\\ä↭",
            'duration': 9.8485,
            'uploader': 'haruhi-dl  "\'/\\ä↭',
            'upload_date': '20121129',
            'timestamp': 1354224127,
        },
        '_skip': 'There is a limit of 200 free downloads / month for the test song'
    }, {
        # free download
        'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
        'md5': '853e35bf34aa1d6fe2615ae612564b36',
        'info_dict': {
            'id': '2650410135',
            'ext': 'aiff',
@ -79,11 +79,16 @@ class BandcampIE(InfoExtractor):
        },
    }]
    def _extract_data_attr(self, webpage, video_id, attr='tralbum', fatal=True):
        return self._parse_json(self._html_search_regex(
            r'data-%s=(["\'])({.+?})\1' % attr, webpage,
            attr + ' data', group=2), video_id, fatal=fatal)
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        title = self._match_id(url)
        title = mobj.group('title')
        webpage = self._download_webpage(url, title)
-        thumbnail = self._html_search_meta('og:image', webpage, default=None)
+        tralbum = self._extract_data_attr(webpage, title)
        thumbnail = self._og_search_thumbnail(webpage)
        track_id = None
        track = None
@ -91,10 +96,7 @@ class BandcampIE(InfoExtractor):
        duration = None
        formats = []
-        track_info = self._parse_json(
+        track_info = try_get(tralbum, lambda x: x['trackinfo'][0], dict)
            self._search_regex(
                r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
                webpage, 'track info', default='{}'), title)
        if track_info:
            file_ = track_info.get('file')
            if isinstance(file_, dict):
@ -111,37 +113,25 @@ class BandcampIE(InfoExtractor):
                        'abr': int_or_none(abr_str),
                    })
            track = track_info.get('title')
-            track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
+            track_id = str_or_none(
                track_info.get('track_id') or track_info.get('id'))
            track_number = int_or_none(track_info.get('track_num'))
            duration = float_or_none(track_info.get('duration'))
-        def extract(key):
+        embed = self._extract_data_attr(webpage, title, 'embed', False)
-            return self._search_regex(
+        current = tralbum.get('current') or {}
-                r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+        artist = embed.get('artist') or current.get('artist') or tralbum.get('artist')
                webpage, key, default=None, group='value')
        artist = extract('artist')
        album = extract('album_title')
        timestamp = unified_timestamp(
-            extract('publish_date') or extract('album_publish_date'))
+            current.get('publish_date') or tralbum.get('album_publish_date'))
        release_date = unified_strdate(extract('album_release_date'))
-        download_link = self._search_regex(
+        download_link = tralbum.get('freeDownloadPage')
            r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'download link', default=None, group='url')
        if download_link:
-            track_id = self._search_regex(
+            track_id = compat_str(tralbum['id'])
                r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
                webpage, 'track id')
            download_webpage = self._download_webpage(
                download_link, track_id, 'Downloading free downloads page')
-            blob = self._parse_json(
+            blob = self._extract_data_attr(download_webpage, track_id, 'blob')
                self._search_regex(
                    r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
                    'blob', group='blob'),
                track_id, transform_source=unescapeHTML)
            info = try_get(
                blob, (lambda x: x['digital_items'][0],
@ -207,20 +197,20 @@ class BandcampIE(InfoExtractor):
            'thumbnail': thumbnail,
            'uploader': artist,
            'timestamp': timestamp,
-            'release_date': release_date,
+            'release_date': unified_strdate(tralbum.get('album_release_date')),
            'duration': duration,
            'track': track,
            'track_number': track_number,
            'track_id': track_id,
            'artist': artist,
-            'album': album,
+            'album': embed.get('album_title'),
            'formats': formats,
        }
-class BandcampAlbumIE(InfoExtractor):
+class BandcampAlbumIE(BandcampIE):
    IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<id>[^/?#&]+))?'
    _TESTS = [{
        'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@ -230,7 +220,10 @@ class BandcampAlbumIE(InfoExtractor):
                'info_dict': {
                    'id': '1353101989',
                    'ext': 'mp3',
-                    'title': 'Intro',
+                    'title': 'Blazo - Intro',
                    'timestamp': 1311756226,
                    'upload_date': '20110727',
                    'uploader': 'Blazo',
                }
            },
            {
@ -238,7 +231,10 @@ class BandcampAlbumIE(InfoExtractor):
                'info_dict': {
                    'id': '38097443',
                    'ext': 'mp3',
-                    'title': 'Kero One - Keep It Alive (Blazo remix)',
+                    'title': 'Blazo - Kero One - Keep It Alive (Blazo remix)',
                    'timestamp': 1311757238,
                    'upload_date': '20110727',
                    'uploader': 'Blazo',
                }
            },
        ],
@ -274,6 +270,7 @@ class BandcampAlbumIE(InfoExtractor):
            'title': '"Entropy" EP',
            'uploader_id': 'jstrecords',
            'id': 'entropy-ep',
            'description': 'md5:0ff22959c943622972596062f2f366a5',
        },
        'playlist_mincount': 3,
    }, {
@ -283,6 +280,7 @@ class BandcampAlbumIE(InfoExtractor):
            'id': 'we-are-the-plague',
            'title': 'WE ARE THE PLAGUE',
            'uploader_id': 'insulters',
            'description': 'md5:b3cf845ee41b2b1141dc7bde9237255f',
        },
        'playlist_count': 2,
    }]
@ -294,41 +292,34 @@ class BandcampAlbumIE(InfoExtractor):
                else super(BandcampAlbumIE, cls).suitable(url))
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        uploader_id, album_id = re.match(self._VALID_URL, url).groups()
        uploader_id = mobj.group('subdomain')
        album_id = mobj.group('album_id')
        playlist_id = album_id or uploader_id
        webpage = self._download_webpage(url, playlist_id)
-        track_elements = re.findall(
+        tralbum = self._extract_data_attr(webpage, playlist_id)
-            r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+        track_info = tralbum.get('trackinfo')
-        if not track_elements:
+        if not track_info:
            raise ExtractorError('The page doesn\'t contain any tracks')
        # Only tracks with duration info have songs
        entries = [
            self.url_result(
-                compat_urlparse.urljoin(url, t_path),
+                urljoin(url, t['title_link']), BandcampIE.ie_key(),
-                ie=BandcampIE.ie_key(),
+                str_or_none(t.get('track_id') or t.get('id')), t.get('title'))
-                video_title=self._search_regex(
+            for t in track_info
-                    r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+            if t.get('duration')]
-                    elem_content, 'track title', fatal=False))
+
-            for elem_content, t_path in track_elements
+        current = tralbum.get('current') or {}
            if self._html_search_meta('duration', elem_content, default=None)]
        title = self._html_search_regex(
            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
            webpage, 'title', fatal=False)
        if title:
            title = title.replace(r'\"', '"')
        return {
            '_type': 'playlist',
            'uploader_id': uploader_id,
            'id': playlist_id,
-            'title': title,
+            'title': current.get('title'),
            'description': current.get('about'),
            'entries': entries,
        }
-class BandcampWeeklyIE(InfoExtractor):
+class BandcampWeeklyIE(BandcampIE):
    IE_NAME = 'Bandcamp:weekly'
    _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
    _TESTS = [{
@ -343,29 +334,23 @@ class BandcampWeeklyIE(InfoExtractor):
            'release_date': '20170404',
            'series': 'Bandcamp Weekly',
            'episode': 'Magic Moments',
            'episode_number': 208,
            'episode_id': '224',
-        }
+        },
        'params': {
            'format': 'opus-lo',
        },
    }, {
        'url': 'https://bandcamp.com/?blah/blah@&show=228',
        'only_matching': True
    }]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        show_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(url, show_id)
-        blob = self._parse_json(
+        blob = self._extract_data_attr(webpage, show_id, 'blob')
            self._search_regex(
                r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
                'blob', group='blob'),
            video_id, transform_source=unescapeHTML)
-        show = blob['bcw_show']
+        show = blob['bcw_data'][show_id]
        # This is desired because any invalid show id redirects to `bandcamp.com`
        # which happens to expose the latest Bandcamp Weekly episode.
        show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
        formats = []
        for format_id, format_url in show['audio_stream'].items():
@ -390,20 +375,8 @@ class BandcampWeeklyIE(InfoExtractor):
        if subtitle:
            title += ' - %s' % subtitle
        episode_number = None
        seq = blob.get('bcw_seq')
        if seq and isinstance(seq, list):
            try:
                episode_number = next(
                    int_or_none(e.get('episode_number'))
                    for e in seq
                    if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
            except StopIteration:
                pass
        return {
-            'id': video_id,
+            'id': show_id,
            'title': title,
            'description': show.get('desc') or show.get('short_desc'),
            'duration': float_or_none(show.get('audio_duration')),
@ -411,7 +384,6 @@ class BandcampWeeklyIE(InfoExtractor):
            'release_date': unified_strdate(show.get('published_date')),
            'series': 'Bandcamp Weekly',
            'episode': show.get('subtitle'),
-            'episode_number': episode_number,
+            'episode_id': show_id,
            'episode_id': compat_str(video_id),
            'formats': formats
        }
--- a/haruhi_dl/extractor/bbc.py
+++ b/haruhi_dl/extractor/bbc.py
@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
    _LOGIN_URL = 'https://account.bbc.com/signin'
    _NETRC_MACHINE = 'bbc'
-    _MEDIASELECTOR_URLS = [
+    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
    _MEDIA_SETS = [
        # Provides HQ HLS streams with even better quality than the pc mediaset, but fails
        # with a geolocation error in some cases even when the content is not geo restricted
        # at all (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also fail with
        # selectionunavailable.
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
+        'iptv-all',
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+        'pc',
    ]
    _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
    _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
    _NAMESPACES = (
        _MEDIASELECTION_NS,
        _EMP_PLAYLIST_NS,
    )
    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
            'only_matching': True,
        }]
    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
    def _login(self):
        username, password = self._get_login_info()
        if username is None:
@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
    def _extract_items(self, playlist):
        return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
    def _findall_ns(self, element, xpath):
        elements = []
        for ns in self._NAMESPACES:
            elements.extend(element.findall(xpath % ns))
        return elements
    def _extract_medias(self, media_selection):
-        error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
+        error = media_selection.get('result')
-        if error is None:
+        if error:
-            media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
+            raise BBCCoUkIE.MediaSelectionError(error)
-        if error is not None:
+        return media_selection.get('media') or []
            raise BBCCoUkIE.MediaSelectionError(error.get('id'))
        return self._findall_ns(media_selection, './{%s}media')
    def _extract_connections(self, media):
-        return self._findall_ns(media, './{%s}connection')
+        return media.get('connection') or []
    def _get_subtitles(self, media, programme_id):
        subtitles = {}
@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
                cc_url, programme_id, 'Downloading captions', fatal=False)
            if not isinstance(captions, compat_etree_Element):
                continue
-            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
+            subtitles['en'] = [
            subtitles[lang] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
            ]
            break
        return subtitles
    def _raise_extractor_error(self, media_selection_error):
@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):
    def _download_media_selector(self, programme_id):
        last_exception = None
-        for mediaselector_url in self._MEDIASELECTOR_URLS:
+        for media_set in self._MEDIA_SETS:
            try:
                return self._download_media_selector_url(
-                    mediaselector_url % programme_id, programme_id)
+                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
            except BBCCoUkIE.MediaSelectionError as e:
                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                    last_exception = e
@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
        self._raise_extractor_error(last_exception)
    def _download_media_selector_url(self, url, programme_id=None):
-        media_selection = self._download_xml(
+        media_selection = self._download_json(
-            url, programme_id, 'Downloading media selection XML',
+            url, programme_id, 'Downloading media selection JSON',
            expected_status=(403, 404))
        return self._process_media_selector(media_selection, programme_id)
@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
            if kind in ('video', 'audio'):
                bitrate = int_or_none(media.get('bitrate'))
                encoding = media.get('encoding')
                service = media.get('service')
                width = int_or_none(media.get('width'))
                height = int_or_none(media.get('height'))
                file_size = int_or_none(media.get('media_file_size'))
@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
                    supplier = connection.get('supplier')
                    transfer_format = connection.get('transferFormat')
                    format_id = supplier or conn_kind or protocol
                    if service:
                        format_id = '%s_%s' % (service, format_id)
                    # ASX playlist
                    if supplier == 'asx':
                        for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
                        formats.extend(self._extract_m3u8_formats(
                            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                            m3u8_id=format_id, fatal=False))
                        if re.search(self._USP_RE, href):
                            usp_formats = self._extract_m3u8_formats(
                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
                                programme_id, ext='mp4', entry_protocol='m3u8_native',
                                m3u8_id=format_id, fatal=False)
                            for f in usp_formats:
                                if f.get('height') and f['height'] > 720:
                                    continue
                                formats.append(f)
                    elif transfer_format == 'hds':
                        formats.extend(self._extract_f4m_formats(
                            href, programme_id, f4m_id=format_id, fatal=False))
                    else:
-                        if not service and not supplier and bitrate:
+                        if not supplier and bitrate:
                            format_id += '-%d' % bitrate
                        fmt = {
                            'format_id': format_id,
@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
        webpage = self._download_webpage(url, group_id, 'Downloading video page')
        error = self._search_regex(
-            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)
@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
    IE_DESC = 'BBC'
    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
-    _MEDIASELECTOR_URLS = [
+    _MEDIA_SETS = [
-        # Provides HQ HLS streams but fails with geolocation in some cases when it's
+        'mobile-tablet-main',
-        # even not geo restricted at all
+        'pc',
        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
        # Provides more formats, namely direct mp4 links, but fails on some videos with
        # notukerror for non UK (?) users (e.g.
        # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
        'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
        # Provides fewer formats, but works everywhere for everybody (hopefully)
        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
    ]
    _TESTS = [{
@ -981,7 +947,7 @@ class BBCIE(BBCCoUkIE):
        group_id = self._search_regex(
            r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
            webpage, 'group id', default=None)
-        if playlist_id:
+        if group_id:
            return self.url_result(
                'https://www.bbc.co.uk/programmes/%s' % group_id,
                ie=BBCCoUkIE.ie_key())
@ -1092,10 +1058,26 @@ class BBCIE(BBCCoUkIE):
            self._search_regex(
                r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
                'bbcthree config', default='{}'),
-            playlist_id, transform_source=js_to_json, fatal=False)
+            playlist_id, transform_source=js_to_json, fatal=False) or {}
-        if bbc3_config:
+        payload = bbc3_config.get('payload') or {}
        if payload:
            clip = payload.get('currentClip') or {}
            clip_vpid = clip.get('vpid')
            clip_title = clip.get('title')
            if clip_vpid and clip_title:
                formats, subtitles = self._download_media_selector(clip_vpid)
                self._sort_formats(formats)
                return {
                    'id': clip_vpid,
                    'title': clip_title,
                    'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
                    'description': clip.get('description'),
                    'duration': parse_duration(clip.get('duration')),
                    'formats': formats,
                    'subtitles': subtitles,
                }
            bbc3_playlist = try_get(
-                bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+                payload, lambda x: x['content']['bbcMedia']['playlist'],
                dict)
            if bbc3_playlist:
                playlist_title = bbc3_playlist.get('title') or playlist_title
@ -1118,6 +1100,39 @@ class BBCIE(BBCCoUkIE):
                return self.playlist_result(
                    entries, playlist_id, playlist_title, playlist_description)
        initial_data = self._parse_json(self._search_regex(
            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
            'preload state', default='{}'), playlist_id, fatal=False)
        if initial_data:
            def parse_media(media):
                if not media:
                    return
                for item in (try_get(media, lambda x: x['media']['items'], list) or []):
                    item_id = item.get('id')
                    item_title = item.get('title')
                    if not (item_id and item_title):
                        continue
                    formats, subtitles = self._download_media_selector(item_id)
                    self._sort_formats(formats)
                    entries.append({
                        'id': item_id,
                        'title': item_title,
                        'thumbnail': item.get('holdingImageUrl'),
                        'formats': formats,
                        'subtitles': subtitles,
                    })
            for resp in (initial_data.get('data') or {}).values():
                name = resp.get('name')
                if name == 'media-experience':
                    parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                elif name == 'article':
                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
                        if block.get('type') != 'media':
                            continue
                        parse_media(block.get('model'))
            return self.playlist_result(
                entries, playlist_id, playlist_title, playlist_description)
        def extract_all(pattern):
            return list(filter(None, map(
                lambda s: self._parse_json(s, playlist_id, fatal=False),
--- a/haruhi_dl/extractor/beampro.py
+++ b/haruhi_dl/extractor/beampro.py
@ -1,194 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    clean_html,
    compat_str,
    float_or_none,
    int_or_none,
    parse_iso8601,
    try_get,
    urljoin,
 )
 class BeamProBaseIE(InfoExtractor):
    """Common base for the Mixer extractors.

    Provides the API root URL, the audience-to-age-limit table and a helper
    that turns a channel JSON object into uploader metadata.
    """
    _API_BASE = 'https://mixer.com/api/v1'
    # Looked up with the channel's `audience` value to obtain `age_limit`.
    _RATINGS = {'family': 0, 'teen': 13, '18+': 18}
    def _extract_channel_info(self, chan):
        """Return uploader, uploader_id and age_limit taken from ``chan``.

        ``chan`` is a dict-like channel object; top-level keys are preferred
        and the nested ``user`` object is used as a fallback.
        """
        owner_id = chan.get('userId')
        if not owner_id:
            owner_id = try_get(chan, lambda c: c['user']['id'])
        owner_name = chan.get('token')
        if not owner_name:
            owner_name = try_get(
                chan, lambda c: c['user']['username'], compat_str)
        channel_info = {'uploader': owner_name}
        # A falsy id is reported as None rather than as its string form.
        channel_info['uploader_id'] = compat_str(owner_id) if owner_id else None
        channel_info['age_limit'] = self._RATINGS.get(chan.get('audience'))
        return channel_info
 class BeamProLiveIE(BeamProBaseIE):
    """Extractor for live Mixer channels (beam.pro / mixer.com channel URLs)."""
    IE_NAME = 'Mixer:live'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://mixer.com/niterhayven',
        'info_dict': {
            'id': '261562',
            'ext': 'mp4',
            'title': 'Introducing The Witcher 3 //  The Grind Starts Now!',
            'description': 'md5:0b161ac080f15fe05d18a07adb44a74d',
            'thumbnail': r're:https://.*\.jpg$',
            'timestamp': 1483477281,
            'upload_date': '20170103',
            'uploader': 'niterhayven',
            'uploader_id': '373396',
            'age_limit': 18,
            'is_live': True,
            'view_count': int,
        },
        'skip': 'niterhayven is offline',
        'params': {
            'skip_download': True,
        },
    }
    # Two placeholders remain after the API base is filled in:
    # the channel id and the manifest kind ('m3u8' or 'smil').
    _MANIFEST_URL_TEMPLATE = '%s/channels/%%s/manifest.%%s' % BeamProBaseIE._API_BASE
    @classmethod
    def suitable(cls, url):
        """Reject URLs that BeamProVodIE accepts; otherwise defer to the base check."""
        if BeamProVodIE.suitable(url):
            return False
        return super(BeamProLiveIE, cls).suitable(url)
    def _real_extract(self, url):
        """Fetch the channel JSON and build the info dict for the live stream.

        Raises ExtractorError (expected) when the API reports ``online`` as
        exactly False.
        """
        channel_name = self._match_id(url)
        api_url = '%s/channels/%s' % (self._API_BASE, channel_name)
        chan = self._download_json(api_url, channel_name)
        # Only an explicit False counts as offline; a missing key does not.
        if chan.get('online') is False:
            raise ExtractorError(
                '{0} is offline'.format(channel_name), expected=True)
        channel_id = chan['id']
        hls_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'm3u8')
        formats = self._extract_m3u8_formats(
            hls_manifest, channel_name, ext='mp4', m3u8_id='hls', fatal=False)
        smil_manifest = self._MANIFEST_URL_TEMPLATE % (channel_id, 'smil')
        formats.extend(self._extract_smil_formats(
            smil_manifest, channel_name, fatal=False))
        self._sort_formats(formats)
        video_id = compat_str(chan.get('id') or channel_name)
        title = self._live_title(chan.get('name') or channel_name)
        description = clean_html(chan.get('description'))
        thumbnail = try_get(chan, lambda c: c['thumbnail']['url'], compat_str)
        timestamp = parse_iso8601(chan.get('updatedAt'))
        view_count = int_or_none(chan.get('viewersTotal'))
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'is_live': True,
            'view_count': view_count,
            'formats': formats,
        }
        # Uploader / age-limit fields come from the shared base helper.
        info.update(self._extract_channel_info(chan))
        return info
 class BeamProVodIE(BeamProBaseIE):
    """Extractor for Mixer recordings addressed by a ``vod=`` query parameter."""
    IE_NAME = 'Mixer:vod'
    _VALID_URL = r'https?://(?:\w+\.)?(?:beam\.pro|mixer\.com)/[^/?#&]+\?.*?\bvod=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://mixer.com/willow8714?vod=2259830',
        'md5': 'b2431e6e8347dc92ebafb565d368b76b',
        'info_dict': {
            'id': '2259830',
            'ext': 'mp4',
            'title': 'willow8714\'s Channel',
            'duration': 6828.15,
            'thumbnail': r're:https://.*source\.png$',
            'timestamp': 1494046474,
            'upload_date': '20170506',
            'uploader': 'willow8714',
            'uploader_id': '6085379',
            'age_limit': 13,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://mixer.com/streamer?vod=IxFno1rqC0S_XJ1a2yGgNw',
        'only_matching': True,
    }, {
        'url': 'https://mixer.com/streamer?vod=Rh3LY0VAqkGpEQUe2pN-ig',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_format(vod, vod_type):
        """Build the format list (zero or one entry) for a single VOD object.

        ``vod_type`` must be 'hls' or 'raw'. Returns an empty list when the
        VOD has no ``baseUrl``. Raises AssertionError for any other
        ``vod_type``.
        """
        if not vod.get('baseUrl'):
            return []
        if vod_type == 'hls':
            filename, protocol = 'manifest.m3u8', 'm3u8_native'
        elif vod_type == 'raw':
            filename, protocol = 'source.mp4', 'https'
        else:
            # Raised explicitly instead of `assert False`: asserts are removed
            # under `python -O`, which would let execution fall through to an
            # UnboundLocalError on `filename` below.
            raise AssertionError('unsupported VOD format type: %r' % (vod_type,))
        data = vod.get('data') if isinstance(vod.get('data'), dict) else {}
        format_id = [vod_type]
        # The height suffix is only appended when 'Height' is a string.
        if isinstance(data.get('Height'), compat_str):
            format_id.append('%sp' % data['Height'])
        return [{
            'url': urljoin(vod['baseUrl'], filename),
            'format_id': '-'.join(format_id),
            'ext': 'mp4',
            'protocol': protocol,
            'width': int_or_none(data.get('Width')),
            'height': int_or_none(data.get('Height')),
            'fps': int_or_none(data.get('Fps')),
            'tbr': int_or_none(data.get('Bitrate'), 1000),
        }]
    def _real_extract(self, url):
        """Fetch the recording JSON and build the info dict for the VOD.

        Raises ExtractorError (expected) unless the recording's ``state`` is
        'AVAILABLE'.
        """
        vod_id = self._match_id(url)
        vod_info = self._download_json(
            '%s/recordings/%s' % (self._API_BASE, vod_id), vod_id)
        state = vod_info.get('state')
        if state != 'AVAILABLE':
            raise ExtractorError(
                'VOD %s is not available (state: %s)' % (vod_id, state),
                expected=True)
        formats = []
        thumbnail_url = None
        for vod in vod_info['vods']:
            vod_type = vod.get('format')
            if vod_type in ('hls', 'raw'):
                formats.extend(self._extract_format(vod, vod_type))
            elif vod_type == 'thumbnail':
                # The thumbnail is delivered as one more entry in the `vods` list.
                thumbnail_url = urljoin(vod.get('baseUrl'), 'source.png')
        self._sort_formats(formats)
        info = {
            'id': vod_id,
            'title': vod_info.get('name') or vod_id,
            'duration': float_or_none(vod_info.get('duration')),
            'thumbnail': thumbnail_url,
            'timestamp': parse_iso8601(vod_info.get('createdAt')),
            'view_count': int_or_none(vod_info.get('viewsTotal')),
            'formats': formats,
        }
        info.update(self._extract_channel_info(vod_info.get('channel') or {}))
        return info
--- a/haruhi_dl/extractor/bfmtv.py
+++ b/haruhi_dl/extractor/bfmtv.py
@ -0,0 +1,103 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import extract_attributes
 class BFMTVBaseIE(InfoExtractor):
    """Common base for the bfmtv.com extractors.

    Supplies the URL pattern pieces, the regex locating a video block tag and
    a helper that hands a video over to the Brightcove extractor.
    """
    _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
    # `%s` is filled in by subclasses with the page-type letter.
    _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
    _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
    def _brightcove_url_result(self, video_id, video_block):
        """Return a url_result pointing at the Brightcove player for ``video_id``.

        ``video_block`` is the attribute mapping of the video block tag; its
        ``accountid`` / ``playerid`` values are used when present and
        hard-coded defaults otherwise.
        """
        account = video_block.get('accountid')
        if not account:
            account = '876450612001'
        player = video_block.get('playerid')
        if not player:
            player = 'I2qBTln4u'
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % (account, player, video_id)
        return self.url_result(player_url, 'BrightcoveNew', video_id)
 class BFMTVIE(BFMTVBaseIE):
    """Extractor for single-video pages on bfmtv.com."""
    IE_NAME = 'bfmtv'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
        'info_dict': {
            'id': '6196747868001',
            'ext': 'mp4',
            'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
            'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
            'uploader_id': '876450610001',
            'upload_date': '20201002',
            'timestamp': 1601629620,
        },
    }]
    def _real_extract(self, url):
        """Locate the first video block on the page and delegate to Brightcove."""
        page_id = self._match_id(url)
        page = self._download_webpage(url, page_id)
        block_tag = self._search_regex(
            self._VIDEO_BLOCK_REGEX, page, 'video block')
        block_attrs = extract_attributes(block_tag)
        # A missing `videoid` attribute is an error here (plain KeyError).
        brightcove_id = block_attrs['videoid']
        return self._brightcove_url_result(brightcove_id, block_attrs)
 class BFMTVLiveIE(BFMTVIE):
    """Extractor for bfmtv.com "en-direct" (live) pages.

    Defines no methods of its own; only the name, URL pattern and tests
    differ from the parent class.
    """
    IE_NAME = 'bfmtv:live'
    # The id group captures the path itself: 'en-direct', optionally preceded
    # by one path segment (e.g. 'economie/en-direct').
    _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + '(?P<id>(?:[^/]+/)?en-direct)'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/en-direct/',
        'info_dict': {
            'id': '5615950982001',
            'ext': 'mp4',
            'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
            'uploader_id': '876450610001',
            'upload_date': '20171018',
            'timestamp': 1508329950,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bfmtv.com/economie/en-direct/',
        'only_matching': True,
    }]
 class BFMTVArticleIE(BFMTVBaseIE):
    """Extractor for bfmtv.com article pages, returned as a playlist."""
    IE_NAME = 'bfmtv:article'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
    _TESTS = [{
        'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
        'info_dict': {
            'id': '202101060198',
            'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
            'description': 'md5:947974089c303d3ac6196670ae262843',
        },
        'playlist_count': 2,
    }, {
        'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
        'only_matching': True,
    }, {
        'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Collect every video block on the article page into a playlist.

        Blocks without a ``videoid`` attribute are skipped.
        """
        bfmtv_id = self._match_id(url)
        page = self._download_webpage(url, bfmtv_id)
        entries = []
        for block_tag in re.findall(self._VIDEO_BLOCK_REGEX, page):
            block_attrs = extract_attributes(block_tag)
            brightcove_id = block_attrs.get('videoid')
            if brightcove_id:
                entries.append(
                    self._brightcove_url_result(brightcove_id, block_attrs))
        playlist_title = self._og_search_title(page, fatal=False)
        playlist_description = self._html_search_meta(
            ['og:description', 'description'], page)
        return self.playlist_result(
            entries, bfmtv_id, playlist_title, playlist_description)
--- a/haruhi_dl/extractor/bibeltv.py
+++ b/haruhi_dl/extractor/bibeltv.py
@ -0,0 +1,30 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class BibelTVIE(InfoExtractor):
    """Extractor for bibeltv.de Mediathek videos, delegating to Brightcove."""
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
        'md5': '252f908192d611de038b8504b08bf97f',
        'info_dict': {
            'id': 'ref:329703',
            'ext': 'mp4',
            'title': 'Sprachkurs in Malaiisch',
            'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
            'timestamp': 1608316701,
            'uploader_id': '5840105145001',
            'upload_date': '20201218',
        }
    }, {
        'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
        'only_matching': True,
    }]
    # The numeric id from the page URL is used as a Brightcove `ref:` id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
    def _real_extract(self, url):
        """Map the page URL's numeric id to the Brightcove player URL."""
        numeric_id = self._match_id(url)
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % numeric_id
        return self.url_result(player_url, 'BrightcoveNew')
--- a/haruhi_dl/extractor/bleacherreport.py
+++ b/haruhi_dl/extractor/bleacherreport.py
@ -90,13 +90,19 @@ class BleacherReportCMSIE(AMPIE):
    _VALID_URL = r'https?://(?:www\.)?bleacherreport\.com/video_embed\?id=(?P<id>[0-9a-f-]{36}|\d{5})'
    _TESTS = [{
        'url': 'http://bleacherreport.com/video_embed?id=8fd44c2f-3dc5-4821-9118-2c825a98c0e1&library=video-cms',
-        'md5': '2e4b0a997f9228ffa31fada5c53d1ed1',
+        'md5': '670b2d73f48549da032861130488c681',
        'info_dict': {
            'id': '8fd44c2f-3dc5-4821-9118-2c825a98c0e1',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Cena vs. Rollins Would Expose the Heavyweight Division',
            'description': 'md5:984afb4ade2f9c0db35f3267ed88b36e',
            'upload_date': '20150723',
            'timestamp': 1437679032,
        },
        'expected_warnings': [
            'Unable to download f4m manifest'
        ]
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/bongacams.py
+++ b/haruhi_dl/extractor/bongacams.py
@ -0,0 +1,60 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    int_or_none,
    try_get,
    urlencode_postdata,
 )
 class BongaCamsIE(InfoExtractor):
    """Extractor for live rooms on bongacams hosts."""
    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://de.bongacams.com/azumi-8',
        'only_matching': True,
    }, {
        'url': 'https://cn.bongacams.com/azumi-8',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Query the room data endpoint on the matched host and build the live info dict."""
        host, channel_id = re.match(self._VALID_URL, url).group('host', 'id')
        room_request = urlencode_postdata((
            ('method', 'getRoomData'),
            ('args[]', channel_id),
            ('args[]', 'false'),
        ))
        amf = self._download_json(
            'https://%s/tools/amf.php' % host, channel_id,
            data=room_request,
            headers={'X-Requested-With': 'XMLHttpRequest'})
        # Mandatory: a missing key here is a hard failure.
        server_url = amf['localData']['videoServerUrl']
        def performer_field(key, *type_args):
            # Optional lookup inside `performerData`; any extra argument is
            # passed straight through to try_get.
            return try_get(
                amf, lambda data: data['performerData'][key], *type_args)
        uploader_id = performer_field('username', compat_str) or channel_id
        uploader = performer_field('displayName', compat_str)
        like_count = int_or_none(performer_field('loversCount'))
        playlist_url = '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id)
        formats = self._extract_m3u8_formats(
            playlist_url, channel_id, 'mp4', m3u8_id='hls', live=True)
        self._sort_formats(formats)
        live_title = self._live_title(uploader or uploader_id)
        return {
            'id': channel_id,
            'title': live_title,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'like_count': like_count,
            'age_limit': 18,
            'is_live': True,
            'formats': formats,
        }
--- a/haruhi_dl/extractor/box.py
+++ b/haruhi_dl/extractor/box.py
@ -0,0 +1,98 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    parse_iso8601,
    # try_get,
    update_url_query,
 )
 class BoxIE(InfoExtractor):
    """Extractor for files shared through app.box.com shared links."""
    _VALID_URL = r'https?://(?:[^.]+\.)?app\.box\.com/s/(?P<shared_name>[^/]+)/file/(?P<id>\d+)'
    _TEST = {
        'url': 'https://mlssoccer.app.box.com/s/0evd2o3e08l60lr4ygukepvnkord1o1x/file/510727257538',
        'md5': '1f81b2fd3960f38a40a3b8823e5fcd43',
        'info_dict': {
            'id': '510727257538',
            'ext': 'mp4',
            'title': 'Garber   St. Louis will be 28th MLS team  +scarving.mp4',
            'uploader': 'MLS Video',
            'timestamp': 1566320259,
            'upload_date': '20190820',
            'uploader_id': '235196876',
        }
    }
    def _real_extract(self, url):
        """Resolve a shared file to its direct download format.

        Steps: read the request token from the page's ``Box.config`` JSON,
        exchange it for a per-file read token, fetch the file metadata with
        that token, then expose ``authenticated_download_url`` as the only
        format when the metadata marks the download as available.
        """
        # _VALID_URL has exactly two groups, in this order.
        shared_name, file_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, file_id)
        request_token = self._parse_json(self._search_regex(
            r'Box\.config\s*=\s*({.+?});', webpage,
            'Box config'), file_id)['requestToken']
        # The token endpoint's response is indexed by file id; the 'read'
        # entry is used as the bearer token below.
        access_token = self._download_json(
            'https://app.box.com/app-api/enduserapp/elements/tokens', file_id,
            'Downloading token JSON metadata',
            data=json.dumps({'fileIDs': [file_id]}).encode(), headers={
                'Content-Type': 'application/json',
                'X-Request-Token': request_token,
                'X-Box-EndUser-API': 'sharedName=' + shared_name,
            })[file_id]['read']
        shared_link = 'https://app.box.com/s/' + shared_name
        f = self._download_json(
            'https://api.box.com/2.0/files/' + file_id, file_id,
            'Downloading file JSON metadata', headers={
                'Authorization': 'Bearer ' + access_token,
                'BoxApi': 'shared_link=' + shared_link,
                'X-Rep-Hints': '[dash]',  # TODO: extract `hls` formats
            }, query={
                'fields': 'authenticated_download_url,created_at,created_by,description,extension,is_download_available,name,representations,size'
            })
        title = f['name']
        # Appended to the download URL below (and intended for the DASH
        # fragments once that path is enabled).
        query = {
            'access_token': access_token,
            'shared_link': shared_link
        }
        formats = []
        # DASH extraction is disabled until the query can be appended to every
        # fragment URL; kept here as the intended implementation.
        # for entry in (try_get(f, lambda x: x['representations']['entries'], list) or []):
        #     entry_url_template = try_get(
        #         entry, lambda x: x['content']['url_template'])
        #     if not entry_url_template:
        #         continue
        #     representation = entry.get('representation')
        #     if representation == 'dash':
        #         TODO: append query to every fragment URL
        #         formats.extend(self._extract_mpd_formats(
        #             entry_url_template.replace('{+asset_path}', 'manifest.mpd'),
        #             file_id, query=query))
        authenticated_download_url = f.get('authenticated_download_url')
        if authenticated_download_url and f.get('is_download_available'):
            formats.append({
                # Fall back to the extension of the file name when the
                # metadata carries none.
                'ext': f.get('extension') or determine_ext(title),
                'filesize': f.get('size'),
                'format_id': 'download',
                'url': update_url_query(authenticated_download_url, query),
            })
        self._sort_formats(formats)
        creator = f.get('created_by') or {}
        return {
            'id': file_id,
            'title': title,
            'formats': formats,
            # An empty description is normalised to None.
            'description': f.get('description') or None,
            'uploader': creator.get('name'),
            'timestamp': parse_iso8601(f.get('created_at')),
            'uploader_id': creator.get('id'),
        }
--- a/haruhi_dl/extractor/bravotv.py
+++ b/haruhi_dl/extractor/bravotv.py
@ -12,7 +12,7 @@ from ..utils import (
 class BravoTVIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?bravotv\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<req_id>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
        'md5': 'e34684cfea2a96cd2ee1ef3a60909de9',
@ -28,10 +28,13 @@ class BravoTVIE(AdobePassIE):
    }, {
        'url': 'http://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
        'only_matching': True,
    }, {
        'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        settings = self._parse_json(self._search_regex(
            r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>({.+?})</script>', webpage, 'drupal settings'),
@ -53,11 +56,14 @@ class BravoTVIE(AdobePassIE):
                tp_path = release_pid = tve['release_pid']
            if tve.get('entitlement') == 'auth':
                adobe_pass = settings.get('tve_adobe_auth', {})
                if site == 'bravotv':
                    site = 'bravo'
                resource = self._get_mvpd_resource(
-                    adobe_pass.get('adobePassResourceId', 'bravo'),
+                    adobe_pass.get('adobePassResourceId') or site,
                    tve['title'], release_pid, tve.get('rating'))
                query['auth'] = self._extract_mvpd_auth(
-                    url, release_pid, adobe_pass.get('adobePassRequestorId', 'bravo'), resource)
+                    url, release_pid,
                    adobe_pass.get('adobePassRequestorId') or site, resource)
        else:
            shared_playlist = settings['ls_playlist']
            account_pid = shared_playlist['account_pid']
--- a/haruhi_dl/extractor/brightcove.py
+++ b/haruhi_dl/extractor/brightcove.py
@ -28,6 +28,7 @@ from ..utils import (
    parse_iso8601,
    smuggle_url,
    str_or_none,
    try_get,
    unescapeHTML,
    unsmuggle_url,
    UnsupportedError,
@ -470,13 +471,18 @@ class BrightcoveNewIE(AdobePassIE):
    def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
        title = json_data['name'].strip()
        num_drm_sources = 0
        formats = []
-        for source in json_data.get('sources', []):
+        sources = json_data.get('sources') or []
        for source in sources:
            container = source.get('container')
            ext = mimetype2ext(source.get('type'))
            src = source.get('src')
            # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
-            if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
+            if container == 'WVM' or source.get('key_systems'):
                num_drm_sources += 1
                continue
            elif ext == 'ism':
                continue
            elif ext == 'm3u8' or container == 'M2TS':
                if not src:
@ -533,20 +539,15 @@ class BrightcoveNewIE(AdobePassIE):
                        'format_id': build_format_id('rtmp'),
                    })
                formats.append(f)
        if not formats:
            # for sonyliv.com DRM protected videos
            s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
            if s3_source_url:
                formats.append({
                    'url': s3_source_url,
                    'format_id': 'source',
                })
        if not formats:
            errors = json_data.get('errors')
-        if not formats and errors:
+            if errors:
                error = errors[0]
                raise ExtractorError(
                    error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
            if sources and num_drm_sources == len(sources):
                raise ExtractorError('This video is DRM protected.', expected=True)
        self._sort_formats(formats)
@ -600,11 +601,14 @@ class BrightcoveNewIE(AdobePassIE):
        store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
        def extract_policy_key():
            base_url = 'http://players.brightcove.net/%s/%s_%s/' % (account_id, player_id, embed)
            config = self._download_json(
                base_url + 'config.json', video_id, fatal=False) or {}
            policy_key = try_get(
                config, lambda x: x['video_cloud']['policy_key'])
            if not policy_key:
                webpage = self._download_webpage(
-                'http://players.brightcove.net/%s/%s_%s/index.min.js'
+                    base_url + 'index.min.js', video_id)
                % (account_id, player_id, embed), video_id)
            policy_key = None
                catalog = self._search_regex(
                    r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
--- a/haruhi_dl/extractor/canvas.py
+++ b/haruhi_dl/extractor/canvas.py
@ -8,18 +8,20 @@ from .gigya import GigyaBaseIE
 from ..compat import compat_HTTPError
 from ..utils import (
    ExtractorError,
-    strip_or_none,
+    clean_html,
    extract_attributes,
    float_or_none,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    parse_iso8601,
    str_or_none,
    strip_or_none,
    url_or_none,
 )
 class CanvasIE(InfoExtractor):
-    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza|dako)/assets/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
        'md5': '68993eda72ef62386a15ea2cf3c93107',
@ -37,6 +39,7 @@ class CanvasIE(InfoExtractor):
        'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    _HLS_ENTRY_PROTOCOLS_MAP = {
        'HLS': 'm3u8_native',
        'HLS_AES': 'm3u8',
@ -47,6 +50,8 @@ class CanvasIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        site_id, video_id = mobj.group('site_id'), mobj.group('id')
        data = None
        if site_id != 'vrtvideo':
            # Old API endpoint, serves more formats but may fail for some videos
            data = self._download_json(
                'https://mediazone.vrt.be/api/v1/%s/assets/%s'
@ -55,21 +60,24 @@ class CanvasIE(InfoExtractor):
        # New API endpoint
        if not data:
            headers = self.geo_verification_headers()
            headers.update({'Content-Type': 'application/json'})
            token = self._download_json(
                '%s/tokens' % self._REST_API_BASE, video_id,
-                'Downloading token', data=b'',
+                'Downloading token', data=b'', headers=headers)['vrtPlayerToken']
                headers={'Content-Type': 'application/json'})['vrtPlayerToken']
            data = self._download_json(
                '%s/videos/%s' % (self._REST_API_BASE, video_id),
-                video_id, 'Downloading video JSON', fatal=False, query={
+                video_id, 'Downloading video JSON', query={
                    'vrtPlayerToken': token,
                    'client': '%s@PROD' % site_id,
                }, expected_status=400)
-            message = data.get('message')
+            if not data.get('title'):
-            if message and not data.get('title'):
+                code = data.get('code')
-                if data.get('code') == 'AUTHENTICATION_REQUIRED':
+                if code == 'AUTHENTICATION_REQUIRED':
-                    self.raise_login_required(message)
+                    self.raise_login_required()
-                raise ExtractorError(message, expected=True)
+                elif code == 'INVALID_LOCATION':
                    self.raise_geo_restricted(countries=['BE'])
                raise ExtractorError(data.get('message') or code, expected=True)
        title = data['title']
        description = data.get('description')
@ -205,20 +213,24 @@ class CanvasEenIE(InfoExtractor):
 class VrtNUIE(GigyaBaseIE):
    IE_DESC = 'VrtNU.be'
-    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?vrt\.be/vrtnu/a-z/(?:[^/]+/){2}(?P<id>[^/?#&]+)'
    _TESTS = [{
        # Available via old API endpoint
-        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1989/postbus-x-s1989a1/',
        'info_dict': {
-            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+            'id': 'pbs-pub-e8713dac-899e-41de-9313-81269f4c04ac$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
            'ext': 'mp4',
-            'title': 'De zwarte weduwe',
+            'title': 'Postbus X - Aflevering 1 (Seizoen 1989)',
-            'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
+            'description': 'md5:b704f669eb9262da4c55b33d7c6ed4b7',
            'duration': 1457.04,
            'thumbnail': r're:^https?://.*\.jpg$',
-            'season': 'Season 1',
+            'series': 'Postbus X',
-            'season_number': 1,
+            'season': 'Seizoen 1989',
            'season_number': 1989,
            'episode': 'De zwarte weduwe',
            'episode_number': 1,
            'timestamp': 1595822400,
            'upload_date': '20200727',
        },
        'skip': 'This video is only available for registered users',
        'params': {
@ -300,69 +312,73 @@ class VrtNUIE(GigyaBaseIE):
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        webpage, urlh = self._download_webpage_handle(url, display_id)
+        webpage = self._download_webpage(url, display_id)
        attrs = extract_attributes(self._search_regex(
            r'(<nui-media[^>]+>)', webpage, 'media element'))
        video_id = attrs['videoid']
        publication_id = attrs.get('publicationid')
        if publication_id:
            video_id = publication_id + '$' + video_id
        page = (self._parse_json(self._search_regex(
            r'digitalData\s*=\s*({.+?});', webpage, 'digial data',
            default='{}'), video_id, fatal=False) or {}).get('page') or {}
        info = self._search_json_ld(webpage, display_id, default={})
        # title is optional here since it may be extracted by extractor
        # that is delegated from here
        title = strip_or_none(self._html_search_regex(
            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
            webpage, 'title', default=None))
        description = self._html_search_regex(
            r'(?ms)<div class="content__description">(.+?)</div>',
            webpage, 'description', default=None)
        season = self._html_search_regex(
            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
                    <span>seizoen\ (.+?)</span>\s*
                </div>''',
             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
            webpage, 'season', default=None)
        season_number = int_or_none(season)
        episode_number = int_or_none(self._html_search_regex(
            r'''(?xms)<div\ class="content__episode">\s*
                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
                </div>''',
            webpage, 'episode_number', default=None))
        release_date = parse_iso8601(self._html_search_regex(
            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
            webpage, 'release_date', default=None))
        # If there's a ? or a # in the URL, remove them and everything after
        clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
        securevideo_url = clean_url + '.mssecurevideo.json'
        try:
            video = self._download_json(securevideo_url, display_id)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                self.raise_login_required()
            raise
        # We are dealing with a '../<show>.relevant' URL
        redirect_url = video.get('url')
        if redirect_url:
            return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
        # There is only one entry, but with an unknown key, so just get
        # the first one
        video_id = list(video.values())[0].get('videoid')
        return merge_dicts(info, {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'season_number': int_or_none(page.get('episode_season')),
        })
 class DagelijkseKostIE(InfoExtractor):
    IE_DESC = 'dagelijksekost.een.be'
    _VALID_URL = r'https?://dagelijksekost\.een\.be/gerechten/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://dagelijksekost.een.be/gerechten/hachis-parmentier-met-witloof',
        'md5': '30bfffc323009a3e5f689bef6efa2365',
        'info_dict': {
            'id': 'md-ast-27a4d1ff-7d7b-425e-b84f-a4d227f592fa',
            'display_id': 'hachis-parmentier-met-witloof',
            'ext': 'mp4',
            'title': 'Hachis parmentier met witloof',
            'description': 'md5:9960478392d87f63567b5b117688cdc5',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 283.02,
        },
        'expected_warnings': ['is not a supported codec'],
    }
    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = strip_or_none(get_element_by_class(
            'dish-metadata__title', webpage
        ) or self._html_search_meta(
            'twitter:title', webpage))
        description = clean_html(get_element_by_class(
            'dish-description', webpage)
        ) or self._html_search_meta(
            ('description', 'twitter:description', 'og:description'),
            webpage)
        video_id = self._html_search_regex(
            r'data-url=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
            group='id')
        return {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/dako/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
-            'season': season,
+        }
            'season_number': season_number,
            'episode_number': episode_number,
            'release_date': release_date,
        })
--- a/haruhi_dl/extractor/cbslocal.py
+++ b/haruhi_dl/extractor/cbslocal.py
@ -11,7 +11,47 @@ from ..utils import (
 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
+    _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
    _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
        'info_dict': {
            'id': '3580809',
            'ext': 'mp4',
            'title': 'A Very Blue Anniversary',
            'description': 'CBS2’s Cindy Hsu has more.',
            'thumbnail': 're:^https?://.*',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'uploader': 'CBS',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\WCBSTV',
                'Syndication\\AOL',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\Yahoo',
                'Content\\News',
                'Content\\News\\Local News',
            ],
            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
        },
        'params': {
            'skip_download': True,
        },
    }]
    def _real_extract(self, url):
        # Build an 'anvato:<fixed prefix>:<id>' URL from the id captured in
        # the page URL and hand it off to the 'Anvato' extractor.
        video_id = self._match_id(url)
        anvato_url = 'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + video_id
        return self.url_result(anvato_url, 'Anvato', video_id)
 class CBSLocalArticleIE(AnvatoIE):
    _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
    _TESTS = [{
        # Anvato backend
@ -52,31 +92,6 @@ class CBSLocalIE(AnvatoIE):
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
        'info_dict': {
            'id': '3580809',
            'ext': 'mp4',
            'title': 'A Very Blue Anniversary',
            'description': 'CBS2’s Cindy Hsu has more.',
            'thumbnail': 're:^https?://.*',
            'timestamp': int,
            'upload_date': r're:^\d{8}$',
            'uploader': 'CBS',
            'subtitles': {
                'en': 'mincount:5',
            },
            'categories': [
                'Stations\\Spoken Word\\WCBSTV',
                'Syndication\\AOL',
                'Syndication\\MSN',
                'Syndication\\NDN',
                'Syndication\\Yahoo',
                'Content\\News',
                'Content\\News\\Local News',
            ],
            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
        },
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/ccma.py
+++ b/haruhi_dl/extractor/ccma.py
@ -1,15 +1,18 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import calendar
 import datetime
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    extract_timezone,
    int_or_none,
    parse_duration,
    parse_iso8601,
    parse_resolution,
    try_get,
    url_or_none,
 )
@ -24,8 +27,9 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
-            'timestamp': 1470918540,
+            'timestamp': 1478608140,
-            'upload_date': '20160811',
+            'upload_date': '20161108',
            'age_limit': 0,
        }
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
@ -35,8 +39,24 @@ class CCMAIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
-            'upload_date': '20171205',
+            'upload_date': '20170512',
-            'timestamp': 1512507300,
+            'timestamp': 1494622500,
            'vcodec': 'none',
            'categories': ['Esports'],
        }
    }, {
        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
        'info_dict': {
            'id': '6031387',
            'ext': 'mp4',
            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
            'timestamp': 1582577700,
            'upload_date': '20200224',
            'subtitles': 'mincount:4',
            'age_limit': 16,
            'series': 'Crims',
        }
    }]
@ -72,17 +92,28 @@ class CCMAIE(InfoExtractor):
        informacio = media['informacio']
        title = informacio['titol']
-        durada = informacio.get('durada', {})
+        durada = informacio.get('durada') or {}
        duration = int_or_none(durada.get('milisegons'), 1000) or parse_duration(durada.get('text'))
-        timestamp = parse_iso8601(informacio.get('data_emissio', {}).get('utc'))
+        tematica = try_get(informacio, lambda x: x['tematica']['text'])
        timestamp = None
        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
        try:
            timezone, data_utc = extract_timezone(data_utc)
            timestamp = calendar.timegm((datetime.datetime.strptime(
                data_utc, '%Y-%d-%mT%H:%M:%S') - timezone).timetuple())
        except TypeError:
            pass
        subtitles = {}
-        subtitols = media.get('subtitols', {})
+        subtitols = media.get('subtitols') or []
-        if subtitols:
+        if isinstance(subtitols, dict):
-            sub_url = subtitols.get('url')
+            subtitols = [subtitols]
        for st in subtitols:
            sub_url = st.get('url')
            if sub_url:
                subtitles.setdefault(
-                    subtitols.get('iso') or subtitols.get('text') or 'ca', []).append({
+                    st.get('iso') or st.get('text') or 'ca', []).append({
                        'url': sub_url,
                    })
@ -97,6 +128,16 @@ class CCMAIE(InfoExtractor):
                    'height': int_or_none(imatges.get('alcada')),
                }]
        age_limit = None
        codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])
        if codi_etic:
            codi_etic_s = codi_etic.split('_')
            if len(codi_etic_s) == 2:
                if codi_etic_s[1] == 'TP':
                    age_limit = 0
                else:
                    age_limit = int_or_none(codi_etic_s[1])
        return {
            'id': media_id,
            'title': title,
@ -106,4 +147,9 @@ class CCMAIE(InfoExtractor):
            'thumbnails': thumbnails,
            'subtitles': subtitles,
            'formats': formats,
            'age_limit': age_limit,
            'alt_title': informacio.get('titol_complet'),
            'episode_number': int_or_none(informacio.get('capitol')),
            'categories': [tematica] if tematica else None,
            'series': informacio.get('programa'),
        }
--- a/haruhi_dl/extractor/cnbc.py
+++ b/haruhi_dl/extractor/cnbc.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import smuggle_url
@ -38,7 +39,7 @@ class CNBCIE(InfoExtractor):
 class CNBCVideoIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/]+/)+(?P<id>[^./?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?cnbc\.com(?P<path>/video/(?:[^/]+/)+(?P<id>[^./?#&]+)\.html)'
    _TEST = {
        'url': 'https://www.cnbc.com/video/2018/07/19/trump-i-dont-necessarily-agree-with-raising-rates.html',
        'info_dict': {
@ -56,11 +57,15 @@ class CNBCVideoIE(InfoExtractor):
    }
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        path, display_id = re.match(self._VALID_URL, url).groups()
-        webpage = self._download_webpage(url, display_id)
+        video_id = self._download_json(
-        video_id = self._search_regex(
+            'https://webql-redesign.cnbcfm.com/graphql', display_id, query={
-            r'content_id["\']\s*:\s*["\'](\d+)', webpage, display_id,
+                'query': '''{
-            'video id')
+  page(path: "%s") {
    vcpsId
  }
 }''' % path,
            })['data']['page']['vcpsId']
        return self.url_result(
-            'http://video.cnbc.com/gallery/?video=%s' % video_id,
+            'http://video.cnbc.com/gallery/?video=%d' % video_id,
            CNBCIE.ie_key())
--- a/haruhi_dl/extractor/cnn.py
+++ b/haruhi_dl/extractor/cnn.py
@ -96,7 +96,10 @@ class CNNIE(TurnerBaseIE):
            config['data_src'] % path, page_title, {
                'default': {
                    'media_src': config['media_src'],
-                }
+                },
                'f4m': {
                    'host': 'cnn-vh.akamaihd.net',
                },
            })
--- a/haruhi_dl/extractor/comedycentral.py
+++ b/haruhi_dl/extractor/comedycentral.py
@ -1,142 +1,51 @@
 from __future__ import unicode_literals
 from .mtv import MTVServicesInfoExtractor
 from .common import InfoExtractor
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
+    _VALID_URL = r'https?://(?:www\.)?cc\.com/(?:episodes|video(?:-clips)?)/(?P<id>[0-9a-z]{6})'
        (video-clips|episodes|cc-studios|video-collections|shows(?=/[^/]+/(?!full-episodes)))
        /(?P<title>.*)'''
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
    _TESTS = [{
-        'url': 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother',
+        'url': 'http://www.cc.com/video-clips/5ke9v2/the-daily-show-with-trevor-noah-doc-rivers-and-steve-ballmer---the-nba-player-strike',
-        'md5': 'c4f48e9eda1b16dd10add0744344b6d8',
+        'md5': 'b8acb347177c680ff18a292aa2166f80',
        'info_dict': {
-            'id': 'cef0cbb3-e776-4bc9-b62e-8016deccb354',
+            'id': '89ccc86e-1b02-4f83-b0c9-1d9592ecd025',
            'ext': 'mp4',
-            'title': 'CC:Stand-Up|August 18, 2013|1|0101|Uncensored - Too Good of a Mother',
+            'title': 'The Daily Show with Trevor Noah|August 28, 2020|25|25149|Doc Rivers and Steve Ballmer - The NBA Player Strike',
-            'description': 'After a certain point, breastfeeding becomes c**kblocking.',
+            'description': 'md5:5334307c433892b85f4f5e5ac9ef7498',
-            'timestamp': 1376798400,
+            'timestamp': 1598670000,
-            'upload_date': '20130818',
+            'upload_date': '20200829',
        },
    }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/interviews/6yx39d/exclusive-rand-paul-extended-interview',
+        'url': 'http://www.cc.com/episodes/pnzzci/drawn-together--american-idol--parody-clip-show-season-3-ep-314',
        'only_matching': True,
    }]
 class ComedyCentralFullEpisodesIE(MTVServicesInfoExtractor):
    _VALID_URL = r'''(?x)https?://(?:www\.)?cc\.com/
        (?:full-episodes|shows(?=/[^/]+/full-episodes))
        /(?P<id>[^?]+)'''
    _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
    _TESTS = [{
        'url': 'http://www.cc.com/full-episodes/pv391a/the-daily-show-with-trevor-noah-november-28--2016---ryan-speedo-green-season-22-ep-22028',
        'info_dict': {
            'description': 'Donald Trump is accused of exploiting his president-elect status for personal gain, Cuban leader Fidel Castro dies, and Ryan Speedo Green discusses "Sing for Your Life."',
            'title': 'November 28, 2016 - Ryan Speedo Green',
        },
        'playlist_count': 4,
    }, {
-        'url': 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes',
+        'url': 'https://www.cc.com/video/k3sdvm/the-daily-show-with-jon-stewart-exclusive-the-fourth-estate',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        # Download the page, pull the triforce mgid out of the
        # 't2_lc_promo1' data zone and return the videos info for it.
        page_id = self._match_id(url)
        page_html = self._download_webpage(url, page_id)
        return self._get_videos_info(
            self._extract_triforce_mgid(page_html, data_zone='t2_lc_promo1'))
 class ToshIE(MTVServicesInfoExtractor):
    # Extractor configuration for tosh.cc.com; all extraction logic is
    # inherited from MTVServicesInfoExtractor (no methods are overridden here).
    IE_DESC = 'Tosh.0'
    # Matches /video-clips/<segment>/<videotitle> and
    # /video-collections/<segment>/<videotitle> on tosh.cc.com.
    # NOTE(review): the pattern defines a 'videotitle' group but no 'id'
    # group - presumably the base class does not rely on _match_id; confirm.
    _VALID_URL = r'^https?://tosh\.cc\.com/video-(?:clips|collections)/[^/]+/(?P<videotitle>[^/?#]+)'
    _FEED_URL = 'http://tosh.cc.com/feeds/mrss'
    _TESTS = [{
        # Clip page that is expected to resolve to a one-entry playlist
        'url': 'http://tosh.cc.com/video-clips/68g93d/twitter-users-share-summer-plans',
        'info_dict': {
            'description': 'Tosh asked fans to share their summer plans.',
            'title': 'Twitter Users Share Summer Plans',
        },
        'playlist': [{
            'md5': 'f269e88114c1805bb6d7653fecea9e06',
            'info_dict': {
                'id': '90498ec2-ed00-11e0-aca6-0026b9414f30',
                'ext': 'mp4',
                'title': 'Tosh.0|June 9, 2077|2|211|Twitter Users Share Summer Plans',
                'description': 'Tosh asked fans to share their summer plans.',
                'thumbnail': r're:^https?://.*\.jpg',
                # Not a typo: the feed really reports a publication date in
                # the year 2077
                'upload_date': '20770610',
                'timestamp': 3390510600,
                'subtitles': {
                    'en': 'mincount:3',
                },
            },
        }]
    }, {
        # video-collections URL; only checked against _VALID_URL
        'url': 'http://tosh.cc.com/video-collections/x2iz7k/just-plain-foul/m5q4fp',
        'only_matching': True,
    }]
 class ComedyCentralTVIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/(?:staffeln|shows)/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?comedycentral\.tv/folgen/(?P<id>[0-9a-z]{6})'
    _TESTS = [{
-        'url': 'http://www.comedycentral.tv/staffeln/7436-the-mindy-project-staffel-4',
+        'url': 'https://www.comedycentral.tv/folgen/pxdpec/josh-investigates-klimawandel-staffel-1-ep-1',
        'info_dict': {
-            'id': 'local_playlist-f99b626bdfe13568579a',
+            'id': '15907dc3-ec3c-11e8-a442-0e40cf2fc285',
-            'ext': 'flv',
+            'ext': 'mp4',
-            'title': 'Episode_the-mindy-project_shows_season-4_episode-3_full-episode_part1',
+            'title': 'Josh Investigates',
            'description': 'Steht uns das Ende der Welt bevor?',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.comedycentral.tv/shows/1074-workaholics',
        'only_matching': True,
    }, {
        'url': 'http://www.comedycentral.tv/shows/1727-the-mindy-project/bonus',
        'only_matching': True,
    }]
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
    _GEO_COUNTRIES = ['DE']
-    def _real_extract(self, url):
+    def _get_feed_query(self, uri):
-        video_id = self._match_id(url)
+        return {
-
+            'accountOverride': 'intl.mtvi.com',
-        webpage = self._download_webpage(url, video_id)
+            'arcEp': 'web.cc.tv',
-
+            'ep': 'b9032c3a',
-        mrss_url = self._search_regex(
+            'imageEp': 'web.cc.tv',
-            r'data-mrss=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            'mgid': uri,
            webpage, 'mrss url', group='url')
        return self._get_videos_info_from_url(mrss_url, video_id)
 class ComedyCentralShortnameIE(InfoExtractor):
    # Accepts the pseudo-URLs ':tds', ':thedailyshow' and ':theopposition'
    _VALID_URL = r'^:(?P<id>tds|thedailyshow|theopposition)$'
    _TESTS = [
        {'url': ':tds', 'only_matching': True},
        {'url': ':thedailyshow', 'only_matching': True},
        {'url': ':theopposition', 'only_matching': True},
    ]

    def _real_extract(self, url):
        # Redirect the shortname to the full-episodes page of its show;
        # ':tds' and ':thedailyshow' are aliases for the same page.
        daily_show_url = 'http://www.cc.com/shows/the-daily-show-with-trevor-noah/full-episodes'
        opposition_url = 'http://www.cc.com/shows/the-opposition-with-jordan-klepper/full-episodes'
        targets = {
            'tds': daily_show_url,
            'thedailyshow': daily_show_url,
            'theopposition': opposition_url,
        }
        return self.url_result(targets[self._match_id(url)])
--- a/haruhi_dl/extractor/common.py
+++ b/haruhi_dl/extractor/common.py
@ -336,8 +336,8 @@ class InfoExtractor(object):
    object, each element of which is a valid dictionary by this specification.
    Additionally, playlists can have "id", "title", "description", "uploader",
-    "uploader_id", "uploader_url" attributes with the same semantics as videos
+    "uploader_id", "uploader_url", "duration" attributes with the same semantics
-    (see above).
+    as videos (see above).
    _type "multi_video" indicates that there are multiple videos that
@ -1239,8 +1239,16 @@ class InfoExtractor(object):
            'ViewAction': 'view',
        }
        def extract_interaction_type(e):
            # 'interactionType' may be given directly or wrapped in an
            # object carrying the value under '@type'; unwrap the latter.
            kind = e.get('interactionType')
            return str_or_none(
                kind.get('@type') if isinstance(kind, dict) else kind)
        def extract_interaction_statistic(e):
            interaction_statistic = e.get('interactionStatistic')
            if isinstance(interaction_statistic, dict):
                interaction_statistic = [interaction_statistic]
            if not isinstance(interaction_statistic, list):
                return
            for is_e in interaction_statistic:
@ -1248,8 +1256,8 @@ class InfoExtractor(object):
                    continue
                if is_e.get('@type') != 'InteractionCounter':
                    continue
-                interaction_type = is_e.get('interactionType')
+                interaction_type = extract_interaction_type(is_e)
-                if not isinstance(interaction_type, compat_str):
+                if not interaction_type:
                    continue
                # For interaction count some sites provide string instead of
                # an integer (as per spec) with non digit characters (e.g. ",")
@ -1474,9 +1482,10 @@ class InfoExtractor(object):
        try:
            self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
            return True
-        except ExtractorError:
+        except ExtractorError as e:
            self.to_screen(
-                '%s: %s URL is invalid, skipping' % (video_id, item))
+                '%s: %s URL is invalid, skipping: %s'
                % (video_id, item, error_to_compat_str(e.cause)))
            return False
    def http_scheme(self):
@ -2612,7 +2621,15 @@ class InfoExtractor(object):
        return entries
    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
        signed = 'hdnea=' in manifest_url
        if not signed:
            # https://learn.akamai.com/en-us/webhelp/media-services-on-demand/stream-packaging-user-guide/GUID-BE6C0F73-1E06-483B-B0EA-57984B91B7F9.html
            manifest_url = re.sub(
                r'(?:b=[\d,-]+|(?:__a__|attributes)=off|__b__=\d+)&?',
                '', manifest_url).strip('?')
        formats = []
        hdcore_sign = 'hdcore=3.7.0'
        f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
        hds_host = hosts.get('hds')
@ -2625,13 +2642,38 @@ class InfoExtractor(object):
        for entry in f4m_formats:
            entry.update({'extra_param_to_segment_url': hdcore_sign})
        formats.extend(f4m_formats)
        m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
        hls_host = hosts.get('hls')
        if hls_host:
            m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
-        formats.extend(self._extract_m3u8_formats(
+        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4', 'm3u8_native',
-            m3u8_id='hls', fatal=False))
+            m3u8_id='hls', fatal=False)
        formats.extend(m3u8_formats)
        http_host = hosts.get('http')
        if http_host and m3u8_formats and not signed:
            REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+'
            qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',')
            qualities_length = len(qualities)
            if len(m3u8_formats) in (qualities_length, qualities_length + 1):
                i = 0
                for f in m3u8_formats:
                    if f['vcodec'] != 'none':
                        for protocol in ('http', 'https'):
                            http_f = f.copy()
                            del http_f['manifest_url']
                            http_url = re.sub(
                                REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url'])
                            http_f.update({
                                'format_id': http_f['format_id'].replace('hls-', protocol + '-'),
                                'url': http_url,
                                'protocol': protocol,
                            })
                            formats.append(http_f)
                        i += 1
        return formats
    def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
--- a/haruhi_dl/extractor/condenast.py
+++ b/haruhi_dl/extractor/condenast.py
@ -16,6 +16,8 @@ from ..utils import (
    mimetype2ext,
    orderedSet,
    parse_iso8601,
    strip_or_none,
    try_get,
 )
@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
            'uploader': 'gq',
            'upload_date': '20170321',
            'timestamp': 1490126427,
            'description': 'How much grimmer would things be if these people were competent?',
        },
    }, {
        # JS embed
@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
            'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
            'uploader': 'arstechnica',
            'upload_date': '20150916',
-            'timestamp': 1442434955,
+            'timestamp': 1442434920,
        }
    }, {
        'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
            })
        self._sort_formats(formats)
        subtitles = {}
        for t, caption in video_info.get('captions', {}).items():
            caption_url = caption.get('src')
            if not (t in ('vtt', 'srt', 'tml') and caption_url):
                continue
            subtitles.setdefault('en', []).append({'url': caption_url})
        return {
            'id': video_id,
            'formats': formats,
@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
            'season': video_info.get('season_title'),
            'timestamp': parse_iso8601(video_info.get('premiere_date')),
            'categories': video_info.get('categories'),
            'subtitles': subtitles,
        }
    def _real_extract(self, url):
@ -224,6 +235,14 @@ class CondeNastIE(InfoExtractor):
        if url_type == 'series':
            return self._extract_series(url, webpage)
        else:
            video = try_get(self._parse_json(self._search_regex(
                r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
                'preload state', '{}'), display_id),
                lambda x: x['transformed']['video'])
            if video:
                params = {'videoId': video['id']}
                info = {'description': strip_or_none(video.get('description'))}
            else:
                params = self._extract_video_params(webpage, display_id)
                info = self._search_json_ld(
--- a/haruhi_dl/extractor/cspan.py
+++ b/haruhi_dl/extractor/cspan.py
@ -8,9 +8,14 @@ from ..utils import (
    ExtractorError,
    extract_attributes,
    find_xpath_attr,
    get_element_by_attribute,
    get_element_by_class,
    int_or_none,
    js_to_json,
    merge_dicts,
    parse_iso8601,
    smuggle_url,
    str_to_int,
    unescapeHTML,
 )
 from .senateisvp import SenateISVPIE
@ -98,6 +103,48 @@ class CSpanIE(InfoExtractor):
                    bc_attr['data-bcid'])
                return self.url_result(smuggle_url(bc_url, {'source_url': url}))
        def add_referer(formats):
            # Set the page URL as the Referer header on every format,
            # creating the 'http_headers' dict where it is missing.
            for fmt in formats:
                headers = fmt.setdefault('http_headers', {})
                headers['Referer'] = url
        # As of 01.12.2020 this path looks to cover all cases making the rest
        # of the code unnecessary
        jwsetup = self._parse_json(
            self._search_regex(
                r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup',
                default='{}'),
            video_id, transform_source=js_to_json, fatal=False)
        if jwsetup:
            info = self._parse_jwplayer_data(
                jwsetup, video_id, require_title=False, m3u8_id='hls',
                base_url=url)
            add_referer(info['formats'])
            for subtitles in info['subtitles'].values():
                for subtitle in subtitles:
                    ext = determine_ext(subtitle['url'])
                    if ext == 'php':
                        ext = 'vtt'
                    subtitle['ext'] = ext
            ld_info = self._search_json_ld(webpage, video_id, default={})
            title = get_element_by_class('video-page-title', webpage) or \
                self._og_search_title(webpage)
            description = get_element_by_attribute('itemprop', 'description', webpage) or \
                self._html_search_meta(['og:description', 'description'], webpage)
            return merge_dicts(info, ld_info, {
                'title': title,
                'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage),
                'description': description,
                'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage)),
                'location': get_element_by_attribute('itemprop', 'contentLocation', webpage),
                'duration': int_or_none(self._search_regex(
                    r'jwsetup\.seclength\s*=\s*(\d+);',
                    webpage, 'duration', fatal=False)),
                'view_count': str_to_int(self._search_regex(
                    r"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
                    webpage, 'views', fatal=False)),
            })
        # Obsolete
        # We first look for clipid, because clipprog always appears before
        patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')]
        results = list(filter(None, (re.search(p, webpage) for p in patterns)))
@ -165,6 +212,7 @@ class CSpanIE(InfoExtractor):
                formats = self._extract_m3u8_formats(
                    path, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }]
            add_referer(formats)
            self._sort_formats(formats)
            entries.append({
                'id': '%s_%d' % (video_id, partnum + 1),
--- a/haruhi_dl/extractor/ctv.py
+++ b/haruhi_dl/extractor/ctv.py
@ -0,0 +1,52 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class CTVIE(InfoExtractor):
    """Extractor for ctv.ca show and movie pages.

    Resolves the page path through the site's GraphQL endpoint and hands the
    resulting ids over to the NineCNineMedia extractor as a '9c9media:' URL.
    """
    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/(?P<id>(?:show|movie)s/[^/]+/[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.ctv.ca/shows/your-morning/wednesday-december-23-2020-s5e88',
        'info_dict': {
            'id': '2102249',
            'ext': 'flv',
            'title': 'Wednesday, December 23, 2020',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'Your Morning delivers original perspectives and unique insights into the headlines of the day.',
            'timestamp': 1608732000,
            'upload_date': '20201223',
            'series': 'Your Morning',
            'season': '2020-2021',
            'season_number': 5,
            'episode_number': 88,
            'tags': ['Your Morning'],
            'categories': ['Talk Show'],
            'duration': 7467.126,
        },
    }, {
        'url': 'https://www.ctv.ca/movies/adam-sandlers-eight-crazy-nights/adam-sandlers-eight-crazy-nights',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        # The part of the URL matched by the 'id' group doubles as the GraphQL path.
        display_id = self._match_id(url)
        # Only the two AxisContent fields used below are requested.
        graphql_query = '''{
  resolvedPath(path: "/%s") {
    lastSegment {
      content {
        ... on AxisContent {
          axisId
          videoPlayerDestCode
        }
      }
    }
  }
 }''' % display_id
        response = self._download_json(
            'https://www.ctv.ca/space-graphql/graphql', display_id,
            query={'query': graphql_query})
        content = response['data']['resolvedPath']['lastSegment']['content']
        video_id = content['axisId']
        dest_code = content['videoPlayerDestCode']
        # Delegate the actual media extraction to the NineCNineMedia extractor.
        return self.url_result(
            '9c9media:%s:%s' % (dest_code, video_id),
            'NineCNineMedia', video_id)
--- a/haruhi_dl/extractor/discoverynetworks.py
+++ b/haruhi_dl/extractor/discoverynetworks.py
@ -7,7 +7,7 @@ from .dplay import DPlayIE
 class DiscoveryNetworksDeIE(DPlayIE):
-    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
    _TESTS = [{
        'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
@ -29,6 +29,9 @@ class DiscoveryNetworksDeIE(DPlayIE):
    }, {
        'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
        'only_matching': True,
    }, {
        'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/dplay.py
+++ b/haruhi_dl/extractor/dplay.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
@ -10,16 +11,23 @@ from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    strip_or_none,
    unified_timestamp,
 )
 class DPlayIE(InfoExtractor):
    _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
    _VALID_URL = r'''(?x)https?://
        (?P<domain>
-            (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
+            (?:www\.)?(?P<host>d
                (?:
                    play\.(?P<country>dk|fi|jp|se|no)|
                    iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
                )
            )|
            (?P<subdomain_country>es|it)\.dplay\.com
-        )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
+        )/[^/]+''' + _PATH_REGEX
    _TESTS = [{
        # non geo restricted, via secure api, unsigned download hls URL
@ -126,23 +134,67 @@ class DPlayIE(InfoExtractor):
    }, {
        'url': 'https://www.dplay.jp/video/gold-rush/24086',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
        'only_matching': True,
    }, {
        'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
        'only_matching': True,
    }]
    def _process_errors(self, e, geo_countries):
        info = self._parse_json(e.cause.read().decode('utf-8'), None)
        error = info['errors'][0]
        error_code = error.get('code')
        if error_code == 'access.denied.geoblocked':
            self.raise_geo_restricted(countries=geo_countries)
        elif error_code in ('access.denied.missingpackage', 'invalid.token'):
            raise ExtractorError(
                'This video is only available for registered users. You may want to use --cookies.', expected=True)
        raise ExtractorError(info['errors'][0]['detail'], expected=True)
    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        headers['Authorization'] = 'Bearer ' + self._download_json(
            disco_base + 'token', display_id, 'Downloading token',
            query={
                'realm': realm,
            })['data']['attributes']['token']
    def _download_video_playback_info(self, disco_base, video_id, headers):
        streaming = self._download_json(
            disco_base + 'playback/videoPlaybackInfo/' + video_id,
            video_id, headers=headers)['data']['attributes']['streaming']
        streaming_list = []
        for format_id, format_dict in streaming.items():
            streaming_list.append({
                'type': format_id,
                'url': format_dict.get('url'),
            })
        return streaming_list
    def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
        geo_countries = [country.upper()]
        self._initialize_geo_bypass({
            'countries': geo_countries,
        })
        disco_base = 'https://%s/' % disco_host
        token = self._download_json(
            disco_base + 'token', display_id, 'Downloading token',
            query={
                'realm': realm,
            })['data']['attributes']['token']
        headers = {
            'Referer': url,
            'Authorization': 'Bearer ' + token,
        }
        self._update_disco_api_headers(headers, disco_base, display_id, realm)
        try:
            video = self._download_json(
                disco_base + 'content/videos/' + display_id, display_id,
                headers=headers, query={
@ -153,31 +205,28 @@ class DPlayIE(InfoExtractor):
                    'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
                    'include': 'images,primaryChannel,show,tags'
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                self._process_errors(e, geo_countries)
            raise
        video_id = video['data']['id']
        info = video['data']['attributes']
        title = info['name'].strip()
        formats = []
        try:
-            streaming = self._download_json(
+            streaming = self._download_video_playback_info(
-                disco_base + 'playback/videoPlaybackInfo/' + video_id,
+                disco_base, video_id, headers)
                display_id, headers=headers)['data']['attributes']['streaming']
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+                self._process_errors(e, geo_countries)
                error = info['errors'][0]
                error_code = error.get('code')
                if error_code == 'access.denied.geoblocked':
                    self.raise_geo_restricted(countries=geo_countries)
                elif error_code == 'access.denied.missingpackage':
                    self.raise_login_required()
                raise ExtractorError(info['errors'][0]['detail'], expected=True)
            raise
-        for format_id, format_dict in streaming.items():
+        for format_dict in streaming:
            if not isinstance(format_dict, dict):
                continue
            format_url = format_dict.get('url')
            if not format_url:
                continue
            format_id = format_dict.get('type')
            ext = determine_ext(format_url)
            if format_id == 'dash' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
@ -225,7 +274,7 @@ class DPlayIE(InfoExtractor):
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': info.get('description'),
+            'description': strip_or_none(info.get('description')),
            'duration': float_or_none(info.get('videoDuration'), 1000),
            'timestamp': unified_timestamp(info.get('publishStart')),
            'series': series,
@ -241,7 +290,80 @@ class DPlayIE(InfoExtractor):
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('id')
        domain = mobj.group('domain').lstrip('www.')
-        country = mobj.group('country') or mobj.group('subdomain_country')
+        country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
-        host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
+        host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
        return self._get_disco_api_info(
            url, display_id, host, 'dplay' + country, country)
 class DiscoveryPlusIE(DPlayIE):
    """Extractor for discoveryplus.com/video pages.

    Reuses DPlayIE._get_disco_api_info and replaces the header and
    playback-info steps with the variants defined below.
    """
    _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
        'info_dict': {
            'id': '1140794',
            'display_id': 'property-brothers-forever-home/food-and-family',
            'ext': 'mp4',
            'title': 'Food and Family',
            'description': 'The brothers help a Richmond family expand their single-level home.',
            'duration': 2583.113,
            'timestamp': 1609304400,
            'upload_date': '20201230',
            'creator': 'HGTV',
            'series': 'Property Brothers: Forever Home',
            'season_number': 1,
            'episode_number': 1,
        },
        'skip': 'Available for Premium users',
    }]
    def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
        # No token is downloaded here; only a client identification header is set.
        headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
    def _download_video_playback_info(self, disco_base, video_id, headers):
        # The v3 endpoint takes the video id in a JSON request body rather than
        # in the URL; the 'streaming' value of the response is returned unchanged.
        payload = {
            'deviceInfo': {
                'adBlocker': False,
            },
            'videoId': video_id,
            'wisteriaProperties': {
                'platform': 'desktop',
                'product': 'dplus_us',
            },
        }
        response = self._download_json(
            disco_base + 'playback/v3/videoPlaybackInfo',
            video_id, headers=headers,
            data=json.dumps(payload).encode('utf-8'))
        return response['data']['attributes']['streaming']
    def _real_extract(self, url):
        # Arguments after the id: API host, realm, country.
        return self._get_disco_api_info(
            url, self._match_id(url),
            'us1-prod-direct.discoveryplus.com', 'go', 'us')
 class HGTVDeIE(DPlayIE):
    """Extractor for de.hgtv.com/sendungen pages, served through DPlayIE._get_disco_api_info."""
    _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
    _TESTS = [{
        'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
        'info_dict': {
            'id': '151205',
            'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
            'ext': 'mp4',
            'title': 'Wer braucht schon eine Toilette',
            'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
            'duration': 1177.024,
            'timestamp': 1595705400,
            'upload_date': '20200725',
            'creator': 'HGTV',
            'series': 'Tiny House - klein, aber oho',
            'season_number': 3,
            'episode_number': 3,
        },
        'params': {
            'format': 'bestvideo',
        },
    }]
    def _real_extract(self, url):
        # Arguments after the id: API host, realm, country.
        return self._get_disco_api_info(
            url, self._match_id(url),
            'eu1-prod.disco-api.com', 'hgtv', 'de')
--- a/haruhi_dl/extractor/drtv.py
+++ b/haruhi_dl/extractor/drtv.py
@ -29,7 +29,7 @@ class DRTVIE(InfoExtractor):
                    https?://
                        (?:
                            (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
-                            (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
+                            (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/
                        )
                        (?P<id>[\da-z_-]+)
                    '''
@ -111,6 +111,9 @@ class DRTVIE(InfoExtractor):
    }, {
        'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
        'only_matching': True,
    }, {
        'url': 'https://www.dr.dk/drtv/program/jagten_220924',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/egghead.py
+++ b/haruhi_dl/extractor/egghead.py
@ -12,7 +12,14 @@ from ..utils import (
 )
-class EggheadCourseIE(InfoExtractor):
+class EggheadBaseIE(InfoExtractor):
    def _call_api(self, path, video_id, resource, fatal=True):
        return self._download_json(
            'https://app.egghead.io/api/v1/' + path,
            video_id, 'Downloading %s JSON' % resource, fatal=fatal)
 class EggheadCourseIE(EggheadBaseIE):
    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
@ -28,10 +35,9 @@ class EggheadCourseIE(InfoExtractor):
    def _real_extract(self, url):
        playlist_id = self._match_id(url)
-
+        series_path = 'series/' + playlist_id
-        lessons = self._download_json(
+        lessons = self._call_api(
-            'https://egghead.io/api/v1/series/%s/lessons' % playlist_id,
+            series_path + '/lessons', playlist_id, 'course lessons')
            playlist_id, 'Downloading course lessons JSON')
        entries = []
        for lesson in lessons:
@ -44,9 +50,8 @@ class EggheadCourseIE(InfoExtractor):
            entries.append(self.url_result(
                lesson_url, ie=EggheadLessonIE.ie_key(), video_id=lesson_id))
-        course = self._download_json(
+        course = self._call_api(
-            'https://egghead.io/api/v1/series/%s' % playlist_id,
+            series_path, playlist_id, 'course', False) or {}
            playlist_id, 'Downloading course JSON', fatal=False) or {}
        playlist_id = course.get('id')
        if playlist_id:
@ -57,7 +62,7 @@ class EggheadCourseIE(InfoExtractor):
            course.get('description'))
-class EggheadLessonIE(InfoExtractor):
+class EggheadLessonIE(EggheadBaseIE):
    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
@ -74,7 +79,7 @@ class EggheadLessonIE(InfoExtractor):
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
-            'tags': ['javascript', 'free'],
+            'tags': 'count:2',
        },
        'params': {
            'skip_download': True,
@ -88,8 +93,8 @@ class EggheadLessonIE(InfoExtractor):
    def _real_extract(self, url):
        display_id = self._match_id(url)
-        lesson = self._download_json(
+        lesson = self._call_api(
-            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)
+            'lessons/' + display_id, display_id, 'lesson')
        lesson_id = compat_str(lesson['id'])
        title = lesson['title']
--- a/haruhi_dl/extractor/eporner.py
+++ b/haruhi_dl/extractor/eporner.py
@ -16,7 +16,7 @@ from ..utils import (
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
    _TESTS = [{
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '39d486f046212d8e1b911c52ab4691f8',
@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
        'only_matching': True,
    }, {
-        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+        'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
        'only_matching': True,
    }, {
        'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
        'only_matching': True,
    }]
@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
        video_id = self._match_id(urlh.geturl())
        hash = self._search_regex(
-            r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
+            r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            r'<title>(.+?) - EPORNER', webpage, 'title')
@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, default=None))
        view_count = str_to_int(self._search_regex(
-            r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
+            r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
-            webpage, 'view count', fatal=False))
+            webpage, 'view count', default=None))
        return merge_dicts(json_ld, {
            'id': video_id,
--- a/haruhi_dl/extractor/everyonesmixtape.py
+++ b/haruhi_dl/extractor/everyonesmixtape.py
@ -1,77 +0,0 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    sanitized_Request,
 )
 class EveryonesMixtapeIE(InfoExtractor):
    """Extractor for everyonesmixtape.com mixes.

    A URL without a song number yields the whole mix as a playlist; a URL
    with a trailing song number yields only that track as a 'url' result.
    """
    _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$'
    _TESTS = [{
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5',
        'info_dict': {
            'id': '5bfseWNmlds',
            'ext': 'mp4',
            'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)",
            'uploader': 'FKR.TV',
            'uploader_id': 'frenchkissrecords',
            'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com",
            'upload_date': '20081015'
        },
        'params': {
            'skip_download': True,  # This is simply YouTube
        }
    }, {
        'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi',
        'info_dict': {
            'id': 'm7m0jJAbMQi',
            'title': 'Driving',
        },
        'playlist_count': 24
    }]
    def _download_ajax_json(self, url, playlist_id, note):
        """Download JSON from url with the 'X-Requested-With: XMLHttpRequest' header set."""
        request = sanitized_Request(url)
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        return self._download_json(request, playlist_id, note=note)
    def _real_extract(self, url):
        """Return one track or the whole mix, depending on the URL.

        Raises ExtractorError when the mix code is not present in the
        playlist listing or when the requested song number does not exist.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        playlist_list = self._download_ajax_json(
            'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id,
            playlist_id, 'Downloading playlist metadata')
        # The listing maps the public mix code to the numeric id needed below.
        try:
            playlist_no = next(playlist['id']
                               for playlist in playlist_list
                               if playlist['code'] == playlist_id)
        except StopIteration:
            raise ExtractorError('Playlist id not found')
        playlist = self._download_ajax_json(
            'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no,
            playlist_id, 'Downloading playlist info')
        entries = [{
            '_type': 'url',
            'url': t['url'],
            'title': t['title'],
        } for t in playlist['tracks']]
        song_number = mobj.group('songnr')
        if song_number:
            song_index = int(song_number) - 1
            # Song numbers are 1-based. Without this check '0' would turn into
            # index -1 and silently return the last track, and a number past
            # the end would surface as a bare IndexError.
            if not 0 <= song_index < len(entries):
                raise ExtractorError(
                    'Song number %s is out of range (the mix has %d tracks)'
                    % (song_number, len(entries)), expected=True)
            return entries[song_index]
        playlist_title = playlist['mixData']['name']
        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
--- a/haruhi_dl/extractor/extractors.py
+++ b/haruhi_dl/extractor/extractors.py
@ -30,7 +30,11 @@ from .adobetv import (
 from .adultswim import AdultSwimIE
 from .aenetworks import (
    AENetworksIE,
    AENetworksCollectionIE,
    AENetworksShowIE,
    HistoryTopicIE,
    HistoryPlayerIE,
    BiographyIE,
 )
 from .afreecatv import AfreecaTVIE
 from .agora import (
@ -43,8 +47,12 @@ from .airmozilla import AirMozillaIE
 from .albicla import AlbiclaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .amara import AmaraIE
 from .amcnetworks import AMCNetworksIE
-from .americastestkitchen import AmericasTestKitchenIE
+from .americastestkitchen import (
    AmericasTestKitchenIE,
    AmericasTestKitchenSeasonIE,
 )
 from .animeondemand import AnimeOnDemandIE
 from .anvato import AnvatoIE
 from .aol import AolIE
@ -60,7 +68,9 @@ from .appletrailers import (
    AppleTrailersIE,
    AppleTrailersSectionIE,
 )
 from .applepodcasts import ApplePodcastsIE
 from .archiveorg import ArchiveOrgIE
 from .arcpublishing import ArcPublishingIE
 from .arkena import ArkenaIE
 from .ard import (
    ARDBetaMediathekIE,
@ -68,7 +78,7 @@ from .ard import (
    ARDMediathekIE,
 )
 from .arte import (
-    ArteTVPlus7IE,
+    ArteTVIE,
    ArteTVEmbedIE,
    ArteTVPlaylistIE,
 )
@ -98,16 +108,18 @@ from .bbc import (
    BBCCoUkPlaylistIE,
    BBCIE,
 )
 from .beampro import (
    BeamProLiveIE,
    BeamProVodIE,
 )
 from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .bellmedia import BellMediaIE
 from .beatport import BeatportIE
 from .bet import BetIE
 from .bfi import BFIPlayerIE
 from .bfmtv import (
    BFMTVIE,
    BFMTVLiveIE,
    BFMTVArticleIE,
 )
 from .bibeltv import BibelTVIE
 from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import (
@ -130,7 +142,9 @@ from .bleacherreport import (
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
 from .bongacams import BongaCamsIE
 from .bostonglobe import BostonGlobeIE
 from .box import BoxIE
 from .bpb import BpbIE
 from .br import (
    BRIE,
@ -159,6 +173,7 @@ from .canvas import (
    CanvasIE,
    CanvasEenIE,
    VrtNUIE,
    DagelijkseKostIE,
 )
 from .carambatv import (
    CarambaTVIE,
@ -173,7 +188,10 @@ from .cbc import (
    CBCOlympicsIE,
 )
 from .cbs import CBSIE
-from .cbslocal import CBSLocalIE
+from .cbslocal import (
    CBSLocalIE,
    CBSLocalArticleIE,
 )
 from .cbsinteractive import CBSInteractiveIE
 from .cbsnews import (
    CBSNewsEmbedIE,
@ -228,11 +246,8 @@ from .cnn import (
 )
 from .coub import CoubIE
 from .comedycentral import (
    ComedyCentralFullEpisodesIE,
    ComedyCentralIE,
    ComedyCentralShortnameIE,
    ComedyCentralTVIE,
    ToshIE,
 )
 from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
 from .commonprotocols import (
@ -251,6 +266,7 @@ from .crunchyroll import (
 )
 from .cspan import CSpanIE
 from .ctsnews import CtsNewsIE
 from .ctv import CTVIE
 from .ctvnews import CTVNewsIE
 from .cultureunplugged import CultureUnpluggedIE
 from .curiositystream import (
@ -282,7 +298,11 @@ from .douyutv import (
    DouyuShowIE,
    DouyuTVIE,
 )
-from .dplay import DPlayIE
+from .dplay import (
    DPlayIE,
    DiscoveryPlusIE,
    HGTVDeIE,
 )
 from .dreisat import DreiSatIE
 from .drbonanza import DRBonanzaIE
 from .drtuber import DrTuberIE
@ -344,7 +364,6 @@ from .eurozet import (
    EurozetPlayerPodcastIE,
    EurozetPlayerMusicStreamIE,
 )
 from .everyonesmixtape import EveryonesMixtapeIE
 from .expotv import ExpoTVIE
 from .expressen import ExpressenIE
 from .extremetube import ExtremeTubeIE
@ -408,6 +427,7 @@ from .frontendmasters import (
    FrontendMastersLessonIE,
    FrontendMastersCourseIE
 )
 from .fujitv import FujiTVFODPlus7IE
 from .funimation import FunimationIE
 from .funk import FunkIE
 from .funkwhale import (
@ -419,7 +439,6 @@ from .funkwhale import (
    FunkwhaleRadioSHIE,
 )
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
 from .gaia import GaiaIE
 from .gameinformer import GameInformerIE
 from .gamespot import GameSpotIE
@ -427,6 +446,7 @@ from .gamestar import GameStarIE
 from .gaskrank import GaskrankIE
 from .gazeta import GazetaIE
 from .gdcvault import GDCVaultIE
 from .gedidigital import GediDigitalIE
 from .generic import GenericIE
 from .gfycat import GfycatIE
 from .giantbomb import GiantBombIE
@ -440,7 +460,10 @@ from .go import GoIE
 from .godtube import GodTubeIE
 from .golem import GolemIE
 from .googledrive import GoogleDriveIE
-from .googleplus import GooglePlusIE
+from .googlepodcasts import (
    GooglePodcastsIE,
    GooglePodcastsFeedIE,
 )
 from .googlesearch import GoogleSearchIE
 from .goshgay import GoshgayIE
 from .gputechconf import GPUTechConfIE
@ -483,8 +506,12 @@ from .hungama import (
 from .hypem import HypemIE
 from .ign import (
    IGNIE,
-    OneUPIE,
+    IGNVideoIE,
-    PCMagIE,
+    IGNArticleIE,
 )
 from .iheart import (
    IHeartRadioIE,
    IHeartRadioPodcastIE,
 )
 from .imdb import (
    ImdbIE,
@ -531,13 +558,15 @@ from .joj import JojIE
 from .jwplatform import JWPlatformIE
 from .kakao import KakaoIE
 from .kaltura import KalturaIE
 from .kanalplay import KanalPlayIE
 from .kankan import KankanIE
 from .karaoketv import KaraoketvIE
 from .karrierevideos import KarriereVideosIE
 from .keezmovies import KeezMoviesIE
 from .ketnet import KetnetIE
-from .khanacademy import KhanAcademyIE
+from .khanacademy import (
    KhanAcademyIE,
    KhanAcademyUnitIE,
 )
 from .kickstarter import KickStarterIE
 from .kinja import KinjaEmbedIE
 from .kinopoisk import KinoPoiskIE
@ -638,6 +667,7 @@ from .mastodon import MastodonSHIE
 from .massengeschmacktv import MassengeschmackTVIE
 from .matchtv import MatchTVIE
 from .mdr import MDRIE
 from .medaltv import MedalTVIE
 from .mediaset import MediasetIE
 from .mediasite import (
    MediasiteIE,
@ -658,6 +688,11 @@ from .microsoftvirtualacademy import (
    MicrosoftVirtualAcademyIE,
    MicrosoftVirtualAcademyCourseIE,
 )
 from .minds import (
    MindsIE,
    MindsChannelIE,
    MindsGroupIE,
 )
 from .ministrygrid import MinistryGridIE
 from .minoto import MinotoIE
 from .miomio import MioMioIE
@ -708,9 +743,15 @@ from .nationalgeographic import (
    NationalGeographicTVIE,
 )
 from .naver import NaverIE
-from .nba import NBAIE
+from .nba import (
    NBAWatchEmbedIE,
    NBAWatchIE,
    NBAWatchCollectionIE,
    NBAEmbedIE,
    NBAIE,
    NBAChannelIE,
 )
 from .nbc import (
    CSNNEIE,
    NBCIE,
    NBCNewsIE,
    NBCOlympicsIE,
@ -753,8 +794,14 @@ from .nexx import (
    NexxIE,
    NexxEmbedIE,
 )
-from .nfl import NFLIE
+from .nfl import (
-from .nhk import NhkVodIE
+    NFLIE,
    NFLArticleIE,
 )
 from .nhk import (
    NhkVodIE,
    NhkVodProgramIE,
 )
 from .nhl import NHLIE
 from .nick import (
    NickIE,
@ -804,6 +851,7 @@ from .nrk import (
    NRKSkoleIE,
    NRKTVIE,
    NRKTVDirekteIE,
    NRKRadioPodkastIE,
    NRKTVEpisodeIE,
    NRKTVEpisodesIE,
    NRKTVSeasonIE,
@ -816,6 +864,7 @@ from .ntvru import NTVRuIE
 from .nytimes import (
    NYTimesIE,
    NYTimesArticleIE,
    NYTimesCookingIE,
 )
 from .nuvid import NuvidIE
 from .nzz import NZZIE
@ -879,6 +928,10 @@ from .picarto import (
 )
 from .piksel import PikselIE
 from .pinkbike import PinkbikeIE
 from .pinterest import (
    PinterestIE,
    PinterestCollectionIE,
 )
 from .pladform import PladformIE
 from .platzi import (
    PlatziIE,
@ -1005,6 +1058,7 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
 from .ruhd import RUHDIE
 from .rumble import RumbleEmbedIE
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
@ -1021,6 +1075,7 @@ from .safari import (
    SafariApiIE,
    SafariCourseIE,
 )
 from .samplefocus import SampleFocusIE
 from .sapo import SapoIE
 from .savefrom import SaveFromIE
 from .sbs import SBSIE
@ -1053,8 +1108,23 @@ from .shared import (
    VivoIE,
 )
 from .showroomlive import ShowRoomLiveIE
 from .simplecast import (
    SimplecastIE,
    SimplecastEpisodeIE,
    SimplecastPodcastIE,
 )
 from .sina import SinaIE
 from .sixplay import SixPlayIE
 from .skyit import (
    SkyItPlayerIE,
    SkyItVideoIE,
    SkyItVideoLiveIE,
    SkyItIE,
    SkyItAcademyIE,
    SkyItArteIE,
    CieloTVItIE,
    TV8ItIE,
 )
 from .skylinewebcams import SkylineWebcamsIE
 from .skynewsarabia import (
    SkyNewsArabiaIE,
@ -1063,16 +1133,11 @@ from .skynewsarabia import (
 from .sky import (
    SkyNewsIE,
    SkySportsIE,
    SkySportsNewsIE,
 )
 from .slideshare import SlideshareIE
 from .slideslive import SlidesLiveIE
 from .slutload import SlutloadIE
 from .smotri import (
    SmotriIE,
    SmotriCommunityIE,
    SmotriUserIE,
    SmotriBroadcastIE,
 )
 from .snotr import SnotrIE
 from .sohu import SohuIE
 from .sonyliv import SonyLIVIE
@ -1101,16 +1166,28 @@ from .spankbang import (
    SpankBangPlaylistIE,
 )
 from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE, SpiegelArticleIE
+from .spiegel import SpiegelIE
 from .spiegeltv import SpiegeltvIE
 from .spike import (
    BellatorIE,
    ParamountNetworkIE,
 )
-from .stitcher import StitcherIE
+from .stitcher import (
    StitcherIE,
    StitcherShowIE,
 )
 from .sport5 import Sport5IE
 from .sportbox import SportBoxIE
 from .sportdeutschland import SportDeutschlandIE
 from .spotify import (
    SpotifyIE,
    SpotifyShowIE,
 )
 from .spreaker import (
    SpreakerIE,
    SpreakerPageIE,
    SpreakerShowIE,
    SpreakerShowPageIE,
 )
 from .springboardplatform import SpringboardPlatformIE
 from .sprout import SproutIE
 from .srgssr import (
@ -1120,6 +1197,11 @@ from .srgssr import (
 from .srmediathek import SRMediathekIE
 from .stanfordoc import StanfordOpenClassroomIE
 from .steam import SteamIE
 from .storyfire import (
    StoryFireIE,
    StoryFireUserIE,
    StoryFireSeriesIE,
 )
 from .streamable import StreamableIE
 from .streamcloud import StreamcloudIE
 from .streamcz import StreamCZIE
@ -1145,7 +1227,6 @@ from .tagesschau import (
    TagesschauIE,
 )
 from .tass import TassIE
 from .tastytrade import TastyTradeIE
 from .tbs import TBSIE
 from .tdslifeway import TDSLifewayIE
 from .teachable import (
@ -1172,6 +1253,7 @@ from .telequebec import (
    TeleQuebecSquatIE,
    TeleQuebecEmissionIE,
    TeleQuebecLiveIE,
    TeleQuebecVideoIE,
 )
 from .teletask import TeleTaskIE
 from .telewebion import TelewebionIE
@ -1208,7 +1290,10 @@ from .tnaflix import (
    EMPFlixIE,
    MovieFapIE,
 )
-from .toggle import ToggleIE
+from .toggle import (
    ToggleIE,
    MeWatchIE,
 )
 from .tonline import TOnlineIE
 from .toongoggles import ToonGogglesIE
 from .toutv import TouTvIE
@ -1219,6 +1304,10 @@ from .transistorfm import (
    TransistorFMShareIE,
 )
 from .trilulilu import TriluliluIE
 from .trovo import (
    TrovoIE,
    TrovoVodIE,
 )
 from .trunews import TruNewsIE
 from .trutv import TruTVIE
 from .tubafm import (
@ -1241,6 +1330,7 @@ from .tv2 import (
    TV2IE,
    TV2ArticleIE,
    KatsomoIE,
    MTVUutisetArticleIE,
 )
 from .tv2dk import (
    TV2DKIE,
@ -1249,7 +1339,14 @@ from .tv2dk import (
 from .tv2hu import TV2HuIE
 from .tv4 import TV4IE
 from .tv5mondeplus import TV5MondePlusIE
-from .tva import TVAIE
+from .tv5unis import (
    TV5UnisVideoIE,
    TV5UnisIE,
 )
 from .tva import (
    TVAIE,
    QubIE,
 )
 from .tvanouvelles import (
    TVANouvellesIE,
    TVANouvellesArticleIE,
@ -1258,6 +1355,7 @@ from .tvc import (
    TVCIE,
    TVCArticleIE,
 )
 from .tver import TVerIE
 from .tvigle import TvigleIE
 from .tvland import TVLandIE
 from .tvn24 import TVN24IE
@ -1376,7 +1474,6 @@ from .vidme import (
    VidmeUserIE,
    VidmeUserLikesIE,
 )
 from .vidzi import VidziIE
 from .vier import VierIE, VierVideosIE
 from .viewlift import (
    ViewLiftIE,
@ -1436,10 +1533,14 @@ from .vrv import (
    VRVSeriesIE,
 )
 from .vshare import VShareIE
 from .vtm import VTMIE
 from .medialaan import MedialaanIE
 from .vube import VubeIE
 from .vuclip import VuClipIE
-from .vvvvid import VVVVIDIE
+from .vvvvid import (
    VVVVIDIE,
    VVVVIDShowIE,
 )
 from .vyborymos import VyboryMosIE
 from .vzaar import VzaarIE
 from .wakanim import WakanimIE
@ -1551,7 +1652,6 @@ from .youtube import (
    YoutubeTruncatedURLIE,
 )
 from .zapiks import ZapiksIE
 from .zaq1 import Zaq1IE
 from .zattoo import (
    BBVTVIE,
    EinsUndEinsTVIE,
@ -1572,5 +1672,6 @@ from .zattoo import (
    ZattooLiveIE,
 )
 from .zdf import ZDFIE, ZDFChannelIE
 from .zhihu import ZhihuIE
 from .zingmp3 import ZingMp3IE
 from .zype import ZypeIE
--- a/haruhi_dl/extractor/facebook.py
+++ b/haruhi_dl/extractor/facebook.py
@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 import socket
@ -8,6 +9,7 @@ from .common import InfoExtractor
 from ..compat import (
    compat_etree_fromstring,
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_urllib_parse_unquote,
    compat_urllib_parse_unquote_plus,
@ -16,14 +18,17 @@ from ..utils import (
    clean_html,
    error_to_compat_str,
    ExtractorError,
    float_or_none,
    get_element_by_id,
    int_or_none,
    js_to_json,
    limit_length,
    parse_count,
    qualities,
    sanitized_Request,
    try_get,
    urlencode_postdata,
    urljoin,
 )
@ -39,11 +44,13 @@ class FacebookIE(InfoExtractor):
                                photo\.php|
                                video\.php|
                                video/embed|
-                                story\.php
+                                story\.php|
                                watch(?:/live)?/?
                            )\?(?:.*?)(?:v|video_id|story_fbid)=|
                            [^/]+/videos/(?:[^/]+/)?|
                            [^/]+/posts/|
-                            groups/[^/]+/permalink/
+                            groups/[^/]+/permalink/|
                            watchparty/
                        )|
                    facebook:
                )
@ -54,8 +61,6 @@ class FacebookIE(InfoExtractor):
    _NETRC_MACHINE = 'facebook'
    IE_NAME = 'facebook'
    _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
    _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
    _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
@ -72,6 +77,7 @@ class FacebookIE(InfoExtractor):
        },
        'skip': 'Requires logging in',
    }, {
        # data.video
        'url': 'https://www.facebook.com/video.php?v=274175099429670',
        'info_dict': {
            'id': '274175099429670',
@ -133,6 +139,7 @@ class FacebookIE(InfoExtractor):
        },
    }, {
        # have 1080P, but only up to 720p in swf params
        # data.video.story.attachments[].media
        'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
        'md5': '9571fae53d4165bbbadb17a94651dcdc',
        'info_dict': {
@ -147,6 +154,7 @@ class FacebookIE(InfoExtractor):
        },
    }, {
        # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
        'info_dict': {
            'id': '1417995061575415',
@ -174,6 +182,7 @@ class FacebookIE(InfoExtractor):
            'skip_download': True,
        },
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
        'info_dict': {
            'id': '1396382447100162',
@ -193,18 +202,23 @@ class FacebookIE(InfoExtractor):
        'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
        'only_matching': True,
    }, {
        # data.mediaset.currMedia.edges
        'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
        'only_matching': True,
    }, {
        # data.video.story.attachments[].media
        'url': 'facebook:544765982287235',
        'only_matching': True,
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
        'only_matching': True,
    }, {
        # data.video.creation_story.attachments[].media
        'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
        'only_matching': True,
    }, {
        # data.video
        'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
        'only_matching': True,
    }, {
@ -212,6 +226,7 @@ class FacebookIE(InfoExtractor):
        'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
        'only_matching': True,
    }, {
        # data.video
        'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
        'info_dict': {
            'id': '359649331226507',
@ -222,7 +237,64 @@ class FacebookIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
        'url': 'https://www.facebook.com/100033620354545/videos/106560053808006/',
        'info_dict': {
            'id': '106560053808006',
        },
        'playlist_count': 2,
    }, {
        # data.video.story.attachments[].media
        'url': 'https://www.facebook.com/watch/?v=647537299265662',
        'only_matching': True,
    }, {
        # data.node.comet_sections.content.story.attachments[].style_type_renderer.attachment.all_subattachments.nodes[].media
        'url': 'https://www.facebook.com/PankajShahLondon/posts/10157667649866271',
        'info_dict': {
            'id': '10157667649866271',
        },
        'playlist_count': 3,
    }, {
        # data.nodes[].comet_sections.content.story.attachments[].style_type_renderer.attachment.media
        'url': 'https://m.facebook.com/Alliance.Police.Department/posts/4048563708499330',
        'info_dict': {
            'id': '117576630041613',
            'ext': 'mp4',
            # TODO: title can be extracted from video page
            'title': 'Facebook video #117576630041613',
            'uploader_id': '189393014416438',
            'upload_date': '20201123',
            'timestamp': 1606162592,
        },
        'skip': 'Requires logging in',
    }, {
        # node.comet_sections.content.story.attached_story.attachments.style_type_renderer.attachment.media
        'url': 'https://www.facebook.com/groups/ateistiskselskab/permalink/10154930137678856/',
        'info_dict': {
            'id': '211567722618337',
            'ext': 'mp4',
            'title': 'Facebook video #211567722618337',
            'uploader_id': '127875227654254',
            'upload_date': '20161122',
            'timestamp': 1479793574,
        },
    }, {
        # data.video.creation_story.attachments[].media
        'url': 'https://www.facebook.com/watch/live/?v=1823658634322275',
        'only_matching': True,
    }, {
        'url': 'https://www.facebook.com/watchparty/211641140192478',
        'info_dict': {
            'id': '211641140192478',
        },
        'playlist_count': 1,
        'skip': 'Requires logging in',
    }]
    _SUPPORTED_PAGLETS_REGEX = r'(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_[0-9a-f]+)'
    _api_config = {
        'graphURI': '/api/graphql/'
    }
    @staticmethod
    def _extract_urls(webpage, **kwargs):
@ -305,23 +377,24 @@ class FacebookIE(InfoExtractor):
    def _real_initialize(self):
        self._login()
-    def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+    def _extract_from_url(self, url, video_id):
-        req = sanitized_Request(url)
+        webpage = self._download_webpage(
-        req.add_header('User-Agent', self._CHROME_USER_AGENT)
+            url.replace('://m.facebook.com/', '://www.facebook.com/'), video_id)
        webpage = self._download_webpage(req, video_id)
        video_data = None
        def extract_video_data(instances):
            video_data = []
            for item in instances:
-                if item[1][0] == 'VideoConfig':
+                if try_get(item, lambda x: x[1][0]) == 'VideoConfig':
                    video_item = item[2][0]
                    if video_item.get('video_id'):
-                        return video_item['videoData']
+                        video_data.append(video_item['videoData'])
            return video_data
        server_js_data = self._parse_json(self._search_regex(
-            r'handleServerJS\(({.+})(?:\);|,")', webpage,
+            [r'handleServerJS\(({.+})(?:\);|,")', r'\bs\.handle\(({.+?})\);'],
-            'server js data', default='{}'), video_id, fatal=False)
+            webpage, 'server js data', default='{}'), video_id, fatal=False)
        if server_js_data:
            video_data = extract_video_data(server_js_data.get('instances', []))
@ -331,17 +404,118 @@ class FacebookIE(InfoExtractor):
                return extract_video_data(try_get(
                    js_data, lambda x: x['jsmods']['instances'], list) or [])
        def extract_dash_manifest(video, formats):
            dash_manifest = video.get('dash_manifest')
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
        def process_formats(formats):
            # Downloads with browser's User-Agent are rate limited. Working around
            # with non-browser User-Agent.
            for f in formats:
                f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
            self._sort_formats(formats)
        def extract_relay_data(_filter):
            return self._parse_json(self._search_regex(
                r'handleWithCustomApplyEach\([^,]+,\s*({.*?%s.*?})\);' % _filter,
                webpage, 'replay data', default='{}'), video_id, fatal=False) or {}
        def extract_relay_prefetched_data(_filter):
            replay_data = extract_relay_data(_filter)
            for require in (replay_data.get('require') or []):
                if require[0] == 'RelayPrefetchedStreamCache':
                    return try_get(require, lambda x: x[3][1]['__bbox']['result']['data'], dict) or {}
        if not video_data:
-            server_js_data = self._parse_json(
+            server_js_data = self._parse_json(self._search_regex([
-                self._search_regex(
+                r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+' + self._SUPPORTED_PAGLETS_REGEX,
-                    r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
+                r'bigPipe\.onPageletArrive\(({.*?id\s*:\s*"%s".*?})\);' % self._SUPPORTED_PAGLETS_REGEX
-                    webpage, 'js data', default='{}'),
+            ], webpage, 'js data', default='{}'), video_id, js_to_json, False)
                video_id, transform_source=js_to_json, fatal=False)
            video_data = extract_from_jsmods_instances(server_js_data)
        if not video_data:
-            if not fatal_if_no_video:
+            data = extract_relay_prefetched_data(
-                return webpage, False
+                r'"(?:dash_manifest|playable_url(?:_quality_hd)?)"\s*:\s*"[^"]+"')
            if data:
                entries = []
                def parse_graphql_video(video):
                    formats = []
                    q = qualities(['sd', 'hd'])
                    for (suffix, format_id) in [('', 'sd'), ('_quality_hd', 'hd')]:
                        playable_url = video.get('playable_url' + suffix)
                        if not playable_url:
                            continue
                        formats.append({
                            'format_id': format_id,
                            'quality': q(format_id),
                            'url': playable_url,
                        })
                    extract_dash_manifest(video, formats)
                    process_formats(formats)
                    v_id = video.get('videoId') or video.get('id') or video_id
                    info = {
                        'id': v_id,
                        'formats': formats,
                        'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']),
                        'uploader_id': try_get(video, lambda x: x['owner']['id']),
                        'timestamp': int_or_none(video.get('publish_time')),
                        'duration': float_or_none(video.get('playable_duration_in_ms'), 1000),
                    }
                    description = try_get(video, lambda x: x['savable_description']['text'])
                    title = video.get('name')
                    if title:
                        info.update({
                            'title': title,
                            'description': description,
                        })
                    else:
                        info['title'] = description or 'Facebook video #%s' % v_id
                    entries.append(info)
                def parse_attachment(attachment, key='media'):
                    media = attachment.get(key) or {}
                    if media.get('__typename') == 'Video':
                        return parse_graphql_video(media)
                nodes = data.get('nodes') or []
                node = data.get('node') or {}
                if not nodes and node:
                    nodes.append(node)
                for node in nodes:
                    story = try_get(node, lambda x: x['comet_sections']['content']['story'], dict) or {}
                    attachments = try_get(story, [
                        lambda x: x['attached_story']['attachments'],
                        lambda x: x['attachments']
                    ], list) or []
                    for attachment in attachments:
                        attachment = try_get(attachment, lambda x: x['style_type_renderer']['attachment'], dict)
                        ns = try_get(attachment, lambda x: x['all_subattachments']['nodes'], list) or []
                        for n in ns:
                            parse_attachment(n)
                        parse_attachment(attachment)
                edges = try_get(data, lambda x: x['mediaset']['currMedia']['edges'], list) or []
                for edge in edges:
                    parse_attachment(edge, key='node')
                video = data.get('video') or {}
                if video:
                    attachments = try_get(video, [
                        lambda x: x['story']['attachments'],
                        lambda x: x['creation_story']['attachments']
                    ], list) or []
                    for attachment in attachments:
                        parse_attachment(attachment)
                    if not entries:
                        parse_graphql_video(video)
                return self.playlist_result(entries, video_id)
        if not video_data:
            m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
            if m_msg is not None:
                raise ExtractorError(
@ -350,6 +524,43 @@ class FacebookIE(InfoExtractor):
            elif '>You must log in to continue' in webpage:
                self.raise_login_required()
        if not video_data and '/watchparty/' in url:
            post_data = {
                'doc_id': 3731964053542869,
                'variables': json.dumps({
                    'livingRoomID': video_id,
                }),
            }
            prefetched_data = extract_relay_prefetched_data(r'"login_data"\s*:\s*{')
            if prefetched_data:
                lsd = try_get(prefetched_data, lambda x: x['login_data']['lsd'], dict)
                if lsd:
                    post_data[lsd['name']] = lsd['value']
            relay_data = extract_relay_data(r'\[\s*"RelayAPIConfigDefaults"\s*,')
            for define in (relay_data.get('define') or []):
                if define[0] == 'RelayAPIConfigDefaults':
                    self._api_config = define[2]
            living_room = self._download_json(
                urljoin(url, self._api_config['graphURI']), video_id,
                data=urlencode_postdata(post_data))['data']['living_room']
            entries = []
            for edge in (try_get(living_room, lambda x: x['recap']['watched_content']['edges']) or []):
                video = try_get(edge, lambda x: x['node']['video']) or {}
                v_id = video.get('id')
                if not v_id:
                    continue
                v_id = compat_str(v_id)
                entries.append(self.url_result(
                    self._VIDEO_PAGE_TEMPLATE % v_id,
                    self.ie_key(), v_id, video.get('name')))
            return self.playlist_result(entries, video_id)
        if not video_data:
            # Video info not in first request, do a secondary request using
            # tahoe player specific URL
            tahoe_data = self._download_webpage(
@ -379,8 +590,19 @@ class FacebookIE(InfoExtractor):
        if not video_data:
            raise ExtractorError('Cannot parse data')
-        subtitles = {}
+        if len(video_data) > 1:
            entries = []
            for v in video_data:
                video_url = v[0].get('video_url')
                if not video_url:
                    continue
                entries.append(self.url_result(urljoin(
                    url, video_url), self.ie_key(), v[0].get('video_id')))
            return self.playlist_result(entries, video_id)
        video_data = video_data[0]
        formats = []
        subtitles = {}
        for f in video_data:
            format_id = f['stream_type']
            if f and isinstance(f, dict):
@ -399,22 +621,14 @@ class FacebookIE(InfoExtractor):
                            'url': src,
                            'preference': preference,
                        })
-            dash_manifest = f[0].get('dash_manifest')
+            extract_dash_manifest(f[0], formats)
            if dash_manifest:
                formats.extend(self._parse_mpd_formats(
                    compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
            subtitles_src = f[0].get('subtitles_src')
            if subtitles_src:
                subtitles.setdefault('en', []).append({'url': subtitles_src})
        if not formats:
            raise ExtractorError('Cannot find video formats')
-        # Downloads with browser's User-Agent are rate limited. Working around
+        process_formats(formats)
        # with non-browser User-Agent.
        for f in formats:
            f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
        self._sort_formats(formats)
        video_title = self._html_search_regex(
            r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
@ -454,35 +668,13 @@ class FacebookIE(InfoExtractor):
            'subtitles': subtitles,
        }
-        return webpage, info_dict
+        return info_dict
    def _real_extract(self, url):
        video_id = self._match_id(url)
        real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
-        webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
+        return self._extract_from_url(real_url, video_id)
        if info_dict:
            return info_dict
        if '/posts/' in url:
            video_id_json = self._search_regex(
                r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
                default='')
            if video_id_json:
                entries = [
                    self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
                    for vid in self._parse_json(video_id_json, video_id)]
                return self.playlist_result(entries, video_id)
            # Single Video?
            video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
            return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
        else:
            _, info_dict = self._extract_from_url(
                self._VIDEO_PAGE_TEMPLATE % video_id,
                video_id, fatal_if_no_video=True)
            return info_dict
 class FacebookPluginsVideoIE(InfoExtractor):
--- a/haruhi_dl/extractor/franceculture.py
+++ b/haruhi_dl/extractor/franceculture.py
@ -11,7 +11,7 @@ from ..utils import (
 class FranceCultureIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
        'info_dict': {
            'id': 'rendez-vous-au-pays-des-geeks',
@ -20,10 +20,14 @@ class FranceCultureIE(InfoExtractor):
            'title': 'Rendez-vous au pays des geeks',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20140301',
-            'timestamp': 1393642916,
+            'timestamp': 1393700400,
            'vcodec': 'none',
        }
-    }
+    }, {
        # no thumbnail
        'url': 'https://www.franceculture.fr/emissions/la-recherche-montre-en-main/la-recherche-montre-en-main-du-mercredi-10-octobre-2018',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        display_id = self._match_id(url)
@ -36,19 +40,19 @@ class FranceCultureIE(InfoExtractor):
                    </h1>|
                    <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
                ).*?
-                (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+                (<button[^>]+data-(?:url|asset-source)="[^"]+"[^>]+>)
            ''',
            webpage, 'video data'))
-        video_url = video_data['data-asset-source']
+        video_url = video_data.get('data-url') or video_data['data-asset-source']
-        title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+        title = video_data.get('data-asset-title') or video_data.get('data-diffusion-title') or self._og_search_title(webpage)
        description = self._html_search_regex(
            r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
            webpage, 'description', default=None)
        thumbnail = self._search_regex(
            r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
-            webpage, 'thumbnail', fatal=False)
+            webpage, 'thumbnail', default=None)
        uploader = self._html_search_regex(
            r'(?s)<span class="author">(.*?)</span>',
            webpage, 'uploader', default=None)
@ -64,6 +68,6 @@ class FranceCultureIE(InfoExtractor):
            'ext': ext,
            'vcodec': 'none' if ext == 'mp3' else None,
            'uploader': uploader,
-            'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+            'timestamp': int_or_none(video_data.get('data-start-time')) or int_or_none(video_data.get('data-asset-created-date')),
            'duration': int_or_none(video_data.get('data-duration')),
        }
--- a/haruhi_dl/extractor/franceinter.py
+++ b/haruhi_dl/extractor/franceinter.py
@ -16,6 +16,7 @@ class FranceInterIE(InfoExtractor):
            'ext': 'mp3',
            'title': 'Affaire Cahuzac : le contentieux du compte en Suisse',
            'description': 'md5:401969c5d318c061f86bda1fa359292b',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20160907',
        },
    }
@ -31,6 +32,7 @@ class FranceInterIE(InfoExtractor):
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
        upload_date_str = self._search_regex(
            r'class=["\']\s*cover-emission-period\s*["\'][^>]*>[^<]+\s+(\d{1,2}\s+[^\s]+\s+\d{4})<',
@ -48,6 +50,7 @@ class FranceInterIE(InfoExtractor):
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'formats': [{
                'url': video_url,
--- a/haruhi_dl/extractor/francetv.py
+++ b/haruhi_dl/extractor/francetv.py
@ -17,6 +17,7 @@ from ..utils import (
    parse_duration,
    try_get,
    url_or_none,
    urljoin,
 )
 from .dailymotion import DailymotionIE
@ -128,18 +129,38 @@ class FranceTVIE(InfoExtractor):
        is_live = None
-        formats = []
+        videos = []
-        for video in info['videos']:
+
-            if video['statut'] != 'ONLINE':
+        for video in (info.get('videos') or []):
            if video.get('statut') != 'ONLINE':
                continue
-            video_url = video['url']
+            if not video.get('url'):
                continue
            videos.append(video)
        if not videos:
            for device_type in ['desktop', 'mobile']:
                fallback_info = self._download_json(
                    'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id,
                    video_id, 'Downloading fallback %s video JSON' % device_type, query={
                        'device_type': device_type,
                        'browser': 'chrome',
                    }, fatal=False)
                if fallback_info and fallback_info.get('video'):
                    videos.append(fallback_info['video'])
        formats = []
        for video in videos:
            video_url = video.get('url')
            if not video_url:
                continue
            if is_live is None:
                is_live = (try_get(
-                    video, lambda x: x['plages_ouverture'][0]['direct'],
+                    video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True
-                    bool) is True) or '/live.francetv.fr/' in video_url
+                    or video.get('is_live') is True
-            format_id = video['format']
+                    or '/live.francetv.fr/' in video_url)
            format_id = video.get('format')
            ext = determine_ext(video_url)
            if ext == 'f4m':
                if georestricted:
@ -154,6 +175,9 @@ class FranceTVIE(InfoExtractor):
                    sign(video_url, format_id), video_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id=format_id,
                    fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False))
            elif video_url.startswith('rtmp'):
                formats.append({
                    'url': video_url,
@ -166,6 +190,7 @@ class FranceTVIE(InfoExtractor):
                        'url': video_url,
                        'format_id': format_id,
                    })
        self._sort_formats(formats)
        title = info['titre']
@ -185,10 +210,10 @@ class FranceTVIE(InfoExtractor):
        return {
            'id': video_id,
            'title': self._live_title(title) if is_live else title,
-            'description': clean_html(info['synopsis']),
+            'description': clean_html(info.get('synopsis')),
-            'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
+            'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')),
-            'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
+            'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')),
-            'timestamp': int_or_none(info['diffusion']['timestamp']),
+            'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])),
            'is_live': is_live,
            'formats': formats,
            'subtitles': subtitles,
--- a/haruhi_dl/extractor/fujitv.py
+++ b/haruhi_dl/extractor/fujitv.py
@ -0,0 +1,35 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 class FujiTVFODPlus7IE(InfoExtractor):
    _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)'
    _BASE_URL = 'http://i.fod.fujitv.co.jp/'
    _BITRATE_MAP = {
        300: (320, 180),
        800: (640, 360),
        1200: (1280, 720),
        2000: (1280, 720),
    }
    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = self._extract_m3u8_formats(
            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
        for f in formats:
            wh = self._BITRATE_MAP.get(f.get('tbr'))
            if wh:
                f.update({
                    'width': wh[0],
                    'height': wh[1],
                })
        self._sort_formats(formats)
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id,
        }
--- a/haruhi_dl/extractor/fxnetworks.py
+++ b/haruhi_dl/extractor/fxnetworks.py
@ -1,77 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .adobepass import AdobePassIE
 from ..utils import (
    extract_attributes,
    int_or_none,
    parse_age_limit,
    smuggle_url,
    update_url_query,
 )
 class FXNetworksIE(AdobePassIE):
    # Numeric video pages on fxnetworks.com and simpsonsworld.com.
    _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.fxnetworks.com/video/1032565827847',
        'md5': '8d99b97b4aa7a202f55b6ed47ea7e703',
        'info_dict': {
            'id': 'dRzwHC_MMqIv',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.',
            'age_limit': 14,
            'uploader': 'NEWA-FNG-FX',
            'upload_date': '20170825',
            'timestamp': 1503686274,
            'episode_number': 0,
            'season_number': 2,
            'series': 'Better Things',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.simpsonsworld.com/video/716094019682',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Returns a 'url_transparent' result that hands the release URL found
        # in the page over to the ThePlatform extractor.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        geo_notice = 'The content you are trying to access is not available in your region.'
        if geo_notice in webpage:
            self.raise_geo_restricted()

        # The <a> element whose rel attribute points at link.theplatform.com
        # carries the release URL and the data-* metadata read below.
        anchor_html = self._search_regex(
            r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')
        attrs = extract_attributes(anchor_html)
        player_type = self._search_regex(
            r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None)

        release_url = attrs['rel']
        title = attrs['data-title']
        rating = attrs.get('data-rating')

        # 'movies' players get the 'manifest' parameter, every other player
        # type gets 'switch'.
        query = {
            'mbr': 'true',
        }
        if player_type == 'movies':
            query['manifest'] = 'm3u'
        else:
            query['switch'] = 'http'

        # An auth token is added to the query only when the page flags the
        # video as requiring authentication.
        if attrs.get('data-req-auth') == '1':
            resource = self._get_mvpd_resource(
                attrs['data-channel'], title,
                attrs.get('data-guid'), rating)
            query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource)

        theplatform_url = smuggle_url(
            update_url_query(release_url, query), {'force_smil_url': True})

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'title': title,
            'url': theplatform_url,
            'series': attrs.get('data-show-title'),
            'episode_number': int_or_none(attrs.get('data-episode')),
            'season_number': int_or_none(attrs.get('data-season')),
            'thumbnail': attrs.get('data-large-thumb'),
            'age_limit': parse_age_limit(rating),
            'ie_key': 'ThePlatform',
        }
--- a/haruhi_dl/extractor/gamespot.py
+++ b/haruhi_dl/extractor/gamespot.py
@ -1,16 +1,7 @@
 from __future__ import unicode_literals
 import re
 from .once import OnceIE
-from ..compat import (
+from ..compat import compat_urllib_parse_unquote
    compat_urllib_parse_unquote,
 )
 from ..utils import (
    unescapeHTML,
    url_basename,
    dict_get,
 )
 class GameSpotIE(OnceIE):
@ -24,17 +15,16 @@ class GameSpotIE(OnceIE):
            'title': 'Arma 3 - Community Guide: SITREP I',
            'description': 'Check out this video where some of the basics of Arma 3 is explained.',
        },
        'skip': 'manifest URL give HTTP Error 404: Not Found',
    }, {
        'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/',
        'md5': '173ea87ad762cf5d3bf6163dceb255a6',
        'info_dict': {
            'id': 'gs-2300-6424837',
            'ext': 'mp4',
            'title': 'Now Playing - The Witcher 3: Wild Hunt',
            'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.',
        },
        'params': {
            'skip_download': True,  # m3u8 downloads
        },
    }, {
        'url': 'https://www.gamespot.com/videos/embed/6439218/',
        'only_matching': True,
@ -49,90 +39,40 @@ class GameSpotIE(OnceIE):
    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
-        data_video_json = self._search_regex(
+        data_video = self._parse_json(self._html_search_regex(
-            r'data-video=["\'](.*?)["\']', webpage, 'data video')
+            r'data-video=(["\'])({.*?})\1', webpage,
-        data_video = self._parse_json(unescapeHTML(data_video_json), page_id)
+            'video data', group=2), page_id)
        title = compat_urllib_parse_unquote(data_video['title'])
        streams = data_video['videoStreams']
        manifest_url = None
        formats = []
-        f4m_url = streams.get('f4m_stream')
+
-        if f4m_url:
+        m3u8_url = streams.get('adaptive_stream')
            manifest_url = f4m_url
            formats.extend(self._extract_f4m_formats(
                f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False))
        m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream'))
        if m3u8_url:
            manifest_url = m3u8_url
            m3u8_formats = self._extract_m3u8_formats(
                m3u8_url, page_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
-            formats.extend(m3u8_formats)
+            for f in m3u8_formats:
        progressive_url = dict_get(
            streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr'))
        if progressive_url and manifest_url:
            qualities_basename = self._search_regex(
                r'/([^/]+)\.csmil/',
                manifest_url, 'qualities basename', default=None)
            if qualities_basename:
                QUALITIES_RE = r'((,\d+)+,?)'
                qualities = self._search_regex(
                    QUALITIES_RE, qualities_basename,
                    'qualities', default=None)
                if qualities:
                    qualities = list(map(lambda q: int(q), qualities.strip(',').split(',')))
                    qualities.sort()
                    http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename)
                    http_url_basename = url_basename(progressive_url)
                    if m3u8_formats:
                        self._sort_formats(m3u8_formats)
                        m3u8_formats = list(filter(
                            lambda f: f.get('vcodec') != 'none', m3u8_formats))
                    if len(qualities) == len(m3u8_formats):
                        for q, m3u8_format in zip(qualities, m3u8_formats):
                            f = m3u8_format.copy()
                            f.update({
                                'url': progressive_url.replace(
                                    http_url_basename, http_template % q),
                                'format_id': f['format_id'].replace('hls', 'http'),
                                'protocol': 'http',
                            })
                formats.append(f)
-                    else:
+                http_f = f.copy()
-                        for q in qualities:
+                del http_f['manifest_url']
-                            formats.append({
+                http_f.update({
-                                'url': progressive_url.replace(
+                    'format_id': f['format_id'].replace('hls-', 'http-'),
-                                    http_url_basename, http_template % q),
+                    'protocol': 'http',
-                                'ext': 'mp4',
+                    'url': f['url'].replace('.m3u8', '.mp4'),
                                'format_id': 'http-%d' % q,
                                'tbr': q,
                })
                formats.append(http_f)
-        onceux_json = self._search_regex(
+        mpd_url = streams.get('adaptive_dash')
-            r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None)
+        if mpd_url:
-        if onceux_json:
+            formats.extend(self._extract_mpd_formats(
-            onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri')
+                mpd_url, page_id, mpd_id='dash', fatal=False))
            if onceux_url:
                formats.extend(self._extract_once_formats(re.sub(
                    r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url),
                    http_formats_preference=-1))
        if not formats:
            for quality in ['sd', 'hd']:
                # It's actually a link to a flv file
                flv_url = streams.get('f4m_{0}'.format(quality))
                if flv_url is not None:
                    formats.append({
                        'url': flv_url,
                        'ext': 'flv',
                        'format_id': quality,
                    })
        self._sort_formats(formats)
        return {
-            'id': data_video['guid'],
+            'id': data_video.get('guid') or page_id,
            'display_id': page_id,
-            'title': compat_urllib_parse_unquote(data_video['title']),
+            'title': title,
            'formats': formats,
            'description': self._html_search_meta('description', webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
--- a/haruhi_dl/extractor/gedidigital.py
+++ b/haruhi_dl/extractor/gedidigital.py
@ -0,0 +1,161 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    determine_ext,
    int_or_none,
 )
 class GediDigitalIE(InfoExtractor):
    # video.<site>.it and video.<site>.gelocal.it pages; the first all-digit
    # path segment reached after two or three leading segments is the id.
    _VALID_URL = r'''(?x)https?://video\.
        (?:
            (?:
                (?:espresso\.)?repubblica
                |lastampa
                |ilsecoloxix
            )|
            (?:
                iltirreno
                |messaggeroveneto
                |ilpiccolo
                |gazzettadimantova
                |mattinopadova
                |laprovinciapavese
                |tribunatreviso
                |nuovavenezia
                |gazzettadimodena
                |lanuovaferrara
                |corrierealpi
                |lasentinella
            )\.gelocal
        )\.it(?:/[^/]+){2,3}?/(?P<id>\d+)(?:[/?&#]|$)'''
    _TESTS = [{
        'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683',
        'md5': '84658d7fb9e55a6e57ecc77b73137494',
        'info_dict': {
            'id': '121559',
            'ext': 'mp4',
            'title': 'Il paradosso delle Regionali: ecco perché la Lega vince ma sembra aver perso',
            'description': 'md5:de7f4d6eaaaf36c153b599b10f8ce7ca',
            'thumbnail': r're:^https://www\.repstatic\.it/video/photo/.+?-thumb-full-.+?\.jpg$',
            'duration': 125,
        },
    }, {
        'url': 'https://video.espresso.repubblica.it/embed/tutti-i-video/01-ted-villa/14772/14870&width=640&height=360',
        'only_matching': True,
    }, {
        'url': 'https://video.repubblica.it/motori/record-della-pista-a-spa-francorchamps-la-pagani-huayra-roadster-bc-stupisce/367415/367963',
        'only_matching': True,
    }, {
        'url': 'https://video.ilsecoloxix.it/sport/cassani-e-i-brividi-azzurri-ai-mondiali-di-imola-qui-mi-sono-innamorato-del-ciclismo-da-ragazzino-incredibile-tornarci-da-ct/66184/66267',
        'only_matching': True,
    }, {
        'url': 'https://video.iltirreno.gelocal.it/sport/dentro-la-notizia-ferrari-cosa-succede-a-maranello/141059/142723',
        'only_matching': True,
    }, {
        'url': 'https://video.messaggeroveneto.gelocal.it/locale/maria-giovanna-elmi-covid-vaccino/138155/139268',
        'only_matching': True,
    }, {
        'url': 'https://video.ilpiccolo.gelocal.it/dossier/big-john/dinosauro-big-john-al-via-le-visite-guidate-a-trieste/135226/135751',
        'only_matching': True,
    }, {
        'url': 'https://video.gazzettadimantova.gelocal.it/locale/dal-ponte-visconteo-di-valeggio-l-and-8217sos-dei-ristoratori-aprire-anche-a-cena/137310/137818',
        'only_matching': True,
    }, {
        'url': 'https://video.mattinopadova.gelocal.it/dossier/coronavirus-in-veneto/covid-a-vo-un-anno-dopo-un-cuore-tricolore-per-non-dimenticare/138402/138964',
        'only_matching': True,
    }, {
        'url': 'https://video.laprovinciapavese.gelocal.it/locale/mede-zona-rossa-via-alle-vaccinazioni-per-gli-over-80/137545/138120',
        'only_matching': True,
    }, {
        'url': 'https://video.tribunatreviso.gelocal.it/dossier/coronavirus-in-veneto/ecco-le-prima-vaccinazioni-di-massa-nella-marca/134485/135024',
        'only_matching': True,
    }, {
        'url': 'https://video.nuovavenezia.gelocal.it/locale/camion-troppo-alto-per-il-ponte-ferroviario-perde-il-carico/135734/136266',
        'only_matching': True,
    }, {
        'url': 'https://video.gazzettadimodena.gelocal.it/locale/modena-scoperta-la-proteina-che-predice-il-livello-di-gravita-del-covid/139109/139796',
        'only_matching': True,
    }, {
        'url': 'https://video.lanuovaferrara.gelocal.it/locale/due-bombole-di-gpl-aperte-e-abbandonate-i-vigili-bruciano-il-gas/134391/134957',
        'only_matching': True,
    }, {
        'url': 'https://video.corrierealpi.gelocal.it/dossier/cortina-2021-i-mondiali-di-sci-alpino/mondiali-di-sci-il-timelapse-sulla-splendida-olympia/133760/134331',
        'only_matching': True,
    }, {
        'url': 'https://video.lasentinella.gelocal.it/locale/vestigne-centra-un-auto-e-si-ribalta/138931/139466',
        'only_matching': True,
    }, {
        'url': 'https://video.espresso.repubblica.it/tutti-i-video/01-ted-villa/14772',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Returns an info dict assembled from the page's meta tags and its
        # PlayerFactory.setParam(...) calls.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            ['twitter:title', 'og:title'], webpage, fatal=True)

        # Each match is a (type, name, value) triple where type is either
        # 'format' (a stream) or 'param' (metadata).
        player_params = re.findall(
            r"PlayerFactory\.setParam\('(?P<type>format|param)',\s*'(?P<name>[^']+)',\s*'(?P<val>[^']+)'\);",
            webpage)

        formats = []
        duration = thumb = None

        for kind, name, value in player_params:
            if kind == 'param':
                # Only the thumbnail and the duration are taken from params.
                if name in ('image_full', 'image'):
                    thumb = value
                elif name == 'videoDuration':
                    duration = int_or_none(value)
                continue
            if kind != 'format':
                continue

            # These format names are ignored outright.
            if name in ('video-hds-vod-ec', 'video-hls-vod-ec', 'video-viralize', 'video-youtube-pfp'):
                continue

            if name.endswith('-vod-ak'):
                formats.extend(self._extract_akamai_formats(
                    value, video_id, {'http': 'media.gedidigital.it'}))
                continue

            ext = determine_ext(value)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    value, video_id, 'mp4', 'm3u8_native', m3u8_id=name, fatal=False))
                continue

            # Anything else is a direct media URL.
            fmt = {
                'format_id': name,
                'url': value,
            }
            if ext == 'mp3':
                # Audio-only entry: the bitrate is read from the URL.
                abr = int_or_none(self._search_regex(
                    r'-mp3-audio-(\d+)', value, 'abr', default=None))
                fmt.update({
                    'abr': abr,
                    'tbr': abr,
                    'vcodec': 'none'
                })
            else:
                # Height and, optionally, bitrate are encoded in the format
                # name; when the name yields no bitrate, fall back to the URL.
                name_match = re.match(r'^video-rrtv-(\d+)(?:-(\d+))?$', name)
                if name_match:
                    fmt.update({
                        'height': int(name_match.group(1)),
                        'vbr': int_or_none(name_match.group(2)),
                    })
                if not fmt.get('vbr'):
                    fmt['vbr'] = int_or_none(self._search_regex(
                        r'-video-rrtv-(\d+)', value, 'abr', default=None))
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': self._html_search_meta(
                ['twitter:description', 'og:description', 'description'], webpage),
            'thumbnail': thumb or self._og_search_thumbnail(webpage),
            'formats': formats,
            'duration': duration,
        }
--- a/haruhi_dl/extractor/generic.py
+++ b/haruhi_dl/extractor/generic.py
@ -20,19 +20,24 @@ from ..utils import (
    ExtractorError,
    float_or_none,
    HEADRequest,
    int_or_none,
    is_html,
    js_to_json,
    KNOWN_EXTENSIONS,
    merge_dicts,
    mimetype2ext,
    orderedSet,
    parse_duration,
    sanitized_Request,
    smuggle_url,
    unescapeHTML,
-    unified_strdate,
+    unified_timestamp,
    unsmuggle_url,
    UnsupportedError,
    url_or_none,
    xpath_attr,
    xpath_text,
    xpath_with_ns,
 )
 from .commonprotocols import RtmpIE
 from .brightcove import (
@ -48,7 +53,6 @@ from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .tvc import TVCIE
 from .sportbox import SportBoxIE
 from .smotri import SmotriIE
 from .myvi import MyviIE
 from .condenast import CondeNastIE
 from .udn import UDNEmbedIE
@ -63,7 +67,10 @@ from .tube8 import Tube8IE
 from .mofosex import MofosexEmbedIE
 from .spankwire import SpankwireIE
 from .youporn import YouPornIE
-from .vimeo import VimeoIE
+from .vimeo import (
    VimeoIE,
    VHXEmbedIE,
 )
 from .dailymotion import DailymotionIE
 from .dailymail import DailyMailIE
 from .onionstudios import OnionStudiosIE
@ -91,6 +98,7 @@ from .piksel import PikselIE
 from .videa import VideaIE
 from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .arte import ArteTVEmbedIE
 from .videopress import VideoPressIE
 from .rutube import RutubeIE
 from .limelight import LimelightBaseIE
@ -125,6 +133,9 @@ from .rtlnl import RtlNlIE
 from .xnews import XLinkIE
 from .libsyn import LibsynIE
 from .pulsembed import PulsEmbedIE
 from .arcpublishing import ArcPublishingIE
 from .medialaan import MedialaanIE
 from .simplecast import SimplecastIE
 class GenericIE(InfoExtractor):
@ -203,11 +214,48 @@ class GenericIE(InfoExtractor):
        {
            'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
            'info_dict': {
-                'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+                'id': 'http://podcastfeeds.nbcnews.com/nbcnews/video/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
-                'ext': 'm4v',
+                'title': 'MSNBC Rachel Maddow (video)',
-                'upload_date': '20150228',
+                'description': 're:.*her unique approach to storytelling.*',
-                'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+            },
-            }
+            'playlist': [{
                'info_dict': {
                    'ext': 'mov',
                    'id': 'pdv_maddow_netcast_mov-12-04-2020-224335',
                    'title': 're:MSNBC Rachel Maddow',
                    'description': 're:.*her unique approach to storytelling.*',
                    'timestamp': int,
                    'upload_date': compat_str,
                    'duration': float,
                },
            }],
        },
        # RSS feed with item with description and thumbnails
        {
            'url': 'https://anchor.fm/s/dd00e14/podcast/rss',
            'info_dict': {
                'id': 'https://anchor.fm/s/dd00e14/podcast/rss',
                'title': 're:.*100% Hydrogen.*',
                'description': 're:.*In this episode.*',
            },
            'playlist': [{
                'info_dict': {
                    'ext': 'm4a',
                    'id': 'c1c879525ce2cb640b344507e682c36d',
                    'title': 're:Hydrogen!',
                    'description': 're:.*In this episode we are going.*',
                    'timestamp': 1567977776,
                    'upload_date': '20190908',
                    'duration': 459,
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'episode_number': 1,
                    'season_number': 1,
                    'age_limit': 0,
                },
            }],
            'params': {
                'skip_download': True,
            },
        },
        # RSS feed with enclosures and unsupported link URLs
        {
@ -2111,23 +2159,23 @@ class GenericIE(InfoExtractor):
                'skip_download': True,
            },
        },
-        {
+        # {
-            # Zype embed
+        #     # Zype embed
-            'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+        #     'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
-            'info_dict': {
+        #     'info_dict': {
-                'id': '5b400b834b32992a310622b9',
+        #         'id': '5b400b834b32992a310622b9',
-                'ext': 'mp4',
+        #         'ext': 'mp4',
-                'title': 'Smoky Barbecue Favorites',
+        #         'title': 'Smoky Barbecue Favorites',
-                'thumbnail': r're:^https?://.*\.jpe?g',
+        #         'thumbnail': r're:^https?://.*\.jpe?g',
-                'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+        #         'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
-                'upload_date': '20170909',
+        #         'upload_date': '20170909',
-                'timestamp': 1504915200,
+        #         'timestamp': 1504915200,
-            },
+        #     },
-            'add_ie': [ZypeIE.ie_key()],
+        #     'add_ie': [ZypeIE.ie_key()],
-            'params': {
+        #     'params': {
-                'skip_download': True,
+        #         'skip_download': True,
-            },
+        #     },
-        },
+        # },
        {
            # videojs embed
            'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
@ -2205,6 +2253,54 @@ class GenericIE(InfoExtractor):
        #         'force_generic_extractor': True,
        #     },
        # }
        {
            # VHX Embed
            'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
            'info_dict': {
                'id': '858208',
                'ext': 'mp4',
                'title': 'Untitled',
                'uploader_id': 'user80538407',
                'uploader': 'OTT Videos',
            },
        },
        {
            # ArcPublishing PoWa video player
            'url': 'https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/',
            'md5': 'b03b2fac8680e1e5a7cc81a5c27e71b3',
            'info_dict': {
                'id': '8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
                'ext': 'mp4',
                'title': 'Senate candidates wave to voters on Anchorage streets',
                'description': 'md5:91f51a6511f090617353dc720318b20e',
                'timestamp': 1604378735,
                'upload_date': '20201103',
                'duration': 1581,
            },
        },
        {
            # MyChannels SDK embed
            # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen
            'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/',
            'md5': '90c0699c37006ef18e198c032d81739c',
            'info_dict': {
                'id': '194165',
                'ext': 'mp4',
                'title': 'Burgemeester Aboutaleb spreekt relschoppers toe',
                'timestamp': 1611740340,
                'upload_date': '20210127',
                'duration': 159,
            },
        },
        {
            # Simplecast player embed
            'url': 'https://www.bio.org/podcast',
            'info_dict': {
                'id': 'podcast',
                'title': 'I AM BIO Podcast | BIO',
            },
            'playlist_mincount': 52,
        },
    ]
    def report_following_redirect(self, new_url):
@ -2216,6 +2312,10 @@ class GenericIE(InfoExtractor):
        playlist_desc_el = doc.find('./channel/description')
        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
        NS_MAP = {
            'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
        }
        entries = []
        for it in doc.findall('./channel/item'):
            next_url = None
@ -2231,10 +2331,33 @@ class GenericIE(InfoExtractor):
            if not next_url:
                continue
            def itunes(key):
                return xpath_text(
                    it, xpath_with_ns('./itunes:%s' % key, NS_MAP),
                    default=None)
            duration = itunes('duration')
            explicit = (itunes('explicit') or '').lower()
            if explicit in ('true', 'yes'):
                age_limit = 18
            elif explicit in ('false', 'no'):
                age_limit = 0
            else:
                age_limit = None
            entries.append({
                '_type': 'url_transparent',
                'url': next_url,
                'title': it.find('title').text,
                'description': xpath_text(it, 'description', default=None),
                'timestamp': unified_timestamp(
                    xpath_text(it, 'pubDate', default=None)),
                'duration': int_or_none(duration) or parse_duration(duration),
                'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),
                'episode': itunes('title'),
                'episode_number': int_or_none(itunes('episode')),
                'season_number': int_or_none(itunes('season')),
                'age_limit': age_limit,
            })
        return {
@ -2354,7 +2477,7 @@ class GenericIE(InfoExtractor):
        info_dict = {
            'id': video_id,
            'title': self._generic_title(url),
-            'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
+            'timestamp': unified_timestamp(head_response.headers.get('Last-Modified'))
        }
        # Check for direct link to a video
@ -2417,6 +2540,9 @@ class GenericIE(InfoExtractor):
        webpage = self._webpage_read_content(
            full_response, url, video_id, prefix=first_bytes)
        if '<title>DPG Media Privacy Gate</title>' in webpage:
            webpage = self._download_webpage(url, video_id)
        self.report_extraction(video_id)
        # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest?
@ -2592,6 +2718,10 @@ class GenericIE(InfoExtractor):
            SVTIE,
            XLinkIE,
            LibsynIE,
            VHXEmbedIE,
            ArcPublishingIE,
            MedialaanIE,
            SimplecastIE,
        ):
            try:
                ie_key = embie.ie_key()
@ -2751,11 +2881,9 @@ class GenericIE(InfoExtractor):
            return self.url_result(ustream_url, UstreamIE.ie_key())
        # Look for embedded arte.tv player
-        mobj = re.search(
+        arte_urls = ArteTVEmbedIE._extract_urls(webpage)
-            r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
+        if arte_urls:
-            webpage)
+            return self.playlist_from_matches(arte_urls, video_id, video_title)
        if mobj is not None:
            return self.url_result(mobj.group('url'), 'ArteTVEmbed')
        # Look for embedded francetv player
        mobj = re.search(
@ -2764,11 +2892,6 @@ class GenericIE(InfoExtractor):
        if mobj is not None:
            return self.url_result(mobj.group('url'))
        # Look for embedded smotri.com player
        smotri_url = SmotriIE._extract_url(webpage)
        if smotri_url:
            return self.url_result(smotri_url, 'Smotri')
        # Look for embedded Myvi.ru player
        myvi_url = MyviIE._extract_url(webpage)
        if myvi_url:
--- a/haruhi_dl/extractor/go.py
+++ b/haruhi_dl/extractor/go.py
@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
        'disneynow': {
            'brand': '011',
            'resource_id': 'Disney',
-        }
+        },
        'fxnow.fxnetworks': {
            'brand': '025',
            'requestor_id': 'dtci',
        },
    }
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?P<sub_domain>%s)\.)?go|
-                            (?P<sub_domain_2>abc|freeform|disneynow)
+                            (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
                        )\.com/
                        (?:
                            (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
        'info_dict': {
            'id': 'VDKA12782841',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'md5:fa73584a95761c605d9d54904e35b407',
        },
        'params': {
            'geo_bypass_ip_block': '3.244.239.0/24',
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
        'only_matching': True,
--- a/haruhi_dl/extractor/googleplus.py
+++ b/haruhi_dl/extractor/googleplus.py
@ -1,73 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 import codecs
 from .common import InfoExtractor
 from ..utils import unified_strdate
 class GooglePlusIE(InfoExtractor):
    IE_DESC = 'Google Plus'
    # Post URLs on plus.google.com; the segment after "posts/" is the id.
    _VALID_URL = r'https?://plus\.google\.com/(?:[^/]+/)*?posts/(?P<id>\w+)'
    IE_NAME = 'plus.google'
    _TEST = {
        'url': 'https://plus.google.com/u/0/108897254135232129896/posts/ZButuJc6CtH',
        'info_dict': {
            'id': 'ZButuJc6CtH',
            'ext': 'flv',
            'title': '嘆きの天使 降臨',
            'upload_date': '20120613',
            'uploader': '井上ヨシマサ',
        }
    }

    def _real_extract(self, url):
        # Returns an info dict built from two pages: the post page (metadata)
        # and the photo page it links to (stream URLs).
        video_id = self._match_id(url)

        # First request: the post page.
        post_page = self._download_webpage(url, video_id, 'Downloading entry webpage')

        # The title is the first line of the og:description.
        title = self._og_search_description(post_page).splitlines()[0]

        date_text = self._html_search_regex(
            r'''(?x)<a.+?class="o-U-s\s[^"]+"\s+style="display:\s*none"\s*>
                    ([0-9]{4}-[0-9]{2}-[0-9]{2})</a>''',
            post_page, 'upload date', fatal=False, flags=re.VERBOSE)
        upload_date = unified_strdate(date_text)

        uploader = self._html_search_regex(
            r'rel="author".*?>(.*?)</a>', post_page, 'uploader', fatal=False)

        # Second request: the photos link found in the post page, made
        # absolute when it is relative.
        DOMAIN = 'https://plus.google.com/'
        photo_page_url = self._search_regex(
            r'<a href="((?:%s)?photos/.*?)"' % re.escape(DOMAIN),
            post_page, 'video page URL')
        if not photo_page_url.startswith(DOMAIN):
            photo_page_url = DOMAIN + photo_page_url
        photo_page = self._download_webpage(
            photo_page_url, video_id, 'Downloading video page')

        escape_decoder = codecs.getdecoder('unicode_escape')

        def decode_unicode_escapes(text):
            # Replace every \uXXXX sequence in text with the character it denotes.
            return re.sub(
                r'\\u[0-9a-fA-F]{4,}',
                lambda m: escape_decoder(m.group(0))[0],
                text)

        # One format per (width, height, URL) triple found in the photo page.
        formats = []
        for width, height, escaped_url in re.findall(
                r'\d+,(\d+),(\d+),"(https?://[^.]+\.googleusercontent\.com.*?)"', photo_page):
            formats.append({
                'url': decode_unicode_escapes(escaped_url),
                'ext': 'flv',
                'width': int(width),
                'height': int(height),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'uploader': uploader,
            'upload_date': upload_date,
            'formats': formats,
        }
--- a/haruhi_dl/extractor/googlepodcasts.py
+++ b/haruhi_dl/extractor/googlepodcasts.py
@ -0,0 +1,88 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_podcast_url,
    int_or_none,
    try_get,
    urlencode_postdata,
 )
 class GooglePodcastsBaseIE(InfoExtractor):
    """Shared request and parsing helpers for the Google Podcasts extractors."""
    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
    def _batch_execute(self, func_id, video_id, params):
        """Call *func_id* through the batchexecute endpoint and return its decoded payload.

        *params* is JSON-encoded into the request envelope. The outermost
        ``[...]`` span of the response is parsed as JSON, and the string at
        position ``[0][2]`` of it is JSON-decoded again and returned.
        """
        envelope = [[[func_id, json.dumps(params), None, '1']]]
        post_data = urlencode_postdata({
            'f.req': json.dumps(envelope),
        })
        response = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=post_data,
            transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))
        return json.loads(response[0][2])
    def _extract_episode(self, episode):
        """Map the positional *episode* record onto an info dict."""
        info = {}
        info['id'] = episode[4][3]
        info['title'] = episode[8]
        info['url'] = clean_podcast_url(episode[13])
        info['thumbnail'] = episode[2]
        info['description'] = episode[9]
        # Index 14 may be absent, so it is looked up defensively
        info['creator'] = try_get(episode, lambda x: x[14])
        info['timestamp'] = int_or_none(episode[11])
        info['duration'] = int_or_none(episode[12])
        info['series'] = episode[1]
        return info
 class GooglePodcastsIE(GooglePodcastsBaseIE):
    """Extractor for a single Google Podcasts episode page."""
    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        }
    }
    def _real_extract(self, url):
        """Look up one episode by the feed and episode tokens taken from *url*."""
        mobj = re.match(self._VALID_URL, url)
        b64_feed_url = mobj.group('feed_url')
        b64_guid = mobj.group('id')
        response = self._batch_execute(
            'oNjqVe', b64_guid, [b64_feed_url, b64_guid])
        # The episode record is the second element of the decoded payload
        return self._extract_episode(response[1])
 class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    """Extractor for a whole Google Podcasts feed, returned as a playlist."""
    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }
    def _real_extract(self, url):
        """Fetch the feed identified in *url* and return its episodes as a playlist."""
        b64_feed_url = self._match_id(url)
        data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
        # Episodes live at data[1][0]; a missing list yields an empty playlist
        episodes = try_get(data, lambda x: x[1][0]) or []
        entries = [self._extract_episode(episode) for episode in episodes]
        # Feed metadata lives at data[3]: title at index 0, description at index 2
        feed = try_get(data, lambda x: x[3]) or []
        title = try_get(feed, lambda x: x[0])
        description = try_get(feed, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=title, playlist_description=description)
--- a/haruhi_dl/extractor/hotstar.py
+++ b/haruhi_dl/extractor/hotstar.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import hashlib
 import hmac
 import json
 import re
 import time
 import uuid
@ -25,43 +26,50 @@ from ..utils import (
 class HotStarBaseIE(InfoExtractor):
    _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
-    def _call_api_impl(self, path, video_id, query):
+    def _call_api_impl(self, path, video_id, headers, query, data=None):
        st = int(time.time())
        exp = st + 6000
        auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
        auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
-        response = self._download_json(
+        h = {'hotstarauth': auth}
-            'https://api.hotstar.com/' + path, video_id, headers={
+        h.update(headers)
-                'hotstarauth': auth,
+        return self._download_json(
            'https://api.hotstar.com/' + path,
            video_id, headers=h, query=query, data=data)
    def _call_api(self, path, video_id, query_name='contentId'):
        response = self._call_api_impl(path, video_id, {
            'x-country-code': 'IN',
            'x-platform-code': 'JIO',
-            }, query=query)
+        }, {
            query_name: video_id,
            'tas': 10000,
        })
        if response['statusCode'] != 'OK':
            raise ExtractorError(
                response['body']['message'], expected=True)
        return response['body']['results']
-    def _call_api(self, path, video_id, query_name='contentId'):
+    def _call_api_v2(self, path, video_id, headers, query=None, data=None):
-        return self._call_api_impl(path, video_id, {
+        h = {'X-Request-Id': compat_str(uuid.uuid4())}
-            query_name: video_id,
+        h.update(headers)
-            'tas': 10000,
+        try:
        })
    def _call_api_v2(self, path, video_id):
            return self._call_api_impl(
-            '%s/in/contents/%s' % (path, video_id), video_id, {
+                path, video_id, h, query, data)
-                'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+        except ExtractorError as e:
-                'client': 'mweb',
+            if isinstance(e.cause, compat_HTTPError):
-                'clientVersion': '6.18.0',
+                if e.cause.code == 402:
-                'deviceId': compat_str(uuid.uuid4()),
+                    self.raise_login_required()
-                'osName': 'Windows',
+                message = self._parse_json(e.cause.read().decode(), video_id)['message']
-                'osVersion': '10',
+                if message in ('Content not available in region', 'Country is not supported'):
-            })
+                    raise self.raise_geo_restricted(message)
                raise ExtractorError(message)
            raise e
 class HotStarIE(HotStarBaseIE):
    IE_NAME = 'hotstar'
-    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
+    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+[/-])?(?P<id>\d{10})'
    _TESTS = [{
        # contentData
        'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
@ -92,8 +100,13 @@ class HotStarIE(HotStarBaseIE):
        # only available via api v2
        'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/in/tv/start-music/1260005217/cooks-vs-comalis/1100039717',
        'only_matching': True,
    }]
    _GEO_BYPASS = False
    _DEVICE_ID = None
    _USER_TOKEN = None
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -121,7 +134,30 @@ class HotStarIE(HotStarBaseIE):
        headers = {'Referer': url}
        formats = []
        geo_restricted = False
-        playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
+
        if not self._USER_TOKEN:
            self._DEVICE_ID = compat_str(uuid.uuid4())
            self._USER_TOKEN = self._call_api_v2('um/v3/users', video_id, {
                'X-HS-Platform': 'PCTV',
                'Content-Type': 'application/json',
            }, data=json.dumps({
                'device_ids': [{
                    'id': self._DEVICE_ID,
                    'type': 'device_id',
                }],
            }).encode())['user_identity']
        playback_sets = self._call_api_v2(
            'play/v2/playback/content/' + video_id, video_id, {
                'X-HS-Platform': 'web',
                'X-HS-AppVersion': '6.99.1',
                'X-HS-UserToken': self._USER_TOKEN,
            }, query={
                'device-id': self._DEVICE_ID,
                'desired-config': 'encryption:plain',
                'os-name': 'Windows',
                'os-version': '10',
            })['data']['playBackSets']
        for playback_set in playback_sets:
            if not isinstance(playback_set, dict):
                continue
@ -163,19 +199,22 @@ class HotStarIE(HotStarBaseIE):
        for f in formats:
            f.setdefault('http_headers', {}).update(headers)
        image = try_get(video_data, lambda x: x['image']['h'], compat_str)
        return {
            'id': video_id,
            'title': title,
            'thumbnail': 'https://img1.hotstarext.com/image/upload/' + image if image else None,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
            'formats': formats,
            'channel': video_data.get('channelName'),
-            'channel_id': video_data.get('channelId'),
+            'channel_id': str_or_none(video_data.get('channelId')),
            'series': video_data.get('showName'),
            'season': video_data.get('seasonName'),
            'season_number': int_or_none(video_data.get('seasonNo')),
-            'season_id': video_data.get('seasonId'),
+            'season_id': str_or_none(video_data.get('seasonId')),
            'episode': title,
            'episode_number': int_or_none(video_data.get('episodeNo')),
        }
@ -183,7 +222,7 @@ class HotStarIE(HotStarBaseIE):
 class HotStarPlaylistIE(HotStarBaseIE):
    IE_NAME = 'hotstar:playlist'
-    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:[a-z]{2}/)?tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
        'info_dict': {
@ -193,6 +232,9 @@ class HotStarPlaylistIE(HotStarBaseIE):
    }, {
        'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
        'only_matching': True,
    }, {
        'url': 'https://www.hotstar.com/us/tv/masterchef-india/s-830/list/episodes/t-1_2_830',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/ign.py
+++ b/haruhi_dl/extractor/ign.py
@ -3,28 +3,39 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_parse_qs,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    HEADRequest,
    determine_ext,
    int_or_none,
    parse_iso8601,
    strip_or_none,
    try_get,
 )
-class IGNIE(InfoExtractor):
+class IGNBaseIE(InfoExtractor):
    def _call_api(self, slug):
        """Download the IGN API document for *slug*.

        ``self._PAGE_TYPE`` selects the API section and is used twice in the
        URL: once as the path prefix and once, pluralised, as the collection.
        """
        api_url = 'http://apis.ign.com/{0}/v3/{0}s/slug/{1}'.format(self._PAGE_TYPE, slug)
        return self._download_json(api_url, slug)
 class IGNIE(IGNBaseIE):
    """
    Extractor for some of the IGN sites, like www.ign.com, es.ign.com de.ign.com.
    Some videos of it.ign.com are also supported
    """
-    _VALID_URL = r'https?://.+?\.ign\.com/(?:[^/]+/)?(?P<type>videos|show_videos|articles|feature|(?:[^/]+/\d+/video))(/.+)?/(?P<name_or_id>.+)'
+    _VALID_URL = r'https?://(?:.+?\.ign|www\.pcmag)\.com/videos/(?:\d{4}/\d{2}/\d{2}/)?(?P<id>[^/?&#]+)'
    IE_NAME = 'ign.com'
    _PAGE_TYPE = 'video'
-    _API_URL_TEMPLATE = 'http://apis.ign.com/video/v3/videos/%s'
+    _TESTS = [{
    _EMBED_RE = r'<iframe[^>]+?["\']((?:https?:)?//.+?\.ign\.com.+?/embed.+?)["\']'
    _TESTS = [
        {
        'url': 'http://www.ign.com/videos/2013/06/05/the-last-of-us-review',
-            'md5': 'febda82c4bafecd2d44b6e1a18a595f8',
+        'md5': 'd2e1586d9987d40fad7867bf96a018ea',
        'info_dict': {
            'id': '8f862beef863986b2785559b9e1aa599',
            'ext': 'mp4',
@ -32,13 +43,147 @@ class IGNIE(InfoExtractor):
            'description': 'md5:c8946d4260a4d43a00d5ae8ed998870c',
            'timestamp': 1370440800,
            'upload_date': '20130605',
-                'uploader_id': 'cberidon@ign.com',
+            'tags': 'count:9',
        }
-        },
+    }, {
-        {
+        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
        'md5': 'f1581a6fe8c5121be5b807684aeac3f6',
        'info_dict': {
            'id': 'ee10d774b508c9b8ec07e763b9125b91',
            'ext': 'mp4',
            'title': 'What\'s New Now: Is GoGo Snooping on Your Data?',
            'description': 'md5:817a20299de610bd56f13175386da6fa',
            'timestamp': 1420571160,
            'upload_date': '20150106',
            'tags': 'count:4',
        }
    }, {
        'url': 'https://www.ign.com/videos/is-a-resident-evil-4-remake-on-the-way-ign-daily-fix',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Resolve an IGN video slug through the API and build its info dict.

        Formats come from the HLS and HDS manifests under ``refs``, the
        direct ``assets`` and the optional mezzanine file.
        """
        display_id = self._match_id(url)
        video = self._call_api(display_id)
        video_id = video['videoId']
        metadata = video['metadata']
        # 'name' is the mandatory last resort: a missing title must fail loudly
        title = metadata.get('longTitle') or metadata.get('title') or metadata['name']
        refs = video.get('refs') or {}
        formats = []
        hls_manifest = refs.get('m3uUrl')
        if hls_manifest:
            formats.extend(self._extract_m3u8_formats(
                hls_manifest, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        hds_manifest = refs.get('f4mUrl')
        if hds_manifest:
            formats.extend(self._extract_f4m_formats(
                hds_manifest, video_id, f4m_id='hds', fatal=False))
        # Direct assets; entries without a URL are skipped
        formats.extend({
            'url': asset['url'],
            'tbr': int_or_none(asset.get('bitrate'), 1000),
            'fps': int_or_none(asset.get('frame_rate')),
            'height': int_or_none(asset.get('height')),
            'width': int_or_none(asset.get('width')),
        } for asset in (video.get('assets') or []) if asset.get('url'))
        mezzanine_url = try_get(video, lambda x: x['system']['mezzanineUrl'])
        if mezzanine_url:
            formats.append({
                'ext': determine_ext(mezzanine_url, 'mp4'),
                'format_id': 'mezzanine',
                'preference': 1,
                'url': mezzanine_url,
            })
        self._sort_formats(formats)
        thumbnails = [{
            'url': thumbnail['url'],
        } for thumbnail in (video.get('thumbnails') or []) if thumbnail.get('url')]
        tags = [
            tag['displayName']
            for tag in (video.get('tags') or [])
            if tag.get('displayName')]
        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(metadata.get('description')),
            'timestamp': parse_iso8601(metadata.get('publishDate')),
            'duration': int_or_none(metadata.get('duration')),
            'display_id': display_id,
            'thumbnails': thumbnails,
            'formats': formats,
            'tags': tags,
        }
 class IGNVideoIE(InfoExtractor):
    """Extractor for regional IGN ``/<id>/video/`` and ``/<id>/trailer/`` pages.

    The page's ``/embed`` URL is requested with HEAD and the redirect target
    decides which extractor handles the video.
    """
    _VALID_URL = r'https?://.+?\.ign\.com/(?:[a-z]{2}/)?[^/]+/(?P<id>\d+)/(?:video|trailer)/'
    _TESTS = [{
        'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
        'md5': 'dd9aca7ed2657c4e118d8b261e5e9de1',
        'info_dict': {
            'id': 'e9be7ea899a9bbfc0674accc22a36cc8',
            'ext': 'mp4',
            'title': 'How Hitman Aims to Be Different Than Every Other Stealth Game - NYCC 2015',
            'description': 'Taking out assassination targets in Hitman has never been more stylish.',
            'timestamp': 1444665600,
            'upload_date': '20151012',
        }
    }, {
        'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
        'only_matching': True,
    }, {
        # Youtube embed
        'url': 'https://me.ign.com/ar/ratchet-clank-rift-apart/144327/trailer/embed',
        'only_matching': True,
    }, {
        # Twitter embed
        'url': 'http://adria.ign.com/sherlock-season-4/9687/trailer/embed',
        'only_matching': True,
    }, {
        # Vimeo embed
        'url': 'https://kr.ign.com/bic-2018/3307/trailer/embed',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Follow the embed redirect and delegate to the matching extractor."""
        video_id = self._match_id(url)
        # Swap the last path segment for 'embed' and see where it redirects
        embed_url = url.rsplit('/', 1)[0] + '/embed'
        final_url = self._request_webpage(HEADRequest(embed_url), video_id).geturl()
        query = compat_parse_qs(compat_urllib_parse_urlparse(final_url).query)
        ign_url = query.get('url', [None])[0]
        if not ign_url:
            # No 'url' parameter: hand the redirect target over as-is
            return self.url_result(final_url)
        return self.url_result(ign_url, IGNIE.ie_key())
 class IGNArticleIE(IGNBaseIE):
    _VALID_URL = r'https?://.+?\.ign\.com/(?:articles(?:/\d{4}/\d{2}/\d{2})?|(?:[a-z]{2}/)?feature/\d+)/(?P<id>[^/?&#]+)'
    _PAGE_TYPE = 'article'
    _TESTS = [{
        'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind',
        'info_dict': {
-                'id': '100-little-things-in-gta-5-that-will-blow-your-mind',
+            'id': '524497489e4e8ff5848ece34',
            'title': '100 Little Things in GTA 5 That Will Blow Your Mind',
        },
        'playlist': [
            {
@ -49,7 +194,6 @@ class IGNIE(InfoExtractor):
                    'description': 'Rockstar drops the mic on this generation of games. Watch our review of the masterly Grand Theft Auto V.',
                    'timestamp': 1379339880,
                    'upload_date': '20130916',
                        'uploader_id': 'danieljkrupa@gmail.com',
                },
            },
            {
@ -60,173 +204,54 @@ class IGNIE(InfoExtractor):
                    'description': 'The twisted beauty of GTA 5 in stunning slow motion.',
                    'timestamp': 1386878820,
                    'upload_date': '20131212',
                        'uploader_id': 'togilvie@ign.com',
                },
            },
        ],
        'params': {
            'playlist_items': '2-3',
            'skip_download': True,
        },
-        },
+    }, {
        {
        'url': 'http://www.ign.com/articles/2014/08/15/rewind-theater-wild-trailer-gamescom-2014?watch',
            'md5': '618fedb9c901fd086f6f093564ef8558',
        'info_dict': {
-                'id': '078fdd005f6d3c02f63d795faa1b984f',
+            'id': '53ee806780a81ec46e0790f8',
                'ext': 'mp4',
            'title': 'Rewind Theater - Wild Trailer Gamescom 2014',
                'description': 'Brian and Jared explore Michel Ancel\'s captivating new preview.',
                'timestamp': 1408047180,
                'upload_date': '20140814',
                'uploader_id': 'jamesduggan1990@gmail.com',
        },
-        },
+        'playlist_count': 2,
-        {
+    }, {
            'url': 'http://me.ign.com/en/videos/112203/video/how-hitman-aims-to-be-different-than-every-other-s',
            'only_matching': True,
        },
        {
            'url': 'http://me.ign.com/ar/angry-birds-2/106533/video/lrd-ldyy-lwl-lfylm-angry-birds',
            'only_matching': True,
        },
        {
        # videoId pattern
        'url': 'http://www.ign.com/articles/2017/06/08/new-ducktales-short-donalds-birthday-doesnt-go-as-planned',
        'only_matching': True,
        },
    ]
    def _find_video_id(self, webpage):
        """Return the first video id found in *webpage*, or None when no pattern matches."""
        # Patterns are tried in the listed order
        id_patterns = [
            r'"video_id"\s*:\s*"(.*?)"',
            r'class="hero-poster[^"]*?"[^>]*id="(.+?)"',
            r'data-video-id="(.+?)"',
            r'<object id="vid_(.+?)"',
            r'<meta name="og:image" content=".*/(.+?)-(.+?)/.+.jpg"',
            r'videoId&quot;\s*:\s*&quot;(.+?)&quot;',
            r'videoId["\']\s*:\s*["\']([^"\']+?)["\']',
        ]
        video_id = self._search_regex(id_patterns, webpage, 'video id', default=None)
        return video_id
    def _real_extract(self, url):
        """Extract from an IGN page by scraping its HTML.

        Non-video pages that embed several IGN video URLs become a playlist.
        Otherwise the page's video id is resolved through the API, falling
        back to the embed URL when no id is found.
        """
        mobj = re.match(self._VALID_URL, url)
        name_or_id = mobj.group('name_or_id')
        page_type = mobj.group('type')
        webpage = self._download_webpage(url, name_or_id)
        if page_type != 'video':
            multiple_urls = re.findall(
                r'<param name="flashvars"[^>]*value="[^"]*?url=(https?://www\.ign\.com/videos/.*?)["&]',
                webpage)
            if multiple_urls:
                return {
                    '_type': 'playlist',
                    'id': name_or_id,
                    'entries': [self.url_result(u, ie='IGN') for u in multiple_urls],
                }
        video_id = self._find_video_id(webpage)
        if video_id:
            return self._get_video_info(video_id)
        # No id on the page: delegate to whatever the embed iframe points at
        embed_url = self._search_regex(self._EMBED_RE, webpage, 'embed url')
        return self.url_result(embed_url)
    def _get_video_info(self, video_id):
        """Build the info dict for *video_id* from the IGN video API.

        Downloads ``self._API_URL_TEMPLATE % video_id`` and gathers formats
        from the HLS and HDS manifests under ``refs`` and from the direct
        ``assets``.

        Raises KeyError when the response lacks ``refs``, ``assets`` or
        ``metadata``, when an asset or thumbnail has no ``url``, or when no
        title is available under ``longTitle``, ``name`` or ``title``.
        """
        api_data = self._download_json(
            self._API_URL_TEMPLATE % video_id, video_id)
        formats = []
        m3u8_url = api_data['refs'].get('m3uUrl')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        f4m_url = api_data['refs'].get('f4mUrl')
        if f4m_url:
            formats.extend(self._extract_f4m_formats(
                f4m_url, video_id, f4m_id='hds', fatal=False))
        for asset in api_data['assets']:
            formats.append({
                'url': asset['url'],
                'tbr': asset.get('actual_bitrate_kbps'),
                'fps': asset.get('frame_rate'),
                'height': int_or_none(asset.get('height')),
                'width': int_or_none(asset.get('width')),
            })
        self._sort_formats(formats)
        thumbnails = [{
            'url': thumbnail['url']
        } for thumbnail in api_data.get('thumbnails', [])]
        metadata = api_data['metadata']
        # The last fallback used to read ``metadata.get['title']``, which
        # subscripts the bound method and raises TypeError instead of
        # looking the key up.
        title = metadata.get('longTitle') or metadata.get('name') or metadata['title']
        return {
            'id': api_data.get('videoId') or video_id,
            'title': title,
            'description': metadata.get('description'),
            'timestamp': parse_iso8601(metadata.get('publishDate')),
            'duration': int_or_none(metadata.get('duration')),
            'display_id': metadata.get('slug') or video_id,
            'uploader_id': metadata.get('creator'),
            'thumbnails': thumbnails,
            'formats': formats,
        }
 class OneUPIE(IGNIE):
    """Extractor for gamevideos.1up.com, built on the IGN extraction flow."""
    _VALID_URL = r'https?://gamevideos\.1up\.com/(?P<type>video)/id/(?P<name_or_id>.+)\.html'
    IE_NAME = '1up.com'
    _TESTS = [{
        'url': 'http://gamevideos.1up.com/video/id/34976.html',
        'md5': 'c9cc69e07acb675c31a16719f909e347',
        'info_dict': {
            'id': '34976',
            'ext': 'mp4',
            'title': 'Sniper Elite V2 - Trailer',
            'description': 'md5:bf0516c5ee32a3217aa703e9b1bc7826',
            'timestamp': 1313099220,
            'upload_date': '20110811',
            'uploader_id': 'IGN',
        }
    }]
    def _real_extract(self, url):
        """Run the parent extraction, then report the id taken from the URL."""
        info = super(OneUPIE, self)._real_extract(url)
        # Replace the id chosen by the parent with the one in the page URL
        info['id'] = re.match(self._VALID_URL, url).group('name_or_id')
        return info
 class PCMagIE(IGNIE):
    _VALID_URL = r'https?://(?:www\.)?pcmag\.com/(?P<type>videos|article2)(/.+)?/(?P<name_or_id>.+)'
    IE_NAME = 'pcmag'
    _EMBED_RE = r'iframe\.setAttribute\("src",\s*__util.objToUrlString\("http://widgets\.ign\.com/video/embed/content\.html?[^"]*url=([^"]+)["&]'
    _TESTS = [{
        'url': 'http://www.pcmag.com/videos/2015/01/06/010615-whats-new-now-is-gogo-snooping-on-your-data',
        'md5': '212d6154fd0361a2781075f1febbe9ad',
        'info_dict': {
            'id': 'ee10d774b508c9b8ec07e763b9125b91',
            'ext': 'mp4',
            'title': '010615_What\'s New Now: Is GoGo Snooping on Your Data?',
            'description': 'md5:a7071ae64d2f68cc821c729d4ded6bb3',
            'timestamp': 1420571160,
            'upload_date': '20150106',
            'uploader_id': 'cozzipix@gmail.com',
        }
    }, {
-        'url': 'http://www.pcmag.com/article2/0,2817,2470156,00.asp',
+        # Youtube embed
-        'md5': '94130c1ca07ba0adb6088350681f16c1',
+        'url': 'https://www.ign.com/articles/2021-mvp-named-in-puppy-bowl-xvii',
-        'info_dict': {
+        'only_matching': True,
-            'id': '042e560ba94823d43afcb12ddf7142ca',
+    }, {
-            'ext': 'mp4',
+        # IMDB embed
-            'title': 'HTC\'s Weird New Re Camera - What\'s New Now',
+        'url': 'https://www.ign.com/articles/2014/08/07/sons-of-anarchy-final-season-trailer',
-            'description': 'md5:53433c45df96d2ea5d0fda18be2ca908',
+        'only_matching': True,
-            'timestamp': 1412953920,
+    }, {
-            'upload_date': '20141010',
+        # Facebook embed
-            'uploader_id': 'chris_snyder@pcmag.com',
+        'url': 'https://www.ign.com/articles/2017/09/20/marvels-the-punisher-watch-the-new-trailer-for-the-netflix-series',
-        }
+        'only_matching': True,
    }, {
        # Brightcove embed
        'url': 'https://www.ign.com/articles/2016/01/16/supergirl-goes-flying-with-martian-manhunter-in-new-clip',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Return a playlist of every video referenced by an IGN article.

        Entries are produced lazily: first the article's primary media, if
        any, then each video URL embedded in the article content.
        """
        display_id = self._match_id(url)
        article = self._call_api(display_id)
        # Matches [ignvideo url="..."], [youtube clip_id="..."] and <iframe src="...">
        embed_re = r'(?:\[(?:ignvideo\s+url|youtube\s+clip_id)|<iframe[^>]+src)="([^"]+)"'
        def iter_entries():
            primary_url = try_get(article, lambda x: x['mediaRelations'][0]['media']['metadata']['url'])
            if primary_url:
                yield self.url_result(primary_url, IGNIE.ie_key())
            for content in (article.get('content') or []):
                for embedded_url in re.findall(embed_re, content):
                    yield self.url_result(embedded_url)
        playlist_id = article.get('articleId')
        playlist_title = strip_or_none(try_get(article, lambda x: x['metadata']['headline']))
        return self.playlist_result(iter_entries(), playlist_id, playlist_title)
--- a/haruhi_dl/extractor/iheart.py
+++ b/haruhi_dl/extractor/iheart.py
@ -0,0 +1,97 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    clean_podcast_url,
    int_or_none,
    str_or_none,
 )
 class IHeartRadioBaseIE(InfoExtractor):
    """Shared API access and episode parsing for the iHeartRadio extractors."""
    def _call_api(self, path, video_id, fatal=True, query=None):
        """Download JSON from the v3 podcast API at *path*."""
        api_url = 'https://api.iheart.com/api/v3/podcast/' + path
        return self._download_json(
            api_url, video_id, fatal=fatal, query=query)
    def _extract_episode(self, episode):
        """Return the info fields shared by single-episode and playlist entries."""
        # startDate is scaled down by 1000 to give the timestamp
        timestamp = int_or_none(episode.get('startDate'), 1000)
        duration = int_or_none(episode.get('duration'))
        return {
            'thumbnail': episode.get('imageUrl'),
            'description': clean_html(episode.get('description')),
            'timestamp': timestamp,
            'duration': duration,
        }
 class IHeartRadioIE(IHeartRadioBaseIE):
    """Extractor for a single iHeartRadio podcast episode.

    Matches episode pages on iheart.com as well as the ``iheartradio:<id>``
    form accepted by ``_VALID_URL``.
    """
    # Previously misspelled ``IENAME``; every sibling extractor declares
    # its name under ``IE_NAME``.
    IE_NAME = 'iheartradio'
    _VALID_URL = r'(?:https?://(?:www\.)?iheart\.com/podcast/[^/]+/episode/(?P<display_id>[^/?&#]+)-|iheartradio:)(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.iheart.com/podcast/105-behind-the-bastards-29236323/episode/part-one-alexander-lukashenko-the-dictator-70346499/?embed=true',
        'md5': 'c8609c92c8688dcb69d8541042b8abca',
        'info_dict': {
            'id': '70346499',
            'ext': 'mp3',
            'title': 'Part One: Alexander Lukashenko: The Dictator of Belarus',
            'description': 'md5:96cc7297b3a5a9ebae28643801c96fae',
            'timestamp': 1597741200,
            'upload_date': '20200818',
        }
    }
    def _real_extract(self, url):
        """Fetch one episode from the API and return its info dict.

        Raises KeyError when the response has no ``episode`` object or the
        episode lacks ``title`` or ``mediaUrl``.
        """
        episode_id = self._match_id(url)
        episode = self._call_api(
            'episodes/' + episode_id, episode_id)['episode']
        info = self._extract_episode(episode)
        info.update({
            'id': episode_id,
            'title': episode['title'],
            'url': clean_podcast_url(episode['mediaUrl']),
        })
        return info
 class IHeartRadioPodcastIE(IHeartRadioBaseIE):
    """Extractor for a whole iHeartRadio podcast, returned as a playlist."""
    IE_NAME = 'iheartradio:podcast'
    _VALID_URL = r'https?://(?:www\.)?iheart(?:podcastnetwork)?\.com/podcast/[^/?&#]+-(?P<id>\d+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://www.iheart.com/podcast/1119-it-could-happen-here-30717896/',
        'info_dict': {
            'id': '30717896',
            'title': 'It Could Happen Here',
            'description': 'md5:5842117412a967eb0b01f8088eb663e2',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://www.iheartpodcastnetwork.com/podcast/105-stuff-you-should-know-26940277',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """List the podcast's episodes and return them as ``iheartradio:`` URL entries."""
        podcast_id = self._match_id(url)
        path = 'podcasts/' + podcast_id
        # One request with a very large limit, so no paging is done here
        episodes = self._call_api(
            path + '/episodes', podcast_id, query={'limit': 1000000000})['data']
        entries = []
        for episode in episodes:
            episode_id = str_or_none(episode.get('id'))
            if episode_id:
                entry = self._extract_episode(episode)
                entry.update({
                    '_type': 'url',
                    'id': episode_id,
                    'title': episode.get('title'),
                    'url': 'iheartradio:' + episode_id,
                    'ie_key': IHeartRadioIE.ie_key(),
                })
                entries.append(entry)
        # Podcast metadata is optional: a failed request leaves title and description unset
        podcast = self._call_api(path, podcast_id, False) or {}
        playlist_title = podcast.get('title')
        playlist_description = podcast.get('description')
        return self.playlist_result(
            entries, podcast_id, playlist_title, playlist_description)
--- a/haruhi_dl/extractor/ina.py
+++ b/haruhi_dl/extractor/ina.py
@ -12,7 +12,7 @@ from ..utils import (
 class InaIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?ina\.fr/(?:video|audio)/(?P<id>[A-Z0-9_]+)'
    _TESTS = [{
        'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html',
        'md5': 'a667021bf2b41f8dc6049479d9bb38a3',
@ -31,6 +31,9 @@ class InaIE(InfoExtractor):
    }, {
        'url': 'https://www.ina.fr/video/P16173408-video.html',
        'only_matching': True,
    }, {
        'url': 'http://m.ina.fr/video/I12055569',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/infoq.py
+++ b/haruhi_dl/extractor/infoq.py
@ -54,7 +54,7 @@ class InfoQIE(BokeCCBaseIE):
    def _extract_rtmp_video(self, webpage):
        # The server URL is hardcoded
-        video_url = 'rtmpe://video.infoq.com/cfx/st/'
+        video_url = 'rtmpe://videof.infoq.com/cfx/st/'
        # Extract video URL
        encoded_id = self._search_regex(
@ -86,17 +86,18 @@ class InfoQIE(BokeCCBaseIE):
        return [{
            'format_id': 'http_video',
            'url': http_video_url,
            'http_headers': {'Referer': 'https://www.infoq.com/'},
        }]
    def _extract_http_audio(self, webpage, video_id):
-        fields = self._hidden_inputs(webpage)
+        fields = self._form_hidden_inputs('mp3Form', webpage)
        http_audio_url = fields.get('filename')
        if not http_audio_url:
            return []
        # base URL is found in the Location header in the response returned by
        # GET https://www.infoq.com/mp3download.action?filename=... when logged in.
-        http_audio_url = compat_urlparse.urljoin('http://res.infoq.com/downloads/mp3downloads/', http_audio_url)
+        http_audio_url = compat_urlparse.urljoin('http://ress.infoq.com/downloads/mp3downloads/', http_audio_url)
        http_audio_url = update_url_query(http_audio_url, self._extract_cf_auth(webpage))
        # audio files sometimes seem to be missing even if there is a download link
--- a/haruhi_dl/extractor/instagram.py
+++ b/haruhi_dl/extractor/instagram.py
@ -22,7 +22,7 @@ from ..utils import (
 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
        'md5': '0d2da106a9d2631273e192b372806516',
@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
-            'uploader': 'Naomi Leonor Phan-Quang',
+            'uploader': 'B E A U T Y  F O R  A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
    }, {
        'url': 'https://www.instagram.com/tv/aye83DjauH/',
        'only_matching': True,
    }, {
        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
        'only_matching': True,
    }]
    @staticmethod
@ -122,9 +125,9 @@ class InstagramIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id)
-        (video_url, description, thumbnail, timestamp, uploader,
+        (media, video_url, description, thumbnail, timestamp, uploader,
         uploader_id, like_count, comment_count, comments, height,
-         width) = [None] * 11
+         width) = [None] * 12
        shared_data = self._parse_json(
            self._search_regex(
@ -137,6 +140,18 @@ class InstagramIE(InfoExtractor):
                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
                 lambda x: x['entry_data']['PostPage'][0]['media']),
                dict)
        # _sharedData.entry_data.PostPage is empty when authenticated (see
        # https://github.com/ytdl-org/youtube-dl/pull/22880)
        if not media:
            additional_data = self._parse_json(
                self._search_regex(
                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
                    webpage, 'additional data', default='{}'),
                video_id, fatal=False)
            if additional_data:
                media = try_get(
                    additional_data, lambda x: x['graphql']['shortcode_media'],
                    dict)
        if media:
            video_url = media.get('video_url')
            height = int_or_none(media.get('dimensions', {}).get('height'))
@ -144,17 +159,23 @@ class InstagramIE(InfoExtractor):
            description = try_get(
                media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
                compat_str) or media.get('caption')
-                thumbnail = media.get('display_src')
+            thumbnail = media.get('display_src') or media.get('display_url')
            timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
            uploader = media.get('owner', {}).get('full_name')
            uploader_id = media.get('owner', {}).get('username')
-                def get_count(key, kind):
+            def get_count(keys, kind):
-                    return int_or_none(try_get(
+                if not isinstance(keys, (list, tuple)):
                    keys = [keys]
                for key in keys:
                    count = int_or_none(try_get(
                        media, (lambda x: x['edge_media_%s' % key]['count'],
                                lambda x: x['%ss' % kind]['count'])))
                    if count is not None:
                        return count
            like_count = get_count('preview_like', 'like')
-                comment_count = get_count('to_comment', 'comment')
+            comment_count = get_count(
                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
            comments = [{
                'author': comment.get('user', {}).get('username'),
--- a/haruhi_dl/extractor/itv.py
+++ b/haruhi_dl/extractor/itv.py
@ -1,29 +1,21 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import uuid
 import xml.etree.ElementTree as etree
 import json
 import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
 from ..compat import (
    compat_str,
    compat_etree_register_namespace,
 )
 from ..utils import (
    clean_html,
    determine_ext,
    ExtractorError,
    extract_attributes,
-    int_or_none,
+    get_element_by_class,
    JSON_LD_RE,
    merge_dicts,
    parse_duration,
    smuggle_url,
    url_or_none,
    xpath_with_ns,
    xpath_element,
    xpath_text,
 )
@ -31,14 +23,18 @@ class ITVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)'
    _GEO_COUNTRIES = ['GB']
    _TESTS = [{
-        'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053',
+        'url': 'https://www.itv.com/hub/liar/2a4547a0012',
        'info_dict': {
-            'id': '2a2936a0053',
+            'id': '2a4547a0012',
-            'ext': 'flv',
+            'ext': 'mp4',
-            'title': 'Home Movie',
+            'title': 'Liar - Series 2 - Episode 6',
            'description': 'md5:d0f91536569dec79ea184f0a44cca089',
            'series': 'Liar',
            'season_number': 2,
            'episode_number': 6,
        },
        'params': {
-            # rtmp download
+            # m3u8 download
            'skip_download': True,
        },
    }, {
@ -61,139 +57,8 @@ class ITVIE(InfoExtractor):
        params = extract_attributes(self._search_regex(
            r'(?s)(<[^>]+id="video"[^>]*>)', webpage, 'params'))
-        ns_map = {
+        ios_playlist_url = params.get('data-video-playlist') or params['data-video-id']
-            'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
+        hmac = params['data-video-hmac']
            'tem': 'http://tempuri.org/',
            'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types',
            'com': 'http://schemas.itv.com/2009/05/Common',
        }
        for ns, full_ns in ns_map.items():
            compat_etree_register_namespace(ns, full_ns)
        def _add_ns(name):
            return xpath_with_ns(name, ns_map)
        def _add_sub_element(element, name):
            return etree.SubElement(element, _add_ns(name))
        production_id = (
            params.get('data-video-autoplay-id')
            or '%s#001' % (
                params.get('data-video-episode-id')
                or video_id.replace('a', '/')))
        req_env = etree.Element(_add_ns('soapenv:Envelope'))
        _add_sub_element(req_env, 'soapenv:Header')
        body = _add_sub_element(req_env, 'soapenv:Body')
        get_playlist = _add_sub_element(body, ('tem:GetPlaylist'))
        request = _add_sub_element(get_playlist, 'tem:request')
        _add_sub_element(request, 'itv:ProductionId').text = production_id
        _add_sub_element(request, 'itv:RequestGuid').text = compat_str(uuid.uuid4()).upper()
        vodcrid = _add_sub_element(request, 'itv:Vodcrid')
        _add_sub_element(vodcrid, 'com:Id')
        _add_sub_element(request, 'itv:Partition')
        user_info = _add_sub_element(get_playlist, 'tem:userInfo')
        _add_sub_element(user_info, 'itv:Broadcaster').text = 'Itv'
        _add_sub_element(user_info, 'itv:DM')
        _add_sub_element(user_info, 'itv:RevenueScienceValue')
        _add_sub_element(user_info, 'itv:SessionId')
        _add_sub_element(user_info, 'itv:SsoToken')
        _add_sub_element(user_info, 'itv:UserToken')
        site_info = _add_sub_element(get_playlist, 'tem:siteInfo')
        _add_sub_element(site_info, 'itv:AdvertisingRestriction').text = 'None'
        _add_sub_element(site_info, 'itv:AdvertisingSite').text = 'ITV'
        _add_sub_element(site_info, 'itv:AdvertisingType').text = 'Any'
        _add_sub_element(site_info, 'itv:Area').text = 'ITVPLAYER.VIDEO'
        _add_sub_element(site_info, 'itv:Category')
        _add_sub_element(site_info, 'itv:Platform').text = 'DotCom'
        _add_sub_element(site_info, 'itv:Site').text = 'ItvCom'
        device_info = _add_sub_element(get_playlist, 'tem:deviceInfo')
        _add_sub_element(device_info, 'itv:ScreenSize').text = 'Big'
        player_info = _add_sub_element(get_playlist, 'tem:playerInfo')
        _add_sub_element(player_info, 'itv:Version').text = '2'
        headers = self.geo_verification_headers()
        headers.update({
            'Content-Type': 'text/xml; charset=utf-8',
            'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist',
        })
        info = self._search_json_ld(webpage, video_id, default={})
        formats = []
        subtitles = {}
        def extract_subtitle(sub_url):
            ext = determine_ext(sub_url, 'ttml')
            subtitles.setdefault('en', []).append({
                'url': sub_url,
                'ext': 'ttml' if ext == 'xml' else ext,
            })
        resp_env = self._download_xml(
            params['data-playlist-url'], video_id,
            headers=headers, data=etree.tostring(req_env), fatal=False)
        if resp_env:
            playlist = xpath_element(resp_env, './/Playlist')
            if playlist is None:
                fault_code = xpath_text(resp_env, './/faultcode')
                fault_string = xpath_text(resp_env, './/faultstring')
                if fault_code == 'InvalidGeoRegion':
                    self.raise_geo_restricted(
                        msg=fault_string, countries=self._GEO_COUNTRIES)
                elif fault_code not in (
                        'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
                    raise ExtractorError(
                        '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
                info.update({
                    'title': self._og_search_title(webpage),
                    'episode_title': params.get('data-video-episode'),
                    'series': params.get('data-video-title'),
                })
            else:
                title = xpath_text(playlist, 'EpisodeTitle', default=None)
                info.update({
                    'title': title,
                    'episode_title': title,
                    'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
                    'series': xpath_text(playlist, 'ProgrammeTitle'),
                    'duration': parse_duration(xpath_text(playlist, 'Duration')),
                })
                video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
                media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
                rtmp_url = media_files.attrib['base']
                for media_file in media_files.findall('MediaFile'):
                    play_path = xpath_text(media_file, 'URL')
                    if not play_path:
                        continue
                    tbr = int_or_none(media_file.get('bitrate'), 1000)
                    f = {
                        'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
                        'play_path': play_path,
                        # Providing this swfVfy allows to avoid truncated downloads
                        'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
                        'page_url': url,
                        'tbr': tbr,
                        'ext': 'flv',
                    }
                    app = self._search_regex(
                        'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
                    if app:
                        f.update({
                            'url': rtmp_url.split('?', 1)[0],
                            'app': app,
                        })
                    else:
                        f['url'] = rtmp_url
                    formats.append(f)
                for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
                    if caption_url.text:
                        extract_subtitle(caption_url.text)
        ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
        hmac = params.get('data-video-hmac')
        if ios_playlist_url and hmac and re.match(r'https?://', ios_playlist_url):
        headers = self.geo_verification_headers()
        headers.update({
            'Accept': 'application/vnd.itv.vod.playlist.v2+json',
@ -227,11 +92,12 @@ class ITVIE(InfoExtractor):
                    },
                    'platformTag': 'dotcom'
                }
-                }).encode(), headers=headers, fatal=False)
+            }).encode(), headers=headers)
-            if ios_playlist:
+        video_data = ios_playlist['Playlist']['Video']
                video_data = ios_playlist.get('Playlist', {}).get('Video', {})
        ios_base_url = video_data.get('Base')
-                for media_file in video_data.get('MediaFiles', []):
+
        formats = []
        for media_file in (video_data.get('MediaFiles') or []):
            href = media_file.get('Href')
            if not href:
                continue
@ -246,35 +112,42 @@ class ITVIE(InfoExtractor):
                formats.append({
                    'url': href,
                })
-                subs = video_data.get('Subtitles')
+        self._sort_formats(formats)
-                if isinstance(subs, list):
+
        subtitles = {}
        subs = video_data.get('Subtitles') or []
        for sub in subs:
            if not isinstance(sub, dict):
                continue
            href = url_or_none(sub.get('Href'))
-                        if href:
+            if not href:
-                            extract_subtitle(href)
+                continue
-                if not info.get('duration'):
+            subtitles.setdefault('en', []).append({
-                    info['duration'] = parse_duration(video_data.get('Duration'))
+                'url': href,
-
+                'ext': determine_ext(href, 'vtt'),
        self._sort_formats(formats)
        info.update({
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            })
-        webpage_info = self._search_json_ld(webpage, video_id, default={})
+        info = self._search_json_ld(webpage, video_id, default={})
-        if not webpage_info.get('title'):
+        if not info:
-            webpage_info['title'] = self._html_search_regex(
+            json_ld = self._parse_json(self._search_regex(
-                r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
+                JSON_LD_RE, webpage, 'JSON-LD', '{}',
-                webpage, 'title', default=None) or self._og_search_title(
+                group='json_ld'), video_id, fatal=False)
-                webpage, default=None) or self._html_search_meta(
+            if json_ld and json_ld.get('@type') == 'BreadcrumbList':
-                'twitter:title', webpage, 'title',
+                for ile in (json_ld.get('itemListElement:') or []):
-                default=None) or webpage_info['episode']
+                    item = ile.get('item:') or {}
                    if item.get('@type') == 'TVEpisode':
                        item['@context'] = 'http://schema.org'
                        info = self._json_ld(item, video_id, fatal=False) or {}
                        break
-        return merge_dicts(info, webpage_info)
+        return merge_dicts({
            'id': video_id,
            'title': self._html_search_meta(['og:title', 'twitter:title'], webpage),
            'formats': formats,
            'subtitles': subtitles,
            'duration': parse_duration(video_data.get('Duration')),
            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
        }, info)
 class ITVBTCCIE(InfoExtractor):
--- a/haruhi_dl/extractor/kakao.py
+++ b/haruhi_dl/extractor/kakao.py
@ -3,10 +3,13 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_HTTPError
 from ..utils import (
    ExtractorError,
    int_or_none,
    str_or_none,
    strip_or_none,
    try_get,
    unified_timestamp,
    update_url_query,
 )
@ -23,7 +26,7 @@ class KakaoIE(InfoExtractor):
            'id': '301965083',
            'ext': 'mp4',
            'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動！顔高低差GPも！」 『乃木坂工事中』',
-            'uploader_id': 2671005,
+            'uploader_id': '2671005',
            'uploader': '그랑그랑이',
            'timestamp': 1488160199,
            'upload_date': '20170227',
@ -36,11 +39,15 @@ class KakaoIE(InfoExtractor):
            'ext': 'mp4',
            'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
            'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
-            'uploader_id': 2653210,
+            'uploader_id': '2653210',
            'uploader': '쇼! 음악중심',
            'timestamp': 1485684628,
            'upload_date': '20170129',
        }
    }, {
        # geo restricted
        'url': 'https://tv.kakao.com/channel/3643855/cliplink/412069491',
        'only_matching': True,
    }]
    def _real_extract(self, url):
@ -68,8 +75,7 @@ class KakaoIE(InfoExtractor):
            'fields': ','.join([
                '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
                'description', 'channelId', 'createTime', 'duration', 'playCount',
-                'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+                'likeCount', 'commentCount', 'tagList', 'channel', 'name', 'thumbnailUrl',
                'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
                'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
        }
@ -82,24 +88,28 @@ class KakaoIE(InfoExtractor):
        title = clip.get('title') or clip_link.get('displayTitle')
-        query['tid'] = impress.get('tid', '')
+        query.update({
            'fields': '-*,code,message,url',
            'tid': impress.get('tid') or '',
        })
        formats = []
-        for fmt in clip.get('videoOutputList', []):
+        for fmt in (clip.get('videoOutputList') or []):
            try:
                profile_name = fmt['profile']
                if profile_name == 'AUDIO':
                    continue
-                query.update({
+                query['profile'] = profile_name
-                    'profile': profile_name,
+                try:
                    'fields': '-*,url',
                })
                    fmt_url_json = self._download_json(
                        api_base + 'raw/videolocation', display_id,
                        'Downloading video URL for profile %s' % profile_name,
-                    query=query, headers=player_header, fatal=False)
+                        query=query, headers=player_header)
-
+                except ExtractorError as e:
-                if fmt_url_json is None:
+                    if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                        resp = self._parse_json(e.cause.read().decode(), video_id)
                        if resp.get('code') == 'GeoBlocked':
                            self.raise_geo_restricted()
                    continue
                fmt_url = fmt_url_json['url']
@ -116,27 +126,13 @@ class KakaoIE(InfoExtractor):
                pass
        self._sort_formats(formats)
        thumbs = []
        for thumb in clip.get('clipChapterThumbnailList', []):
            thumbs.append({
                'url': thumb.get('thumbnailUrl'),
                'id': compat_str(thumb.get('timeInSec')),
                'preference': -1 if thumb.get('isDefault') else 0
            })
        top_thumbnail = clip.get('thumbnailUrl')
        if top_thumbnail:
            thumbs.append({
                'url': top_thumbnail,
                'preference': 10,
            })
        return {
            'id': display_id,
            'title': title,
            'description': strip_or_none(clip.get('description')),
-            'uploader': clip_link.get('channel', {}).get('name'),
+            'uploader': try_get(clip_link, lambda x: x['channel']['name']),
-            'uploader_id': clip_link.get('channelId'),
+            'uploader_id': str_or_none(clip_link.get('channelId')),
-            'thumbnails': thumbs,
+            'thumbnail': clip.get('thumbnailUrl'),
            'timestamp': unified_timestamp(clip_link.get('createTime')),
            'duration': int_or_none(clip.get('duration')),
            'view_count': int_or_none(clip.get('playCount')),
--- a/haruhi_dl/extractor/kanalplay.py
+++ b/haruhi_dl/extractor/kanalplay.py
@ -1,97 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
    float_or_none,
    srt_subtitles_timecode,
 )
 class KanalPlayIE(InfoExtractor):
    """Extractor for video pages on kanal5play.se, kanal9play.se and kanal11play.se.

    The channel number and the numeric video id are both taken from the page
    URL (see ``_VALID_URL``) and used to query the site's JSON endpoints for
    the video description and, when advertised, its subtitles.
    """
    IE_DESC = 'Kanal 5/9/11 Play'
    # Captures the channel number (5, 9 or 11) as ``channel_id`` and the
    # trailing numeric video id as ``id``; the ``#!/`` and ``play/`` path
    # segments are optional.
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
        'info_dict': {
            'id': '3270012277',
            'ext': 'flv',
            'title': 'Saknar både dusch och avlopp',
            'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
            'duration': 2636.36,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
        'only_matching': True,
    }, {
        'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
        'only_matching': True,
    }]
    def _fix_subtitles(self, subs):
        """Render subtitle items as a single SRT-style text blob.

        ``subs`` is iterated in order; every item is indexed for
        ``startMillis``, ``endMillis`` and ``text``. Cues are numbered from 1
        and separated from each other by an empty CRLF-terminated line.
        """
        return '\r\n\r\n'.join(
            '%s\r\n%s --> %s\r\n%s'
            % (
                num,
                # Millisecond offsets are divided by 1000.0 before formatting;
                # presumably srt_subtitles_timecode expects seconds - confirm
                # against its definition in ..utils.
                srt_subtitles_timecode(item['startMillis'] / 1000.0),
                srt_subtitles_timecode(item['endMillis'] / 1000.0),
                item['text'],
            ) for num, item in enumerate(subs, 1))
    def _get_subtitles(self, channel_id, video_id):
        """Fetch the subtitles JSON for a video and wrap it as an ``sv`` SRT track.

        The download is requested with ``fatal=False``; when the result is
        falsy an empty dict is returned instead of a subtitle track.
        """
        subs = self._download_json(
            'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id),
            video_id, 'Downloading subtitles JSON', fatal=False)
        return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {}
    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        Raises ``ExtractorError`` (marked ``expected``) when the API response
        carries a non-empty ``reasonsForNoStreams`` list.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        channel_id = mobj.group('channel_id')
        # The channel number selects the host; the FLASH format is requested.
        video = self._download_json(
            'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id),
            video_id)
        # Surface the site's own explanation(s) instead of failing later on
        # missing stream data.
        reasons_for_no_streams = video.get('reasonsForNoStreams')
        if reasons_for_no_streams:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)),
                expected=True)
        # 'title', 'streamBaseUrl' and 'streams' are indexed directly (treated
        # as required); the remaining fields are read with .get().
        title = video['title']
        description = video.get('description')
        # NOTE(review): the second argument looks like a divisor converting
        # milliseconds to seconds - confirm against float_or_none in ..utils.
        duration = float_or_none(video.get('length'), 1000)
        thumbnail = video.get('posterUrl')
        stream_base_url = video['streamBaseUrl']
        # One format per stream: every entry shares the base URL and differs
        # in its play path and bitrate.
        formats = [{
            'url': stream_base_url,
            'play_path': stream['source'],
            'ext': 'flv',
            'tbr': float_or_none(stream.get('bitrate'), 1000),
            'rtmp_real_time': True,
        } for stream in video['streams']]
        self._sort_formats(formats)
        subtitles = {}
        # Subtitles are only requested when the API flags them as available.
        # extract_subtitles is not defined in this class; presumably the base
        # class routes it to _get_subtitles - verify in InfoExtractor.
        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }
--- a/haruhi_dl/extractor/ketnet.py
+++ b/haruhi_dl/extractor/ketnet.py
@ -2,92 +2,71 @@ from __future__ import unicode_literals
 from .canvas import CanvasIE
 from .common import InfoExtractor
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
    int_or_none,
    parse_iso8601,
 )
 class KetnetIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
-        'url': 'https://www.ketnet.be/kijken/zomerse-filmpjes',
+        'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
-        'md5': '6bdeb65998930251bbd1c510750edba9',
+        'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
        'info_dict': {
-            'id': 'zomerse-filmpjes',
+            'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
            'ext': 'mp4',
-            'title': 'Gluur mee op de filmset en op Pennenzakkenrock',
+            'title': 'Nachtwacht - Reeks 3: Aflevering 1',
-            'description': 'Gluur mee met Ghost Rockers op de filmset',
+            'description': 'De Nachtwacht krijgt te maken met een parasiet',
            'thumbnail': r're:^https?://.*\.jpg$',
-        }
+            'duration': 1468.02,
-    }, {
+            'timestamp': 1609225200,
-        # mzid in playerConfig instead of sources
+            'upload_date': '20201229',
-        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-greystook',
+            'series': 'Nachtwacht',
-        'md5': '90139b746a0a9bd7bb631283f6e2a64e',
+            'season': 'Reeks 3',
-        'info_dict': {
+            'episode': 'De Greystook',
-            'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+            'episode_number': 1,
            'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
            'ext': 'flv',
            'title': 'Nachtwacht: De Greystook',
            'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1468.03,
        },
        'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
    }, {
-        'url': 'https://www.ketnet.be/kijken/karrewiet/uitzending-8-september-2016',
+        'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
        'only_matching': True,
    }, {
        'url': 'https://www.ketnet.be/achter-de-schermen/sien-repeteert-voor-stars-for-life',
        'only_matching': True,
    }, {
        # mzsource, geo restricted to Belgium
        'url': 'https://www.ketnet.be/kijken/nachtwacht/de-bermadoe',
        'only_matching': True,
    }]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
+        video = self._download_json(
            'https://senior-bff.ketnet.be/graphql', display_id, query={
                'query': '''{
  video(id: "content/ketnet/nl/%s.model.json") {
    description
    episodeNr
    imageUrl
    mediaReference
    programTitle
    publicationDate
    seasonTitle
    subtitleVideodetail
    titleVideodetail
  }
 }''' % display_id,
            })['data']['video']
-        config = self._parse_json(
+        mz_id = compat_urllib_parse_unquote(video['mediaReference'])
            self._search_regex(
                r'(?s)playerConfig\s*=\s*({.+?})\s*;', webpage,
                'player config'),
            video_id)
        mzid = config.get('mzid')
        if mzid:
            return self.url_result(
                'https://mediazone.vrt.be/api/v1/ketnet/assets/%s' % mzid,
                CanvasIE.ie_key(), video_id=mzid)
        title = config['title']
        formats = []
        for source_key in ('', 'mz'):
            source = config.get('%ssource' % source_key)
            if not isinstance(source, dict):
                continue
            for format_id, format_url in source.items():
                if format_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False))
                elif format_id == 'hds':
                    formats.extend(self._extract_f4m_formats(
                        format_url, video_id, f4m_id=format_id, fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                    })
        self._sort_formats(formats)
        return {
-            'id': video_id,
+            '_type': 'url_transparent',
-            'title': title,
+            'id': mz_id,
-            'description': config.get('description'),
+            'title': video['titleVideodetail'],
-            'thumbnail': config.get('image'),
+            'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
-            'series': config.get('program'),
+            'thumbnail': video.get('imageUrl'),
-            'episode': config.get('episode'),
+            'description': video.get('description'),
-            'formats': formats,
+            'timestamp': parse_iso8601(video.get('publicationDate')),
            'series': video.get('programTitle'),
            'season': video.get('seasonTitle'),
            'episode': video.get('subtitleVideodetail'),
            'episode_number': int_or_none(video.get('episodeNr')),
            'ie_key': CanvasIE.ie_key(),
        }
--- a/haruhi_dl/extractor/khanacademy.py
+++ b/haruhi_dl/extractor/khanacademy.py
@ -1,82 +1,107 @@
 from __future__ import unicode_literals
-import re
+import json
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
+    int_or_none,
    parse_iso8601,
    try_get,
 )
-class KhanAcademyIE(InfoExtractor):
+class KhanAcademyBaseIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
+    _VALID_URL_TEMPL = r'https?://(?:www\.)?khanacademy\.org/(?P<id>(?:[^/]+/){%s}%s[^?#/&]+)'
    IE_NAME = 'KhanAcademy'
-    _TESTS = [{
+    def _parse_video(self, video):
-        'url': 'http://www.khanacademy.org/video/one-time-pad',
+        return {
-        'md5': '7b391cce85e758fb94f763ddc1bbb979',
+            '_type': 'url_transparent',
            'url': video['youtubeId'],
            'id': video.get('slug'),
            'title': video.get('title'),
            'thumbnail': video.get('imageUrl') or video.get('thumbnailUrl'),
            'duration': int_or_none(video.get('duration')),
            'description': video.get('description'),
            'ie_key': 'Youtube',
        }
    def _real_extract(self, url):
        """Fetch the page's component props and hand them to the subclass.

        The id matched from the URL is sent as the ``path`` variable to
        Khan Academy's internal GraphQL endpoint. The response carries a JSON
        document serialized as a string under ``data.contentJson``; its
        ``componentProps`` member is passed to ``_parse_component_props``.
        """
        display_id = self._match_id(url)
        graphql_variables = json.dumps({
            'path': display_id,
            'queryParams': '',
        })
        # NOTE(review): 'hash' presumably selects a server-side stored query
        # -- confirm against the site before changing it.
        response = self._download_json(
            'https://www.khanacademy.org/api/internal/graphql',
            display_id, query={
                'hash': 1604303425,
                'variables': graphql_variables,
            })
        # contentJson is itself a JSON string, hence the second parse.
        content = self._parse_json(
            response['data']['contentJson'], display_id)
        return self._parse_component_props(content['componentProps'])
 class KhanAcademyIE(KhanAcademyBaseIE):
    IE_NAME = 'khanacademy'
    _VALID_URL = KhanAcademyBaseIE._VALID_URL_TEMPL % ('4', 'v/')
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography/crypt/v/one-time-pad',
        'md5': '9c84b7b06f9ebb80d22a5c8dedefb9a0',
        'info_dict': {
-            'id': 'one-time-pad',
+            'id': 'FlIG3TvQCBQ',
-            'ext': 'webm',
+            'ext': 'mp4',
            'title': 'The one-time pad',
            'description': 'The perfect cipher',
            'duration': 176,
            'uploader': 'Brit Cruise',
            'uploader_id': 'khanacademy',
            'upload_date': '20120411',
            'timestamp': 1334170113,
            'license': 'cc-by-nc-sa',
        },
        'add_ie': ['Youtube'],
-    }, {
+    }
-        'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
+
    def _parse_component_props(self, component_props):
        """Build the info dict for a single video page.

        Reads ``tutorialPageData.contentModel`` from the component props,
        converts it with ``_parse_video`` and then adds the uploader
        (author names joined with ', '), the timestamp parsed from
        ``dateAdded`` and the license taken from ``kaUserLicense``.
        """
        content_model = component_props['tutorialPageData']['contentModel']
        result = self._parse_video(content_model)
        authors = content_model.get('authorNames')
        if authors:
            uploader = ', '.join(authors)
        else:
            # Missing or empty author list: leave the uploader unset.
            uploader = None
        result['uploader'] = uploader
        result['timestamp'] = parse_iso8601(content_model.get('dateAdded'))
        result['license'] = content_model.get('kaUserLicense')
        return result
 class KhanAcademyUnitIE(KhanAcademyBaseIE):
    IE_NAME = 'khanacademy:unit'
    _VALID_URL = (KhanAcademyBaseIE._VALID_URL_TEMPL % ('2', '')) + '/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://www.khanacademy.org/computing/computer-science/cryptography',
        'info_dict': {
            'id': 'cryptography',
-            'title': 'Journey into cryptography',
+            'title': 'Cryptography',
            'description': 'How have humans protected their secret messages through history? What has changed today?',
        },
-        'playlist_mincount': 3,
+        'playlist_mincount': 31,
    }]
    def _real_extract(self, url):
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        if m.group('key') == 'video':
            data = self._download_json(
                'http://api.khanacademy.org/api/v1/videos/' + video_id,
                video_id, 'Downloading video info')
            upload_date = unified_strdate(data['date_added'])
            uploader = ', '.join(data['author_names'])
            return {
                '_type': 'url_transparent',
                'url': data['url'],
                'id': video_id,
                'title': data['title'],
                'thumbnail': data['image_url'],
                'duration': data['duration'],
                'description': data['description'],
                'uploader': uploader,
                'upload_date': upload_date,
    }
        else:
            # topic
            data = self._download_json(
                'http://api.khanacademy.org/api/v1/topic/' + video_id,
                video_id, 'Downloading topic info')
-            entries = [
+    def _parse_component_props(self, component_props):
-                {
+        curation = component_props['curation']
                    '_type': 'url',
                    'url': c['url'],
                    'id': c['id'],
                    'title': c['title'],
                }
                for c in data['children'] if c['kind'] in ('Video', 'Topic')]
-            return {
+        entries = []
-                '_type': 'playlist',
+        tutorials = try_get(curation, lambda x: x['tabs'][0]['modules'][0]['tutorials'], list) or []
-                'id': video_id,
+        for tutorial_number, tutorial in enumerate(tutorials, 1):
-                'title': data['title'],
+            chapter_info = {
-                'description': data['description'],
+                'chapter': tutorial.get('title'),
-                'entries': entries,
+                'chapter_number': tutorial_number,
                'chapter_id': tutorial.get('id'),
            }
            for content_item in (tutorial.get('contentItems') or []):
                if content_item.get('kind') == 'Video':
                    info = self._parse_video(content_item)
                    info.update(chapter_info)
                    entries.append(info)
        return self.playlist_result(
            entries, curation.get('unit'), curation.get('title'),
            curation.get('description'))
--- a/haruhi_dl/extractor/linuxacademy.py
+++ b/haruhi_dl/extractor/linuxacademy.py
@ -8,11 +8,15 @@ from .common import InfoExtractor
 from ..compat import (
    compat_b64decode,
    compat_HTTPError,
    compat_str,
 )
 from ..utils import (
    clean_html,
    ExtractorError,
-    orderedSet,
+    js_to_json,
-    unescapeHTML,
+    parse_duration,
    try_get,
    unified_timestamp,
    urlencode_postdata,
    urljoin,
 )
@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
                        )
                    '''
    _TESTS = [{
-        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
+        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
        'info_dict': {
-            'id': '1498-2',
+            'id': '7971-2',
            'ext': 'mp4',
-            'title': "Introduction to the Practitioner's Brief",
+            'title': 'What Is Data Science',
            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
            'timestamp': 1607387907,
            'upload_date': '20201208',
            'duration': 304,
        },
        'params': {
            'skip_download': True,
@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
-            'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
+            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
            'duration': 28835,
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'response_mode': 'web_message',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.geturl(),
-            'access token')
+            'access token', default=None)
        if not access_token:
            access_token = self._parse_json(
                self._search_regex(
                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
                    'authorization response'), None,
                transform_source=js_to_json)['response']['access_token']
        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
        # course path
        if course_id:
-            entries = [
+            module = self._parse_json(
-                self.url_result(
+                self._search_regex(
-                    urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
+                    r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
-                for lesson_url in orderedSet(re.findall(
+                item_id)
-                    r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
+            entries = []
-                    webpage))]
+            chapter_number = None
-            title = unescapeHTML(self._html_search_regex(
+            chapter = None
-                (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
+            chapter_id = None
-                 r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
+            for item in module['items']:
-                webpage, 'title', default=None, group='value'))
+                if not isinstance(item, dict):
-            description = unescapeHTML(self._html_search_regex(
+                    continue
-                r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+
-                webpage, 'description', default=None, group='value'))
+                def type_field(key):
-            return self.playlist_result(entries, course_id, title, description)
+                    return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
                type_fields = (type_field('name'), type_field('slug'))
                # Move to next module section
                if 'section' in type_fields:
                    chapter = item.get('course_name')
                    chapter_id = item.get('course_module')
                    chapter_number = 1 if not chapter_number else chapter_number + 1
                    continue
                # Skip non-lessons
                if 'lesson' not in type_fields:
                    continue
                lesson_url = urljoin(url, item.get('url'))
                if not lesson_url:
                    continue
                title = item.get('title') or item.get('lesson_name')
                description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
                entries.append({
                    '_type': 'url_transparent',
                    'url': lesson_url,
                    'ie_key': LinuxAcademyIE.ie_key(),
                    'title': title,
                    'description': description,
                    'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
                    'duration': parse_duration(item.get('duration')),
                    'chapter': chapter,
                    'chapter_id': chapter_id,
                    'chapter_number': chapter_number,
                })
            return {
                '_type': 'playlist',
                'entries': entries,
                'id': course_id,
                'title': module.get('title'),
                'description': module.get('md_desc') or clean_html(module.get('desc')),
                'duration': parse_duration(module.get('duration')),
            }
        # single video path
-        info = self._extract_jwplayer_data(
+        m3u8_url = self._parse_json(
-            webpage, item_id, require_title=False, m3u8_id='hls',)
+            self._search_regex(
-        title = self._search_regex(
+                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
            item_id)[0]['file']
        formats = self._extract_m3u8_formats(
            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)
        info = {
            'id': item_id,
            'formats': formats,
        }
        lesson = self._parse_json(
            self._search_regex(
                (r'window\.lesson\s*=\s*({.+?})\s*;',
                 r'player\.lesson\s*=\s*({.+?})\s*;'),
                webpage, 'lesson', default='{}'), item_id, fatal=False)
        if lesson:
            info.update({
                'title': lesson.get('lesson_name'),
                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
                'duration': parse_duration(lesson.get('duration')),
            })
        if not info.get('title'):
            info['title'] = self._search_regex(
                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
                'title', group='value')
        info.update({
            'id': item_id,
            'title': title,
        })
        return info
--- a/haruhi_dl/extractor/lrt.py
+++ b/haruhi_dl/extractor/lrt.py
@ -5,28 +5,26 @@ import re
 from .common import InfoExtractor
 from ..utils import (
-    determine_ext,
+    clean_html,
-    int_or_none,
+    merge_dicts,
    parse_duration,
    remove_end,
 )
 class LRTIE(InfoExtractor):
    IE_NAME = 'lrt.lt'
-    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
    _TESTS = [{
        # m3u8 download
-        'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
+        'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
-        'md5': 'fe44cf7e4ab3198055f2c598fc175cb0',
+        'md5': '85cb2bb530f31d91a9c65b479516ade4',
        'info_dict': {
-            'id': '54391',
+            'id': '2000127261',
            'ext': 'mp4',
-            'title': 'Septynios Kauno dienos',
+            'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
-            'description': 'md5:24d84534c7dc76581e59f5689462411a',
+            'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
-            'duration': 1783,
+            'duration': 3035,
-            'view_count': int,
+            'timestamp': 1604079000,
-            'like_count': int,
+            'upload_date': '20201030',
        },
    }, {
        # direct mp3 download
@ -43,52 +41,35 @@ class LRTIE(InfoExtractor):
        },
    }]
    def _extract_js_var(self, webpage, var_name, default):
        """Search *webpage* for a quoted string assigned to *var_name*.

        The pattern matches ``<var_name> = "value"`` (single or double
        quotes) and captures the text between the quotes as group 2.
        *default* is forwarded to ``_search_regex`` as its default argument.
        """
        # Group 1 is the opening quote; \1 makes the closing quote match it.
        assignment_re = r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name
        field_label = var_name.replace('_', ' ')
        return self._search_regex(
            assignment_re, webpage, field_label, default, group=2)
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        path, video_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id)
-        title = remove_end(self._og_search_title(webpage), ' - LRT')
+        media_url = self._extract_js_var(webpage, 'main_url', path)
        media = self._download_json(self._extract_js_var(
            webpage, 'media_info_url',
            'https://www.lrt.lt/servisai/stream_url/vod/media_info/'),
            video_id, query={'url': media_url})
        jw_data = self._parse_jwplayer_data(
            media['playlist_item'], video_id, base_url=url)
-        formats = []
+        json_ld_data = self._search_json_ld(webpage, video_id)
-        for _, file_url in re.findall(
+
-                r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+        tags = []
-            ext = determine_ext(file_url)
+        for tag in (media.get('tags') or []):
-            if ext not in ('m3u8', 'mp3'):
+            tag_name = tag.get('name')
            if not tag_name:
                continue
-            # mp3 served as m3u8 produces stuttered media file
+            tags.append(tag_name)
            if ext == 'm3u8' and '.mp3' in file_url:
                continue
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    file_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    fatal=False))
            elif ext == 'mp3':
                formats.append({
                    'url': file_url,
                    'vcodec': 'none',
                })
        self._sort_formats(formats)
-        thumbnail = self._og_search_thumbnail(webpage)
+        clean_info = {
-        description = self._og_search_description(webpage)
+            'description': clean_html(media.get('content')),
-        duration = parse_duration(self._search_regex(
+            'tags': tags,
            r'var\s+record_len\s*=\s*(["\'])(?P<duration>[0-9]+:[0-9]+:[0-9]+)\1',
            webpage, 'duration', default=None, group='duration'))
        view_count = int_or_none(self._html_search_regex(
            r'<div[^>]+class=(["\']).*?record-desc-seen.*?\1[^>]*>(?P<count>.+?)</div>',
            webpage, 'view count', fatal=False, group='count'))
        like_count = int_or_none(self._search_regex(
            r'<span[^>]+id=(["\'])flikesCount.*?\1>(?P<count>\d+)<',
            webpage, 'like count', fatal=False, group='count'))
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'like_count': like_count,
        }
        return merge_dicts(clean_info, jw_data, json_ld_data)
--- a/haruhi_dl/extractor/malltv.py
+++ b/haruhi_dl/extractor/malltv.py
@ -1,10 +1,16 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import merge_dicts
+from ..utils import (
    clean_html,
    dict_get,
    float_or_none,
    int_or_none,
    merge_dicts,
    parse_duration,
    try_get,
 )
 class MallTVIE(InfoExtractor):
@ -17,7 +23,7 @@ class MallTVIE(InfoExtractor):
            'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
            'ext': 'mp4',
            'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
-            'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
+            'description': 'md5:db7d5744a4bd4043d9d98324aa72ab35',
            'duration': 216,
            'timestamp': 1538870400,
            'upload_date': '20181007',
@ -37,20 +43,46 @@ class MallTVIE(InfoExtractor):
        webpage = self._download_webpage(
            url, display_id, headers=self.geo_verification_headers())
-        SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
+        video = self._parse_json(self._search_regex(
            r'videoObject\s*=\s*JSON\.parse\(JSON\.stringify\(({.+?})\)\);',
            webpage, 'video object'), display_id)
        video_source = video['VideoSource']
        video_id = self._search_regex(
-            SOURCE_RE, webpage, 'video id', group='id')
+            r'/([\da-z]+)/index\b', video_source, 'video id')
-        media = self._parse_html5_media_entries(
+        formats = self._extract_m3u8_formats(
-            url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
+            video_source + '.m3u8', video_id, 'mp4', 'm3u8_native')
-            m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
+        self._sort_formats(formats)
        subtitles = {}
        for s in (video.get('Subtitles') or {}):
            s_url = s.get('Url')
            if not s_url:
                continue
            subtitles.setdefault(s.get('Language') or 'cz', []).append({
                'url': s_url,
            })
        entity_counts = video.get('EntityCounts') or {}
        def get_count(k):
            v = entity_counts.get(k + 's') or {}
            return int_or_none(dict_get(v, ('Count', 'StrCount')))
        info = self._search_json_ld(webpage, video_id, default={})
-        return merge_dicts(media, info, {
+        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
-            'title': self._og_search_title(webpage, default=None) or display_id,
+            'title': video.get('Title'),
-            'description': self._og_search_description(webpage, default=None),
+            'description': clean_html(video.get('Description')),
-            'thumbnail': self._og_search_thumbnail(webpage, default=None),
+            'thumbnail': video.get('ThumbnailUrl'),
-        })
+            'formats': formats,
            'subtitles': subtitles,
            'duration': int_or_none(video.get('DurationSeconds')) or parse_duration(video.get('Duration')),
            'view_count': get_count('View'),
            'like_count': get_count('Like'),
            'dislike_count': get_count('Dislike'),
            'average_rating': float_or_none(try_get(video, lambda x: x['EntityRating']['AvarageRate'])),
            'comment_count': get_count('Comment'),
        }, info)
--- a/haruhi_dl/extractor/mdr.py
+++ b/haruhi_dl/extractor/mdr.py
@ -2,12 +2,16 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
    compat_str,
    compat_urlparse,
 )
 from ..utils import (
    determine_ext,
    int_or_none,
    parse_duration,
    parse_iso8601,
    url_or_none,
    xpath_text,
 )
@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
    _GEO_COUNTRIES = ['DE']
    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
            'duration': 3239,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
    }, {
        # empty bitrateVideo and bitrateAudio
        'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
        'info_dict': {
            'id': '128372',
            'ext': 'mp4',
            'title': 'Der kleine Wichtel kehrt zurück',
            'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
            'duration': 4876,
            'timestamp': 1607823300,
            'upload_date': '20201213',
            'uploader': 'ZDF',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
        type_ = xpath_text(doc, './type', default=None)
        formats = []
        processed_urls = []
        for asset in doc.findall('./assets/asset'):
            for source in (
                    'download',
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
                if url_el is None:
                    continue
-                video_url = url_el.text
+                video_url = url_or_none(url_el.text)
-                if video_url in processed_urls:
+                if not video_url or video_url in processed_urls:
                    continue
                processed_urls.append(video_url)
-                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+                ext = determine_ext(video_url)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
                ext = determine_ext(url_el.text)
                if ext == 'm3u8':
-                    url_formats = self._extract_m3u8_formats(
+                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                        preference=0, m3u8_id='HLS', fatal=False)
+                        preference=0, m3u8_id='HLS', fatal=False))
                elif ext == 'f4m':
-                    url_formats = self._extract_f4m_formats(
+                    formats.extend(self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
-                        preference=0, f4m_id='HDS', fatal=False)
+                        preference=0, f4m_id='HDS', fatal=False))
                else:
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                    abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
                    format_id = [media_type]
                    if vbr or abr:
                        format_id.append(compat_str(vbr or abr))
                    f = {
                        'url': video_url,
-                        'format_id': '%s-%d' % (media_type, vbr or abr),
+                        'format_id': '-'.join(format_id),
                        'filesize': filesize,
                        'abr': abr,
-                        'preference': 1,
+                        'vbr': vbr,
                    }
                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
-                            'vbr': vbr,
+                            'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
-                            'width': width,
+                            'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
                            'height': height,
                        })
-                    url_formats = [f]
+                    if type_ == 'audio':
                        f['vcodec'] = 'none'
-                if not url_formats:
+                    formats.append(f)
                    continue
                if not vbr:
                    for f in url_formats:
                        abr = f.get('tbr') or abr
                        if 'tbr' in f:
                            del f['tbr']
                        f.update({
                            'abr': abr,
                            'vcodec': 'none',
                        })
                formats.extend(url_formats)
        self._sort_formats(formats)
--- a/haruhi_dl/extractor/medaltv.py
+++ b/haruhi_dl/extractor/medaltv.py
@ -0,0 +1,131 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    str_or_none,
    try_get,
 )
 class MedalTVIE(InfoExtractor):
    """Extractor for medal.tv clip pages.

    Clip metadata is read from the ``hydrationData`` JSON object embedded in
    a ``<script>`` tag of the clip page. Formats and thumbnails are derived
    from the clip's ``contentUrl*`` and ``thumbnail*`` keys.
    """
    _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
        'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
        'info_dict': {
            'id': '34934644',
            'ext': 'mp4',
            'title': 'Quad Cold',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'MowgliSB',
            'timestamp': 1603165266,
            'upload_date': '20201020',
            # uploader_id is produced by str_or_none(), so it is a string
            'uploader_id': '10619174',
        }
    }, {
        'url': 'https://medal.tv/clips/36787208',
        'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
        'info_dict': {
            'id': '36787208',
            'ext': 'mp4',
            'title': 'u tk me i tk u bigger',
            'description': 'Medal,https://medal.tv/desktop/',
            'uploader': 'Mimicc',
            'timestamp': 1605580939,
            'upload_date': '20201117',
            'uploader_id': '5156321',
        }
    }]
    def _real_extract(self, url):
        """Return the info dict for the clip addressed by *url*.

        Raises ExtractorError when the page carries no data for the clip,
        or when the clip reports an error and no format could be built.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        hydration_data = self._parse_json(self._search_regex(
            r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
            webpage, 'hydration data', default='{}'), video_id)
        clip = try_get(
            hydration_data, lambda x: x['clips'][video_id], dict) or {}
        if not clip:
            raise ExtractorError(
                'Could not find video information.', video_id=video_id)
        title = clip['contentTitle']
        source_width = int_or_none(clip.get('sourceWidth'))
        source_height = int_or_none(clip.get('sourceHeight'))
        # Force float arithmetic so the ratio is not truncated where `/` on
        # two ints is integer division (this module does not import
        # `division` from __future__).
        if source_width and source_height:
            aspect_ratio = float(source_width) / source_height
        else:
            aspect_ratio = 16.0 / 9
        def add_item(container, item_url, height, id_key='format_id', item_id=None):
            """Append an entry for *item_url* to *container*.

            The entry is only added when its id ('<height>p' unless *item_id*
            is given) occurs in the URL. Width is derived from *height* and
            the clip's aspect ratio; both are omitted when *height* is
            unknown.
            """
            if not item_id:
                if height is None:
                    # No height to build an id from, so the URL cannot be
                    # checked; skip instead of failing on '%dp' % None.
                    return
                item_id = '%dp' % height
            if item_id not in item_url:
                return
            item = {
                'url': item_url,
                id_key: item_id,
            }
            if height is not None:
                item.update({
                    'width': int(round(aspect_ratio * height)),
                    'height': height,
                })
            container.append(item)
        formats = []
        thumbnails = []
        for k, v in clip.items():
            if not (v and isinstance(v, compat_str)):
                continue
            # Keys look like contentUrl, contentUrl720p, thumbnail1080p, ...
            mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
            if not mobj:
                continue
            prefix = mobj.group(1)
            height = int_or_none(mobj.group(2))
            if prefix == 'contentUrl':
                # A key without a height suffix is labelled 'source' and
                # takes the clip's source height.
                add_item(
                    formats, v, height or source_height,
                    item_id=None if height else 'source')
            elif prefix == 'thumbnail':
                add_item(thumbnails, v, height, 'id')
        error = clip.get('error')
        if not formats and error:
            if error == 404:
                raise ExtractorError(
                    'That clip does not exist.',
                    expected=True, video_id=video_id)
            else:
                raise ExtractorError(
                    'An unknown error occurred ({0}).'.format(error),
                    video_id=video_id)
        self._sort_formats(formats)
        # Necessary because the id of the author is not known in advance.
        # Won't raise an issue if no profile can be found as this is optional.
        author = try_get(
            hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
        author_id = str_or_none(author.get('id'))
        author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clip.get('contentDescription'),
            'uploader': author.get('displayName'),
            # 'created' is scaled down by 1000 (presumably milliseconds)
            'timestamp': float_or_none(clip.get('created'), 1000),
            'uploader_id': author_id,
            'uploader_url': author_url,
            'duration': int_or_none(clip.get('videoLengthSeconds')),
            'view_count': int_or_none(clip.get('views')),
            'like_count': int_or_none(clip.get('likes')),
            'comment_count': int_or_none(clip.get('comments')),
        }
--- a/haruhi_dl/extractor/medialaan.py
+++ b/haruhi_dl/extractor/medialaan.py
@ -2,268 +2,113 @@ from __future__ import unicode_literals
 import re
-from .gigya import GigyaBaseIE
+from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    extract_attributes,
    int_or_none,
-    parse_duration,
+    mimetype2ext,
-    try_get,
+    parse_iso8601,
    unified_timestamp,
 )
-class MedialaanIE(GigyaBaseIE):
+class MedialaanIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.|nieuws\.)?
                        (?:
-                            (?P<site_id>vtm|q2|vtmkzoom)\.be/
+                            (?:embed\.)?mychannels.video/embed/|
                            embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/|
                            (?:www\.)?(?:
                                (?:
-                                video(?:/[^/]+/id/|/?\?.*?\baid=)|
+                                    7sur7|
-                                (?:[^/]+/)*
+                                    demorgen|
                                    hln|
                                    joe|
                                    qmusic
                                )\.be|
                                (?:
                                    [abe]d|
                                    bndestem|
                                    destentor|
                                    gelderlander|
                                    pzc|
                                    tubantia|
                                    volkskrant
                                )\.nl
                            )/video/(?:[^/]+/)*[^/?&#]+~p
                        )
-                        )
+                        (?P<id>\d+)
                        (?P<id>[^/?#&]+)
                    '''
    _NETRC_MACHINE = 'medialaan'
    _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-'
    _SITE_TO_APP_ID = {
        'vtm': 'vtm_watch',
        'q2': 'q2',
        'vtmkzoom': 'vtmkzoom',
    }
    _TESTS = [{
-        # vod
+        'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993',
        'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch',
        'info_dict': {
-            'id': 'vtm_20170219_VM0678361_vtmwatch',
+            'id': '193993',
            'ext': 'mp4',
-            'title': 'Allemaal Chris afl. 6',
+            'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?',
-            'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2',
+            'timestamp': 1611663540,
-            'timestamp': 1487533280,
+            'upload_date': '20210126',
-            'upload_date': '20170219',
+            'duration': 238,
            'duration': 2562,
            'series': 'Allemaal Chris',
            'season': 'Allemaal Chris',
            'season_number': 1,
            'season_id': '256936078124527',
            'episode': 'Allemaal Chris afl. 6',
            'episode_number': 6,
            'episode_id': '256936078591527',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires account credentials',
    }, {
-        # clip
+        'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093',
        'url': 'http://vtm.be/video?aid=168332',
        'info_dict': {
            'id': '168332',
            'ext': 'mp4',
            'title': '"Veronique liegt!"',
            'description': 'md5:1385e2b743923afe54ba4adc38476155',
            'timestamp': 1489002029,
            'upload_date': '20170308',
            'duration': 96,
        },
    }, {
        # vod
        'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000',
        'only_matching': True,
    }, {
-        # vod
+        'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default',
        'url': 'http://vtm.be/video?aid=163157',
        'only_matching': True,
    }, {
-        # vod
+        'url': 'https://embed.mychannels.video/script/production/193993',
        'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2',
        'only_matching': True,
    }, {
-        # clip
+        'url': 'https://embed.mychannels.video/production/193993',
        'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio',
        'only_matching': True,
    }, {
-        # http/s redirect
+        'url': 'https://mychannels.video/embed/193993',
-        'url': 'https://vtmkzoom.be/video?aid=45724',
+        'only_matching': True,
        'info_dict': {
            'id': '257136373657000',
            'ext': 'mp4',
            'title': 'K3 Dansstudio Ushuaia afl.6',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires account credentials',
    }, {
-        # nieuws.vtm.be
+        'url': 'https://embed.mychannels.video/embed/193993',
        'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma',
        'only_matching': True,
    }]
-    def _real_initialize(self):
+    @staticmethod
-        self._logged_in = False
+    def _extract_urls(webpage, **kw):
-
+        entries = []
-    def _login(self):
+        for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage):
-        username, password = self._get_login_info()
+            mychannels_id = extract_attributes(element).get('data-mychannels-id')
-        if username is None:
+            if mychannels_id:
-            self.raise_login_required()
+                entries.append('https://mychannels.video/embed/' + mychannels_id)
-
+        return entries
        auth_data = {
            'APIKey': self._APIKEY,
            'sdk': 'js_6.1',
            'format': 'json',
            'loginID': username,
            'password': password,
        }
        auth_info = self._gigya_login(auth_data)
        self._uid = auth_info['UID']
        self._uid_signature = auth_info['UIDSignature']
        self._signature_timestamp = auth_info['signatureTimestamp']
        self._logged_in = True
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        production_id = self._match_id(url)
-        video_id, site_id = mobj.group('id', 'site_id')
+        production = self._download_json(
            'https://embed.mychannels.video/sdk/production/' + production_id,
            production_id, query={'options': 'UUUU_default'})['productions'][0]
        title = production['title']
-        webpage = self._download_webpage(url, video_id)
+        formats = []
-
+        for source in (production.get('sources') or []):
-        config = self._parse_json(
+            src = source.get('src')
-            self._search_regex(
+            if not src:
-                r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);',
+                continue
-                webpage, 'config', default='{}'), video_id,
+            ext = mimetype2ext(source.get('type'))
-            transform_source=lambda s: s.replace(
+            if ext == 'm3u8':
-                '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'"))
+                formats.extend(self._extract_m3u8_formats(
-
+                    src, production_id, 'mp4', 'm3u8_native',
-        vod_id = config.get('vodId') or self._search_regex(
+                    m3u8_id='hls', fatal=False))
            (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"',
             r'"vodId"\s*:\s*"(.+?)"',
             r'<[^>]+id=["\']vod-(\d+)'),
            webpage, 'video_id', default=None)
        # clip, no authentication required
        if not vod_id:
            player = self._parse_json(
                self._search_regex(
                    r'vmmaplayer\(({.+?})\);', webpage, 'vmma player',
                    default=''),
                video_id, transform_source=lambda s: '[%s]' % s, fatal=False)
            if player:
                video = player[-1]
                if video['videoUrl'] in ('http', 'https'):
                    return self.url_result(video['url'], MedialaanIE.ie_key())
                info = {
                    'id': video_id,
                    'url': video['videoUrl'],
                    'title': video['title'],
                    'thumbnail': video.get('imageUrl'),
                    'timestamp': int_or_none(video.get('createdDate')),
                    'duration': int_or_none(video.get('duration')),
                }
            else:
-                info = self._parse_html5_media_entries(
+                formats.append({
-                    url, webpage, video_id, m3u8_id='hls')[0]
+                    'ext': ext,
-                info.update({
+                    'url': src,
                    'id': video_id,
                    'title': self._html_search_meta('description', webpage),
                    'duration': parse_duration(self._html_search_meta('duration', webpage)),
                })
        # vod, authentication required
        else:
            if not self._logged_in:
                self._login()
            settings = self._parse_json(
                self._search_regex(
                    r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
                    webpage, 'drupal settings', default='{}'),
                video_id)
            def get(container, item):
                return try_get(
                    settings, lambda x: x[container][item],
                    compat_str) or self._search_regex(
                    r'"%s"\s*:\s*"([^"]+)' % item, webpage, item,
                    default=None)
            app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch')
            sso = get('vod', 'gigyaDatabase') or 'vtm-sso'
            data = self._download_json(
                'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id,
                video_id, query={
                    'app_id': app_id,
                    'user_network': sso,
                    'UID': self._uid,
                    'UIDSignature': self._uid_signature,
                    'signatureTimestamp': self._signature_timestamp,
                })
            formats = self._extract_m3u8_formats(
                data['response']['uri'], video_id, entry_protocol='m3u8_native',
                ext='mp4', m3u8_id='hls')
        self._sort_formats(formats)
-            info = {
+        return {
-                'id': vod_id,
+            'id': production_id,
            'title': title,
            'formats': formats,
            'thumbnail': production.get('posterUrl'),
            'timestamp': parse_iso8601(production.get('publicationDate'), ' '),
            'duration': int_or_none(production.get('duration')) or None,
        }
            api_key = get('vod', 'apiKey')
            channel = get('medialaanGigya', 'channel')
            if api_key:
                videos = self._download_json(
                    'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False,
                    query={
                        'channels': channel,
                        'ids': vod_id,
                        'limit': 1,
                        'apikey': api_key,
                    })
                if videos:
                    video = try_get(
                        videos, lambda x: x['response']['videos'][0], dict)
                    if video:
                        def get(container, item, expected_type=None):
                            return try_get(
                                video, lambda x: x[container][item], expected_type)
                        def get_string(container, item):
                            return get(container, item, compat_str)
                        info.update({
                            'series': get_string('program', 'title'),
                            'season': get_string('season', 'title'),
                            'season_number': int_or_none(get('season', 'number')),
                            'season_id': get_string('season', 'id'),
                            'episode': get_string('episode', 'title'),
                            'episode_number': int_or_none(get('episode', 'number')),
                            'episode_id': get_string('episode', 'id'),
                            'duration': int_or_none(
                                video.get('duration')) or int_or_none(
                                video.get('durationMillis'), scale=1000),
                            'title': get_string('episode', 'title'),
                            'description': get_string('episode', 'text'),
                            'timestamp': unified_timestamp(get_string(
                                'publication', 'begin')),
                        })
            if not info.get('title'):
                info['title'] = try_get(
                    config, lambda x: x['videoConfig']['title'],
                    compat_str) or self._html_search_regex(
                    r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title',
                    default=None) or self._og_search_title(webpage)
        if not info.get('description'):
            info['description'] = self._html_search_regex(
                r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>',
                webpage, 'description', default=None)
        return info
--- a/haruhi_dl/extractor/mediaset.py
+++ b/haruhi_dl/extractor/mediaset.py
@ -23,7 +23,7 @@ class MediasetIE(ThePlatformBaseIE):
                        https?://
                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
                            (?:
-                                (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
+                                (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_|
                                player/index\.html\?.*?\bprogramGuid=
                            )
                    )(?P<id>[0-9A-Z]{16,})
@ -88,6 +88,9 @@ class MediasetIE(ThePlatformBaseIE):
    }, {
        'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
        'only_matching': True,
    }, {
        'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102',
        'only_matching': True,
    }]
    @staticmethod
--- a/haruhi_dl/extractor/mgtv.py
+++ b/haruhi_dl/extractor/mgtv.py
@ -17,9 +17,8 @@ from ..utils import (
 class MGTVIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
+    _VALID_URL = r'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
    IE_DESC = '芒果TV'
    _GEO_COUNTRIES = ['CN']
    _TESTS = [{
        'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
@ -34,14 +33,18 @@ class MGTVIE(InfoExtractor):
    }, {
        'url': 'http://www.mgtv.com/b/301817/3826653.html',
        'only_matching': True,
    }, {
        'url': 'https://w.mgtv.com/b/301817/3826653.html',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
        tk2 = base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1]
        try:
            api_data = self._download_json(
                'https://pcweb.api.mgtv.com/player/video', video_id, query={
-                    'tk2': base64.urlsafe_b64encode(b'did=%s|pno=1030|ver=0.3.0301|clit=%d' % (compat_str(uuid.uuid4()).encode(), time.time()))[::-1],
+                    'tk2': tk2,
                    'video_id': video_id,
                }, headers=self.geo_verification_headers())['data']
        except ExtractorError as e:
@ -56,6 +59,7 @@ class MGTVIE(InfoExtractor):
        stream_data = self._download_json(
            'https://pcweb.api.mgtv.com/player/getSource', video_id, query={
                'pm2': api_data['atc']['pm2'],
                'tk2': tk2,
                'video_id': video_id,
            }, headers=self.geo_verification_headers())['data']
        stream_domain = stream_data['stream_domain'][0]
--- a/haruhi_dl/extractor/minds.py
+++ b/haruhi_dl/extractor/minds.py
@ -0,0 +1,196 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    clean_html,
    ExtractorError,
    int_or_none,
    str_or_none,
    strip_or_none,
 )
 class MindsBaseIE(InfoExtractor):
    """Common base of the minds.com extractors: URL prefix and API helper."""
    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Fetch JSON from https://www.minds.com/api/<path>.

        `resource` is only used to label the download note. If an
        XSRF-TOKEN cookie is stored for the API URL its value is sent in
        the X-XSRF-TOKEN header; otherwise that header is sent empty.
        """
        endpoint = 'https://www.minds.com/api/' + path
        xsrf_cookie = self._get_cookies(endpoint).get('XSRF-TOKEN')
        note = 'Downloading %s JSON metadata' % resource
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_cookie.value if xsrf_cookie else '',
        }
        return self._download_json(
            endpoint, video_id, note, headers=request_headers, query=query)
 class MindsIE(MindsBaseIE):
    """Extractor for a single minds.com video (media, newsfeed or archive URL)."""
    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the video behind `url`.

        An 'activity' entity either wraps a native video (its
        'entity_guid' becomes the video id) or points at an external
        'perma_url', which is handed to url_result. Any other entity
        must have subtype 'video'.

        Raises ExtractorError (expected) when a non-activity entity is
        not a video.
        """
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']
        if entity.get('type') == 'activity':
            if entity.get('custom_type') == 'video':
                video_id = entity['entity_guid']
            else:
                # Not a native video: delegate to whichever extractor
                # handles the linked URL.
                return self.url_result(entity['perma_url'])
        else:
            # Explicit check instead of `assert`: assertions are removed
            # under -O and would otherwise surface as a bare
            # AssertionError/KeyError without a usable message.
            subtype = entity.get('subtype')
            if subtype != 'video':
                raise ExtractorError(
                    'Unsupported entity subtype: %s' % subtype,
                    expected=True, video_id=entity_id)
            video_id = entity_id
        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')
        formats = []
        for source in (video.get('sources') or []):
            src = source.get('src')
            if not src:
                continue
            formats.append({
                'format_id': source.get('label'),
                'height': int_or_none(source.get('size')),
                'url': src,
            })
        self._sort_formats(formats)
        # Prefer the entity embedded in the video response; fall back to
        # the one fetched above.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')
        tags = entity.get('tags')
        # A lone tag may arrive as a plain string; normalise it to a list.
        if tags and isinstance(tags, compat_str):
            tags = [tags]
        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        if poster:
            # Request the poster and keep the URL reported by the response
            # handle (presumably the final URL after redirects).
            urlh = self._request_webpage(poster, video_id, fatal=False)
            if urlh:
                thumbnail = urlh.geturl()
        return {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://www.minds.com/' + uploader_id if uploader_id else None,
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'tags': tags,
            'comment_count': int_or_none(entity.get('comments:count')),
            'thumbnail': thumbnail,
        }
 class MindsFeedBaseIE(MindsBaseIE):
    """Base for minds.com feed playlists.

    Reads self._FEED_PATH and self._FEED_TYPE, which this class does not
    define itself.
    """
    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield one url_result per video of the feed, page after page."""
        params = {'limit': self._PAGE_SIZE, 'sync': 1}
        page_num = 0
        while True:
            page_num += 1
            page = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % page_num, params)
            items = page.get('entities') or []
            for item in items:
                guid = item.get('guid')
                if guid:
                    yield self.url_result(
                        'https://www.minds.com/newsfeed/' + guid,
                        MindsIE.ie_key(), guid)
            # 'load-next' is fed back as from_timestamp on the next request.
            cursor = page['load-next']
            params['from_timestamp'] = cursor
            # Stop on an empty cursor or a page that is not full.
            if not cursor or len(items) != self._PAGE_SIZE:
                break

    def _real_extract(self, url):
        """Build the playlist result for the feed named in `url`."""
        feed_id = self._match_id(url)
        api_path = 'v1/%s/%s' % (self._FEED_PATH, feed_id)
        feed_type = self._FEED_TYPE
        feed = self._call_api(api_path, feed_id, feed_type)[feed_type]
        entries = self._entries(feed['guid'])
        title = strip_or_none(feed.get('name'))
        description = feed.get('briefdescription')
        return self.playlist_result(entries, feed_id, title, description)
 class MindsChannelIE(MindsFeedBaseIE):
    """Playlist extractor for minds.com channel pages."""
    # Feed settings read by MindsFeedBaseIE
    _FEED_PATH = 'channel'
    _FEED_TYPE = 'channel'
    IE_NAME = 'minds:' + _FEED_TYPE
    # The negative lookahead rejects the listed first path segments
    # (newsfeed, media, api, archive, groups)
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'playlist_mincount': 54,
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
    }
 class MindsGroupIE(MindsFeedBaseIE):
    """Playlist extractor for minds.com group profile pages."""
    # Feed settings read by MindsFeedBaseIE
    _FEED_PATH = 'groups/group'
    _FEED_TYPE = 'group'
    IE_NAME = 'minds:' + _FEED_TYPE
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
    }
--- a/haruhi_dl/extractor/mitele.py
+++ b/haruhi_dl/extractor/mitele.py
@ -1,15 +1,14 @@
 # coding: utf-8
 from __future__ import unicode_literals
-from .common import InfoExtractor
+from .telecinco import TelecincoIE
 from ..utils import (
    int_or_none,
    parse_iso8601,
    smuggle_url,
 )
-class MiTeleIE(InfoExtractor):
+class MiTeleIE(TelecincoIE):
    IE_DESC = 'mitele.es'
    _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@ -31,7 +30,6 @@ class MiTeleIE(InfoExtractor):
            'timestamp': 1471209401,
            'upload_date': '20160814',
        },
        'add_ie': ['Ooyala'],
    }, {
        # no explicit title
        'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
@ -54,7 +52,6 @@ class MiTeleIE(InfoExtractor):
        'params': {
            'skip_download': True,
        },
        'add_ie': ['Ooyala'],
    }, {
        'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
        'only_matching': True,
@ -70,16 +67,11 @@ class MiTeleIE(InfoExtractor):
            r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
            webpage, 'Pre Player'), display_id)['prePlayer']
        title = pre_player['title']
-        video = pre_player['video']
+        video_info = self._parse_content(pre_player['video'], url)
        video_id = video['dataMediaId']
        content = pre_player.get('content') or {}
        info = content.get('info') or {}
-        return {
+        video_info.update({
            '_type': 'url_transparent',
            # for some reason only HLS is supported
            'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
            'id': video_id,
            'title': title,
            'description': info.get('synopsis'),
            'series': content.get('title'),
@ -87,7 +79,7 @@ class MiTeleIE(InfoExtractor):
            'episode': content.get('subtitle'),
            'episode_number': int_or_none(info.get('episode_number')),
            'duration': int_or_none(info.get('duration')),
            'thumbnail': video.get('dataPoster'),
            'age_limit': int_or_none(info.get('rating')),
            'timestamp': parse_iso8601(pre_player.get('publishedTime')),
-        }
+        })
        return video_info
--- a/haruhi_dl/extractor/mixcloud.py
+++ b/haruhi_dl/extractor/mixcloud.py
@ -251,8 +251,11 @@ class MixcloudPlaylistBaseIE(MixcloudBaseIE):
                cloudcast_url = cloudcast.get('url')
                if not cloudcast_url:
                    continue
                slug = try_get(cloudcast, lambda x: x['slug'], compat_str)
                owner_username = try_get(cloudcast, lambda x: x['owner']['username'], compat_str)
                video_id = '%s_%s' % (owner_username, slug) if slug and owner_username else None
                entries.append(self.url_result(
-                    cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
+                    cloudcast_url, MixcloudIE.ie_key(), video_id))
            page_info = items['pageInfo']
            has_next_page = page_info['hasNextPage']
@ -321,7 +324,8 @@ class MixcloudUserIE(MixcloudPlaylistBaseIE):
    _DESCRIPTION_KEY = 'biog'
    _ROOT_TYPE = 'user'
    _NODE_TEMPLATE = '''slug
-          url'''
+          url
          owner { username }'''
    def _get_playlist_title(self, title, slug):
        return '%s (%s)' % (title, slug)
@ -345,6 +349,7 @@ class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
    _NODE_TEMPLATE = '''cloudcast {
            slug
            url
            owner { username }
          }'''
    def _get_cloudcast(self, node):
--- a/haruhi_dl/extractor/motherless.py
+++ b/haruhi_dl/extractor/motherless.py
@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
        # no keywords
        'url': 'http://motherless.com/8B4BBC1',
        'only_matching': True,
    }, {
        # see https://motherless.com/videos/recent for recent videos with
        # uploaded date in "ago" format
        'url': 'https://motherless.com/3C3E2CF',
        'info_dict': {
            'id': '3C3E2CF',
            'ext': 'mp4',
            'title': 'a/ Hot Teens',
            'categories': list,
            'upload_date': '20210104',
            'uploader_id': 'yonbiw',
            'thumbnail': r're:https?://.*\.jpg',
            'age_limit': 18,
        },
        'params': {
            'skip_download': True,
        },
    }]
    def _real_extract(self, url):
@ -85,20 +102,28 @@ class MotherlessIE(InfoExtractor):
            or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
        age_limit = self._rta_search(webpage)
        view_count = str_to_int(self._html_search_regex(
-            (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
+            (r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
            webpage, 'view count', fatal=False))
        like_count = str_to_int(self._html_search_regex(
-            (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
+            (r'>([\d,.]+)\s+Favorites<',
             r'<strong>Favorited</strong>\s+([^<]+)<'),
            webpage, 'like count', fatal=False))
-        upload_date = self._html_search_regex(
+        upload_date = unified_strdate(self._search_regex(
-            (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
+            r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
-             r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
+            'upload date', default=None))
-        if 'Ago' in upload_date:
+        if not upload_date:
-            days = int(re.search(r'([0-9]+)', upload_date).group(1))
+            uploaded_ago = self._search_regex(
-            upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
+                r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
-        else:
+                default=None)
-            upload_date = unified_strdate(upload_date)
+            if uploaded_ago:
                delta = int(uploaded_ago[:-1])
                _AGO_UNITS = {
                    'h': 'hours',
                    'd': 'days',
                }
                kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
                upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
        comment_count = webpage.count('class="media-comment-contents"')
        uploader_id = self._html_search_regex(
--- a/haruhi_dl/extractor/mtv.py
+++ b/haruhi_dl/extractor/mtv.py
@ -253,6 +253,10 @@ class MTVServicesInfoExtractor(InfoExtractor):
        return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
    @staticmethod
    def _extract_child_with_type(parent, t):
        return next(c for c in parent['children'] if c.get('type') == t)
    def _extract_mgid(self, webpage):
        try:
            # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
@ -278,6 +282,13 @@ class MTVServicesInfoExtractor(InfoExtractor):
        if not mgid:
            mgid = self._extract_triforce_mgid(webpage)
        if not mgid:
            data = self._parse_json(self._search_regex(
                r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
            main_container = self._extract_child_with_type(data, 'MainContainer')
            video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
            mgid = video_player['props']['media']['video']['config']['uri']
        return mgid
    def _real_extract(self, url):
--- a/haruhi_dl/extractor/nba.py
+++ b/haruhi_dl/extractor/nba.py
@ -5,33 +5,137 @@ import re
 from .turner import TurnerBaseIE
 from ..compat import (
-    compat_urllib_parse_urlencode,
+    compat_parse_qs,
-    compat_urlparse,
+    compat_str,
    compat_urllib_parse_unquote,
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    int_or_none,
    merge_dicts,
    OnDemandPagedList,
-    remove_start,
+    parse_duration,
    parse_iso8601,
    try_get,
    update_url_query,
    urljoin,
 )
-class NBAIE(TurnerBaseIE):
+class NBACVPBaseIE(TurnerBaseIE):
-    _VALID_URL = r'https?://(?:watch\.|www\.)?nba\.com/(?P<path>(?:[^/]+/)+(?P<id>[^?]*?))/?(?:/index\.html)?(?:\?.*)?$'
+    def _extract_nba_cvp_info(self, path, video_id, fatal=False):
        return self._extract_cvp_info(
            'http://secure.nba.com/%s' % path, video_id, {
                'default': {
                    'media_src': 'http://nba.cdn.turner.com/nba/big',
                },
                'm3u8': {
                    'media_src': 'http://nbavod-f.akamaihd.net',
                },
            }, fatal=fatal)
 class NBAWatchBaseIE(NBACVPBaseIE):
    """Common base for the NBA "watch" extractors.

    Provides the URL prefix shared by the subclasses and a helper that
    resolves a single video through the program search endpoint.
    """
    _VALID_URL_BASE = r'https?://(?:(?:www\.)?nba\.com(?:/watch)?|watch\.nba\.com)/'

    def _extract_video(self, filter_key, filter_value):
        """Return the info dict for the first hit of a program search.

        filter_key and filter_value are joined as ``key:value`` to form the
        search query. The first returned document is used; a missing
        document, 'pid' or 'name' propagates as the underlying lookup error.
        """
        video = self._download_json(
            'https://neulionscnbav2-a.akamaihd.net/solr/nbad_program/usersearch',
            filter_value, query={
                'fl': 'description,image,name,pid,releaseDate,runtime,tags,seoName',
                'q': filter_key + ':' + filter_value,
                'wt': 'json',
            })['response']['docs'][0]
        # compat_str keeps the id a text string on Python 2 as well
        # (plain str() would produce a byte string there).
        video_id = compat_str(video['pid'])
        title = video['name']
        formats = []
        # The publishpoint request is non-fatal: on failure no HLS formats
        # are collected and extraction continues.
        m3u8_url = (self._download_json(
            'https://watch.nba.com/service/publishpoint', video_id, query={
                'type': 'video',
                'format': 'json',
                'id': video_id,
            }, headers={
                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
            }, fatal=False) or {}).get('path')
        if m3u8_url:
            # Strip the '_pc' / '_iphone' device marker before fetching the
            # manifest.
            m3u8_formats = self._extract_m3u8_formats(
                re.sub(r'_(?:pc|iphone)\.', '.', m3u8_url), video_id, 'mp4',
                'm3u8_native', m3u8_id='hls', fatal=False)
            formats.extend(m3u8_formats)
            # Derive a plain HTTP variant of every HLS format by dropping
            # '.m3u8' from its URL.
            for f in m3u8_formats:
                http_f = f.copy()
                http_f.update({
                    'format_id': http_f['format_id'].replace('hls-', 'http-'),
                    'protocol': 'http',
                    'url': http_f['url'].replace('.m3u8', ''),
                })
                formats.append(http_f)
        info = {
            'id': video_id,
            'title': title,
            'thumbnail': urljoin('https://nbadsdmt.akamaized.net/media/nba/nba/thumbs/', video.get('image')),
            'description': video.get('description'),
            'duration': int_or_none(video.get('runtime')),
            'timestamp': parse_iso8601(video.get('releaseDate')),
            'tags': video.get('tags'),
        }
        seo_name = video.get('seoName')
        # CVP metadata is only attempted for seoNames that contain a
        # yyyy/mm/dd/ path segment.
        if seo_name and re.search(r'\d{4}/\d{2}/\d{2}/', seo_name):
            base_path = ''
            if seo_name.startswith('teams/'):
                # Team videos live under '<team>/video/...'.
                base_path += seo_name.split('/')[1] + '/'
            base_path += 'video/'
            cvp_info = self._extract_nba_cvp_info(
                base_path + seo_name + '.xml', video_id, False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                info = merge_dicts(info, cvp_info)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
 class NBAWatchEmbedIE(NBAWatchBaseIE):
    """Extractor for NBA watch embed URLs carrying a numeric ``id`` parameter."""
    # Spelled IE_NAME (not IENAME) so the attribute matches the one used by
    # the sibling extractors in this file.
    IE_NAME = 'nba:watch:embed'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://watch.nba.com/embed?id=659395',
        'md5': 'b7e3f9946595f4ca0a13903ce5edd120',
        'info_dict': {
            'id': '659395',
            'ext': 'mp4',
            'title': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'description': 'Mix clip: More than 7 points of  Joe Ingles, Luc Mbah a Moute, Blake Griffin and 6 more in Utah Jazz vs. the Clippers, 4/15/2017',
            'timestamp': 1492228800,
            'upload_date': '20170415',
        },
    }]

    def _real_extract(self, url):
        """Look the video up by the 'pid' taken from the URL's id parameter."""
        video_id = self._match_id(url)
        return self._extract_video('pid', video_id)
 class NBAWatchIE(NBAWatchBaseIE):
    IE_NAME = 'nba:watch'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [{
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
-        'md5': '9e7729d3010a9c71506fd1248f74e4f4',
+        'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
        'info_dict': {
-            'id': '0021200253-okc-bkn-recap',
+            'id': '70946',
            'ext': 'mp4',
            'title': 'Thunder vs. Nets',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'duration': 181,
-            'timestamp': 1354638466,
+            'timestamp': 1354597200,
            'upload_date': '20121204',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
        'only_matching': True,
@ -39,116 +143,286 @@ class NBAIE(TurnerBaseIE):
        'url': 'http://watch.nba.com/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
        'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
        'info_dict': {
-            'id': 'channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
+            'id': '330865',
            'ext': 'mp4',
            'title': 'Hawks vs. Cavaliers Game 1',
            'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
            'duration': 228,
-            'timestamp': 1432134543,
+            'timestamp': 1432094400,
-            'upload_date': '20150520',
+            'upload_date': '20150521',
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.nba.com/clippers/news/doc-rivers-were-not-trading-blake',
        'info_dict': {
            'id': 'teams/clippers/2016/02/17/1455672027478-Doc_Feb16_720.mov-297324',
            'ext': 'mp4',
            'title': 'Practice: Doc Rivers - 2/16/16',
            'description': 'Head Coach Doc Rivers addresses the media following practice.',
            'upload_date': '20160216',
            'timestamp': 1455672000,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
        'info_dict': {
            'id': 'timberwolves',
            'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
        },
        'playlist_count': 30,
        'params': {
            # Downloading the whole playlist takes too long
            'playlist_items': '1-30',
        },
    }, {
-        'url': 'http://www.nba.com/timberwolves/wiggins-shootaround#',
+        'url': 'http://watch.nba.com/nba/video/channels/nba_tv/2015/06/11/YT_go_big_go_home_Game4_061115',
-        'info_dict': {
+        'only_matching': True,
-            'id': 'teams/timberwolves/2014/12/12/Wigginsmp4-3462601',
+    }, {
-            'ext': 'mp4',
+        # only CVP mp4 format available
-            'title': 'Shootaround Access - Dec. 12 | Andrew Wiggins',
+        'url': 'https://watch.nba.com/video/teams/cavaliers/2012/10/15/sloan121015mov-2249106',
-            'description': 'Wolves rookie Andrew Wiggins addresses the media after Friday\'s shootaround.',
+        'only_matching': True,
-            'upload_date': '20141212',
+    }, {
-            'timestamp': 1418418600,
+        'url': 'https://watch.nba.com/video/top-100-dunks-from-the-2019-20-season?plsrc=nba&collection=2019-20-season-highlights',
-        },
+        'only_matching': True,
        'params': {
            'noplaylist': True,
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }]
-    _PAGE_SIZE = 30
+    def _real_extract(self, url):
-
+        display_id = self._match_id(url)
-    def _fetch_page(self, team, video_id, page):
+        collection_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('collection', [None])[0]
-        search_url = 'http://searchapp2.nba.com/nba-search/query.jsp?' + compat_urllib_parse_urlencode({
+        if collection_id:
            'type': 'teamvideo',
            'start': page * self._PAGE_SIZE + 1,
            'npp': (page + 1) * self._PAGE_SIZE + 1,
            'sort': 'recent',
            'output': 'json',
            'site': team,
        })
        results = self._download_json(
            search_url, video_id, note='Download page %d of playlist data' % page)['results'][0]
        for item in results:
            yield self.url_result(compat_urlparse.urljoin('http://www.nba.com/', item['url']))
    def _extract_playlist(self, orig_path, video_id, webpage):
        team = orig_path.split('/')[0]
            if self._downloader.params.get('noplaylist'):
-            self.to_screen('Downloading just video because of --no-playlist')
+                self.to_screen('Downloading just video %s because of --no-playlist' % display_id)
-            video_path = self._search_regex(
+            else:
-                r'nbaVideoCore\.firstVideo\s*=\s*\'([^\']+)\';', webpage, 'video path')
+                self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % collection_id)
-            video_url = 'http://www.nba.com/%s/video/%s' % (team, video_path)
+                return self.url_result(
-            return self.url_result(video_url)
+                    'https://www.nba.com/watch/list/collection/' + collection_id,
                    NBAWatchCollectionIE.ie_key(), collection_id)
        return self._extract_video('seoName', display_id)
        self.to_screen('Downloading playlist - add --no-playlist to just download video')
        playlist_title = self._og_search_title(webpage, fatal=False)
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, team, video_id),
            self._PAGE_SIZE)
-        return self.playlist_result(entries, team, playlist_title)
+class NBAWatchCollectionIE(NBAWatchBaseIE):
    IE_NAME = 'nba:watch:collection'
    _VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://watch.nba.com/list/collection/season-preview-2020',
        'info_dict': {
            'id': 'season-preview-2020',
        },
        'playlist_mincount': 43,
    }]
    _PAGE_SIZE = 100
    def _fetch_page(self, collection_id, page):
        """Yield url-type entries for one page of a collection's video list.

        The incoming page index is incremented by one before it is sent to
        the API. Videos without a 'seoName' or 'slug' are skipped.
        """
        page_number = page + 1
        listing = self._download_json(
            'https://content-api-prod.nba.com/public/1/endeavor/video-list/collection/' + collection_id,
            collection_id, 'Downloading page %d JSON metadata' % page_number,
            query={
                'count': self._PAGE_SIZE,
                'page': page_number,
            })
        for item in listing['results']['videos']:
            program = item.get('program') or {}
            slug = program.get('seoName') or program.get('slug')
            if slug:
                yield {
                    '_type': 'url',
                    'id': program.get('id'),
                    'title': program.get('title') or item.get('title'),
                    'url': 'https://www.nba.com/watch/video/' + slug,
                    'thumbnail': item.get('image'),
                    'description': program.get('description') or item.get('description'),
                    'duration': parse_duration(program.get('runtimeHours')),
                    'timestamp': parse_iso8601(item.get('releaseDate')),
                }
    def _real_extract(self, url):
-        path, video_id = re.match(self._VALID_URL, url).groups()
+        collection_id = self._match_id(url)
-        orig_path = path
+        entries = OnDemandPagedList(
-        if path.startswith('nba/'):
+            functools.partial(self._fetch_page, collection_id),
-            path = path[3:]
+            self._PAGE_SIZE)
        return self.playlist_result(entries, collection_id)
        if 'video/' not in path:
            webpage = self._download_webpage(url, video_id)
            path = remove_start(self._search_regex(r'data-videoid="([^"]+)"', webpage, 'video id'), '/')
-            if path == '{{id}}':
+class NBABaseIE(NBACVPBaseIE):
-                return self._extract_playlist(orig_path, video_id, webpage)
+    _VALID_URL_BASE = r'''(?x)
        https?://(?:www\.)?nba\.com/
            (?P<team>
                blazers|
                bucks|
                bulls|
                cavaliers|
                celtics|
                clippers|
                grizzlies|
                hawks|
                heat|
                hornets|
                jazz|
                kings|
                knicks|
                lakers|
                magic|
                mavericks|
                nets|
                nuggets|
                pacers|
                pelicans|
                pistons|
                raptors|
                rockets|
                sixers|
                spurs|
                suns|
                thunder|
                timberwolves|
                warriors|
                wizards
            )
        (?:/play\#)?/'''
    _CHANNEL_PATH_REGEX = r'video/channel|series'
-            # See prepareContentId() of pkgCvp.js
+    def _embed_url_result(self, team, content_id):
-            if path.startswith('video/teams'):
+        return self.url_result(update_url_query(
-                path = 'video/channels/proxy/' + path[6:]
+            'https://secure.nba.com/assets/amp/include/video/iframe.html', {
                'contentId': content_id,
                'team': team,
            }), NBAEmbedIE.ie_key())
-        return self._extract_cvp_info(
+    def _call_api(self, team, content_id, query, resource):
-            'http://www.nba.com/%s.xml' % path, video_id, {
+        return self._download_json(
-                'default': {
+            'https://api.nba.net/2/%s/video,imported_video,wsc/' % team,
-                    'media_src': 'http://nba.cdn.turner.com/nba/big',
+            content_id, 'Download %s JSON metadata' % resource,
-                },
+            query=query, headers={
-                'm3u8': {
+                'accessToken': 'internal|bb88df6b4c2244e78822812cecf1ee1b',
-                    'media_src': 'http://nbavod-f.akamaihd.net',
+            })['response']['result']
-                },
+
    def _extract_video(self, video, team, extract_all=True):
        """Build an info dict from one API video entry.

        The team argument is overwritten with video['brand'] before use.
        With extract_all true, source, HLS and CVP formats are gathered and
        sorted. Otherwise the dict is merged with a URL result for the embed
        page and only the direct MP4 (if present) is listed as a format.
        """
        video_id = compat_str(video['nid'])
        team = video['brand']
        title = video.get('title') or video.get('headline') or video['shortHeadline']
        info = {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'timestamp': parse_iso8601(video.get('published')),
        }

        # Every sidecar caption URL is filed under 'en'.
        sidecars = try_get(video, lambda x: x['videoCaptions']['sidecars'], dict) or {}
        caption_tracks = [{'url': sidecar_url} for sidecar_url in sidecars.values()]
        subtitles = {'en': caption_tracks} if caption_tracks else {}

        direct_mp4 = video.get('mp4')
        formats = [{'url': direct_mp4}] if direct_mp4 else []

        if not extract_all:
            info.update(self._embed_url_result(team, video['videoId']))
        else:
            original_url = video.get('videoSource')
            # s3:// sources are skipped; other sources are probed first.
            if (original_url and not original_url.startswith('s3://')
                    and self._is_valid_url(original_url, video_id, 'source')):
                formats.append({
                    'format_id': 'source',
                    'url': original_url,
                    'preference': 1,
                })

            hls_url = video.get('m3u8')
            if hls_url:
                if '.akamaihd.net/i/' in hls_url:
                    hls_formats = self._extract_akamai_formats(
                        hls_url, video_id, {'http': 'pmd.cdn.turner.com'})
                else:
                    hls_formats = self._extract_m3u8_formats(
                        hls_url, video_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)

            xml_path = video.get('contentXml')
            cvp_info = None
            if team and xml_path:
                cvp_info = self._extract_nba_cvp_info(
                    team + xml_path, video_id, fatal=False)
            if cvp_info:
                formats.extend(cvp_info['formats'])
                subtitles = self._merge_subtitles(subtitles, cvp_info['subtitles'])
                info = merge_dicts(info, cvp_info)

            self._sort_formats(formats)

        info.update({
            'formats': formats,
            'subtitles': subtitles,
        })
        return info
    def _real_extract(self, url):
        """Resolve the content id for a team-site URL and hand it on.

        For '/play#/' URLs the id is the percent-decoded id group of the URL.
        For any other URL the page is downloaded and the id is read from the
        ``<_CONTENT_ID_REGEX>: "..."`` assignment found in it.
        """
        url_match = re.match(self._VALID_URL, url)
        team, url_id = url_match.groups()
        if '/play#/' not in url:
            webpage = self._download_webpage(url, url_id)
            id_pattern = self._CONTENT_ID_REGEX + r'\s*:\s*"([^"]+)"'
            content_id = self._search_regex(id_pattern, webpage, 'video id')
        else:
            content_id = compat_urllib_parse_unquote(url_id)
        return self._extract_url_results(team, content_id)
 class NBAEmbedIE(NBABaseIE):
    """Extractor for secure.nba.com iframe/topIframe embed URLs."""
    # Spelled IE_NAME (not IENAME) so the attribute matches the one used by
    # the other NBA extractors in this file.
    IE_NAME = 'nba:embed'
    _VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'https://secure.nba.com/assets/amp/include/video/topIframe.html?contentId=teams/bulls/2020/12/04/3478774/1607105587854-20201204_SCHEDULE_RELEASE_FINAL_DRUPAL-3478774&team=bulls&adFree=false&profile=71&videoPlayerName=TAMPCVP&baseUrl=&videoAdsection=nba.com_mobile_web_teamsites_chicagobulls&ampEnv=',
        'only_matching': True,
    }, {
        'url': 'https://secure.nba.com/assets/amp/include/video/iframe.html?contentId=2016/10/29/0021600027boschaplay7&adFree=false&profile=71&team=&videoPlayerName=LAMPCVP',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract via the team API, or defer to NBAWatchIE when no team is given."""
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        content_id = qs['contentId'][0]
        # 'team' may be absent or empty in the query string.
        team = qs.get('team', [None])[0]
        if not team:
            return self.url_result(
                'https://watch.nba.com/video/' + content_id, NBAWatchIE.ie_key())
        video = self._call_api(team, content_id, {'videoid': content_id}, 'video')[0]
        return self._extract_video(video, team)
 class NBAIE(NBABaseIE):
    """Extractor for single videos on NBA team sites.

    Channel and series paths are excluded from the URL pattern; the resolved
    content id is passed on as an embed URL result.
    """
    # Spelled IE_NAME (not IENAME) so the attribute matches the one used by
    # the other NBA extractors in this file.
    IE_NAME = 'nba'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?!%s)video/(?P<id>(?:[^/]+/)*[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/bulls/video/teams/bulls/2020/12/04/3478774/1607105587854-20201204schedulereleasefinaldrupal-3478774',
        'info_dict': {
            'id': '45039',
            'ext': 'mp4',
            'title': 'AND WE BACK.',
            'description': 'Part 1 of our 2020-21 schedule is here! Watch our games on NBC Sports Chicago.',
            'duration': 94,
            'timestamp': 1607112000,
            'upload_date': '20201218',
        },
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/teams%2Fbucks%2F2020%2F12%2F17%2F64860%2F1608252863446-Op_Dream_16x9-64860',
        'only_matching': True,
    }, {
        'url': 'https://www.nba.com/bucks/play#/video/wsc%2Fteams%2F2787C911AA1ACD154B5377F7577CCC7134B2A4B0',
        'only_matching': True,
    }]
    # Name of the page variable that holds the content id.
    _CONTENT_ID_REGEX = r'videoID'

    def _extract_url_results(self, team, content_id):
        """Return a URL result pointing at the embed page for content_id."""
        return self._embed_url_result(team, content_id)
 class NBAChannelIE(NBABaseIE):
    """Extractor for team-site video channels and series, returned as a paged playlist."""
    # Spelled IE_NAME (not IENAME) so the attribute matches the one used by
    # the other NBA extractors in this file.
    IE_NAME = 'nba:channel'
    _VALID_URL = NBABaseIE._VALID_URL_BASE + '(?:%s)/(?P<id>[^/?#&]+)' % NBABaseIE._CHANNEL_PATH_REGEX
    _TESTS = [{
        'url': 'https://www.nba.com/blazers/video/channel/summer_league',
        'info_dict': {
            'title': 'Summer League',
        },
        'playlist_mincount': 138,
    }, {
        'url': 'https://www.nba.com/bucks/play#/series/On%20This%20Date',
        'only_matching': True,
    }]
    # Name of the page variable that holds the channel name.
    _CONTENT_ID_REGEX = r'videoSubCategory'
    _PAGE_SIZE = 100

    def _fetch_page(self, team, channel, page):
        """Yield one entry per video on the given page of the channel.

        The API offset is page * _PAGE_SIZE; the progress note shows
        page + 1.
        """
        results = self._call_api(team, channel, {
            'channels': channel,
            'count': self._PAGE_SIZE,
            'offset': page * self._PAGE_SIZE,
        }, 'page %d' % (page + 1))
        for video in results:
            # extract_all=False: entries are embed URL results, not fully
            # extracted videos.
            yield self._extract_video(video, team, False)

    def _extract_url_results(self, team, content_id):
        """Return a playlist titled content_id whose pages come from _fetch_page."""
        entries = OnDemandPagedList(
            functools.partial(self._fetch_page, team, content_id),
            self._PAGE_SIZE)
        return self.playlist_result(entries, playlist_title=content_id)
--- a/haruhi_dl/extractor/nbc.py
+++ b/haruhi_dl/extractor/nbc.py
@ -10,7 +10,6 @@ from .adobepass import AdobePassIE
 from ..compat import compat_urllib_parse_unquote
 from ..utils import (
    int_or_none,
    js_to_json,
    parse_duration,
    smuggle_url,
    try_get,
@ -159,7 +158,8 @@ class NBCIE(AdobePassIE):
 class NBCSportsVPlayerIE(InfoExtractor):
-    _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+    _VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
    _VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
    _TESTS = [{
        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
@ -175,12 +175,15 @@ class NBCSportsVPlayerIE(InfoExtractor):
    }, {
        'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
        'only_matching': True,
    }, {
        'url': 'https://www.nbcsports.com/vplayer/p/BxmELC/nbcsports/select/PHJSaFWbrTY9?form=html&autoPlay=true',
        'only_matching': True,
    }]
    @staticmethod
    def _extract_url(webpage):
        iframe_m = re.search(
-            r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+            r'<(?:iframe[^>]+|div[^>]+data-(?:mpx-)?)src="(?P<url>%s[^"]+)"' % NBCSportsVPlayerIE._VALID_URL_BASE, webpage)
        if iframe_m:
            return iframe_m.group('url')
@ -193,21 +196,29 @@ class NBCSportsVPlayerIE(InfoExtractor):
 class NBCSportsIE(InfoExtractor):
-    # Does not include https because its certificate is invalid
+    _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
    _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
-    _TEST = {
+    _TESTS = [{
        # iframe src
        'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
        'info_dict': {
            'id': 'PHJSaFWbrTY9',
-            'ext': 'flv',
+            'ext': 'mp4',
            'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
            'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
            'uploader': 'NBCU-SPORTS',
            'upload_date': '20150330',
            'timestamp': 1427726529,
        }
-    }
+    }, {
        # data-mpx-src
        'url': 'https://www.nbcsports.com/philadelphia/philadelphia-phillies/bruce-bochy-hector-neris-hes-idiot',
        'only_matching': True,
    }, {
        # data-src
        'url': 'https://www.nbcsports.com/boston/video/report-card-pats-secondary-no-match-josh-allen',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        video_id = self._match_id(url)
@ -275,33 +286,6 @@ class NBCSportsStreamIE(AdobePassIE):
        }
 class CSNNEIE(InfoExtractor):
    """Extractor for csnne.com video pages; defers to ThePlatform for the stream."""
    _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
    _TEST = {
        'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
        'info_dict': {
            'id': 'yvBLLUgQ8WU0',
            'ext': 'mp4',
            'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
            'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
            'timestamp': 1459369979,
            'upload_date': '20160330',
            'uploader': 'NBCU-SPORTS',
        }
    }

    def _real_extract(self, url):
        """Return a url_transparent result for the page's twitter:player:stream URL."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        stream_url = self._html_search_meta('twitter:player:stream', page)
        result = {'_type': 'url_transparent', 'ie_key': 'ThePlatform'}
        result['url'] = stream_url
        result['display_id'] = slug
        return result
 class NBCNewsIE(ThePlatformIE):
    _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
@ -394,8 +378,8 @@ class NBCNewsIE(ThePlatformIE):
        webpage = self._download_webpage(url, video_id)
        data = self._parse_json(self._search_regex(
-            r'window\.__data\s*=\s*({.+});', webpage,
+            r'<script[^>]+id="__NEXT_DATA__"[^>]*>({.+?})</script>',
-            'bootstrap json'), video_id, js_to_json)
+            webpage, 'bootstrap json'), video_id)['props']['initialState']
        video_data = try_get(data, lambda x: x['video']['current'], dict)
        if not video_data:
            video_data = data['article']['content'][0]['primaryMedia']['video']
--- a/haruhi_dl/extractor/ndr.py
+++ b/haruhi_dl/extractor/ndr.py
@ -81,6 +81,29 @@ class NDRIE(NDRBaseIE):
        'params': {
            'skip_download': True,
        },
    }, {
        # with subtitles
        'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
        'info_dict': {
            'id': 'extra18674',
            'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
            'ext': 'mp4',
            'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
            'description': 'md5:42ee53990a715eaaf4dc7f13a3bd56c6',
            'uploader': 'ndrtv',
            'upload_date': '20201113',
            'duration': 1749,
            'subtitles': {
                'de': [{
                    'ext': 'ttml',
                    'url': r're:^https://www\.ndr\.de.+',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
        'only_matching': True,
@ -239,6 +262,20 @@ class NDREmbedBaseIE(InfoExtractor):
                'preference': quality_key(thumbnail.get('quality')),
            })
        subtitles = {}
        tracks = config.get('tracks')
        if tracks and isinstance(tracks, list):
            for track in tracks:
                if not isinstance(track, dict):
                    continue
                track_url = urljoin(url, track.get('src'))
                if not track_url:
                    continue
                subtitles.setdefault(track.get('srclang') or 'de', []).append({
                    'url': track_url,
                    'ext': 'ttml',
                })
        return {
            'id': video_id,
            'title': title,
@ -248,6 +285,7 @@ class NDREmbedBaseIE(InfoExtractor):
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
--- a/haruhi_dl/extractor/nfl.py
+++ b/haruhi_dl/extractor/nfl.py
@ -4,19 +4,15 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..compat import (
    compat_urllib_parse_urlparse,
 )
 from ..utils import (
-    ExtractorError,
+    clean_html,
-    int_or_none,
+    determine_ext,
-    remove_end,
+    get_element_by_class,
 )
-class NFLIE(InfoExtractor):
+class NFLBaseIE(InfoExtractor):
-    IE_NAME = 'nfl.com'
+    _VALID_URL_BASE = r'''(?x)
    _VALID_URL = r'''(?x)
                    https?://
                        (?P<host>
                            (?:www\.)?
@ -34,15 +30,15 @@ class NFLIE(InfoExtractor):
                                    houstontexans|
                                    colts|
                                    jaguars|
-                                    titansonline|
+                                    (?:titansonline|tennesseetitans)|
                                    denverbroncos|
-                                    kcchiefs|
+                                    (?:kc)?chiefs|
                                    raiders|
                                    chargers|
                                    dallascowboys|
                                    giants|
                                    philadelphiaeagles|
-                                    redskins|
+                                    (?:redskins|washingtonfootball)|
                                    chicagobears|
                                    detroitlions|
                                    packers|
@ -52,180 +48,113 @@ class NFLIE(InfoExtractor):
                                    neworleanssaints|
                                    buccaneers|
                                    azcardinals|
-                                    stlouisrams|
+                                    (?:stlouis|the)rams|
                                    49ers|
                                    seahawks
                                )\.com|
                                .+?\.clubs\.nfl\.com
                            )
                        )/
                        (?:.+?/)*
                        (?P<id>[^/#?&]+)
                    '''
    _VIDEO_CONFIG_REGEX = r'<script[^>]+id="[^"]*video-config-[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}[^"]*"[^>]*>\s*({.+})'
    _WORKING = False
    def _parse_video_config(self, video_config, display_id):
        """Build an info dict from a page's embedded video-config JSON.

        video_config -- raw JSON text of the config blob
        display_id -- identifier handed to _parse_json for reporting

        Only the first entry of the config's 'playlist' is used.  An entry
        carrying an 'mcpID' is handed off to the Anvato extractor through
        url_result(); any other entry is described directly from its own
        fields ('id'/'entityId', 'title', 'url', 'description', ...).

        KeyError/IndexError propagate when 'playlist' is missing or empty,
        or when a non-Anvato entry lacks 'title', 'url' or both of
        'id'/'entityId'.
        """
        video_config = self._parse_json(video_config, display_id)
        item = video_config['playlist'][0]

        mcp_id = item.get('mcpID')
        if mcp_id:
            return self.url_result(
                'anvato:GXvEgwyJeWem8KCYXfeoHWknwP48Mboj:' + mcp_id,
                'Anvato', mcp_id)

        media_id = item.get('id') or item['entityId']
        title = item['title']
        item_url = item['url']
        info = {'id': media_id}

        if determine_ext(item_url) == 'm3u8':
            info['formats'] = self._extract_m3u8_formats(item_url, media_id, 'mp4')
            self._sort_formats(info['formats'])
        else:
            info['url'] = item_url
            # Direct (non-HLS) items flagged as audio have no video stream.
            if item.get('audio') is True:
                info['vcodec'] = 'none'

        is_live = video_config.get('live') is True

        # Read the image fields directly.  The earlier form,
        # item.get(item.get('imageSrc')), used the field's *value* as a key
        # into item, so it could only succeed if the item happened to have a
        # key equal to that value.
        # NOTE(review): assumes 'imageSrc'/'posterImage' hold the image URL
        # itself -- confirm against a live config.
        thumbnails = None
        image_url = item.get('imageSrc') or item.get('posterImage')
        if image_url:
            thumbnails = [{
                'url': image_url,
                'ext': determine_ext(image_url, 'jpg'),
            }]

        info.update({
            'title': self._live_title(title) if is_live else title,
            'is_live': is_live,
            'description': clean_html(item.get('description')),
            'thumbnails': thumbnails,
        })
        return info
 class NFLIE(NFLBaseIE):
    IE_NAME = 'nfl.com'
    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'(?:videos?|listen|audio)/(?P<id>[^/#?&]+)'
    _TESTS = [{
-        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'url': 'https://www.nfl.com/videos/baker-mayfield-s-game-changing-plays-from-3-td-game-week-14',
        'md5': '394ef771ddcd1354f665b471d78ec4c6',
        'info_dict': {
-            'id': '0ap3000000398478',
+            'id': '899441',
            'ext': 'mp4',
-            'title': 'Week 3: Redskins vs. Eagles highlights',
+            'title': "Baker Mayfield's game-changing plays from 3-TD game Week 14",
-            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'description': 'md5:85e05a3cc163f8c344340f220521136d',
-            'upload_date': '20140921',
+            'upload_date': '20201215',
-            'timestamp': 1411337580,
+            'timestamp': 1608009755,
            'thumbnail': r're:^https?://.*\.jpg$',
            'uploader': 'NFL',
        }
    }, {
-        'url': 'http://prod.www.steelers.clubs.nfl.com/video-and-audio/videos/LIVE_Post_Game_vs_Browns/9d72f26a-9e2b-4718-84d3-09fb4046c266',
+        'url': 'https://www.chiefs.com/listen/patrick-mahomes-travis-kelce-react-to-win-over-dolphins-the-breakdown',
-        'md5': 'cf85bdb4bc49f6e9d3816d130c78279c',
+        'md5': '6886b32c24b463038c760ceb55a34566',
        'info_dict': {
-            'id': '9d72f26a-9e2b-4718-84d3-09fb4046c266',
+            'id': 'd87e8790-3e14-11eb-8ceb-ff05c2867f99',
-            'ext': 'mp4',
+            'ext': 'mp3',
-            'title': 'LIVE: Post Game vs. Browns',
+            'title': 'Patrick Mahomes, Travis Kelce React to Win Over Dolphins | The Breakdown',
-            'description': 'md5:6a97f7e5ebeb4c0e69a418a89e0636e8',
+            'description': 'md5:12ada8ee70e6762658c30e223e095075',
            'upload_date': '20131229',
            'timestamp': 1388354455,
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
-        'url': 'http://www.nfl.com/news/story/0ap3000000467586/article/patriots-seahawks-involved-in-lategame-skirmish',
+        'url': 'https://www.buffalobills.com/video/buffalo-bills-military-recognition-week-14',
        'info_dict': {
            'id': '0ap3000000467607',
            'ext': 'mp4',
            'title': 'Frustrations flare on the field',
            'description': 'Emotions ran high at the end of the Super Bowl on both sides of the ball after a dramatic finish.',
            'timestamp': 1422850320,
            'upload_date': '20150202',
        },
    }, {
        'url': 'http://www.patriots.com/video/2015/09/18/10-days-gillette',
        'md5': '4c319e2f625ffd0b481b4382c6fc124c',
        'info_dict': {
            'id': 'n-238346',
            'ext': 'mp4',
            'title': '10 Days at Gillette',
            'description': 'md5:8cd9cd48fac16de596eadc0b24add951',
            'timestamp': 1442618809,
            'upload_date': '20150918',
        },
    }, {
        # lowercase data-contentid
        'url': 'http://www.steelers.com/news/article-1/Tomlin-on-Ben-getting-Vick-ready/56399c96-4160-48cf-a7ad-1d17d4a3aef7',
        'info_dict': {
            'id': '12693586-6ea9-4743-9c1c-02c59e4a5ef2',
            'ext': 'mp4',
            'title': 'Tomlin looks ahead to Ravens on a short week',
            'description': 'md5:32f3f7b139f43913181d5cbb24ecad75',
            'timestamp': 1443459651,
            'upload_date': '20150928',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.nfl.com/videos/nfl-network-top-ten/09000d5d810a6bd4/Top-10-Gutsiest-Performances-Jack-Youngblood',
        'only_matching': True,
    }, {
-        'url': 'http://www.buffalobills.com/video/videos/Rex_Ryan_Show_World_Wide_Rex/b1dcfab2-3190-4bb1-bfc0-d6e603d6601a',
+        'url': 'https://www.raiders.com/audio/instant-reactions-raiders-week-14-loss-to-indianapolis-colts-espn-jason-fitz',
        'only_matching': True,
    }]
-    @staticmethod
+    def _real_extract(self, url):
-    def prepend_host(host, url):
+        display_id = self._match_id(url)
-        if not url.startswith('http'):
+        webpage = self._download_webpage(url, display_id)
-            if not url.startswith('/'):
+        return self._parse_video_config(self._search_regex(
-                url = '/%s' % url
+            self._VIDEO_CONFIG_REGEX, webpage, 'video config'), display_id)
            url = 'http://{0:}{1:}'.format(host, url)
        return url
-    @staticmethod
+
-    def format_from_stream(stream, protocol, host, path_prefix='',
+class NFLArticleIE(NFLBaseIE):
-                           preference=0, note=None):
+    IE_NAME = 'nfl.com:article'
-        url = '{protocol:}://{host:}/{prefix:}{path:}'.format(
+    _VALID_URL = NFLBaseIE._VALID_URL_BASE + r'news/(?P<id>[^/#?&]+)'
-            protocol=protocol,
+    _TEST = {
-            host=host,
+        'url': 'https://www.buffalobills.com/news/the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
-            prefix=path_prefix,
+        'info_dict': {
-            path=stream.get('path'),
+            'id': 'the-only-thing-we-ve-earned-is-the-noise-bills-coaches-discuss-handling-rising-e',
-        )
+            'title': "'The only thing we've earned is the noise' | Bills coaches discuss handling rising expectations",
-        return {
+        },
-            'url': url,
+        'playlist_count': 4,
            'vbr': int_or_none(stream.get('rate', 0), 1000),
            'preference': preference,
            'format_note': note,
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        display_id = self._match_id(url)
-        video_id, host = mobj.group('id'), mobj.group('host')
+        webpage = self._download_webpage(url, display_id)
-
+        entries = []
-        webpage = self._download_webpage(url, video_id)
+        for video_config in re.findall(self._VIDEO_CONFIG_REGEX, webpage):
-
+            entries.append(self._parse_video_config(video_config, display_id))
-        config_url = NFLIE.prepend_host(host, self._search_regex(
+        title = clean_html(get_element_by_class(
-            r'(?:(?:config|configURL)\s*:\s*|<nflcs:avplayer[^>]+data-config\s*=\s*)(["\'])(?P<config>.+?)\1',
+            'nfl-c-article__title', webpage)) or self._html_search_meta(
-            webpage, 'config URL', default='static/content/static/config/video/config.json',
+            ['og:title', 'twitter:title'], webpage)
-            group='config'))
+        return self.playlist_result(entries, display_id, title)
        # For articles, the id in the url is not the video id
        video_id = self._search_regex(
            r'(?:<nflcs:avplayer[^>]+data-content[Ii]d\s*=\s*|content[Ii]d\s*:\s*)(["\'])(?P<id>(?:(?!\1).)+)\1',
            webpage, 'video id', default=video_id, group='id')
        config = self._download_json(config_url, video_id, 'Downloading player config')
        url_template = NFLIE.prepend_host(
            host, '{contentURLTemplate:}'.format(**config))
        video_data = self._download_json(
            url_template.format(id=video_id), video_id)
        formats = []
        cdn_data = video_data.get('cdnData', {})
        streams = cdn_data.get('bitrateInfo', [])
        if cdn_data.get('format') == 'EXTERNAL_HTTP_STREAM':
            parts = compat_urllib_parse_urlparse(cdn_data.get('uri'))
            protocol, host = parts.scheme, parts.netloc
            for stream in streams:
                formats.append(
                    NFLIE.format_from_stream(stream, protocol, host))
        else:
            cdns = config.get('cdns')
            if not cdns:
                raise ExtractorError('Failed to get CDN data', expected=True)
            for name, cdn in cdns.items():
                # LimeLight streams don't seem to work
                if cdn.get('name') == 'LIMELIGHT':
                    continue
                protocol = cdn.get('protocol')
                host = remove_end(cdn.get('host', ''), '/')
                if not (protocol and host):
                    continue
                prefix = cdn.get('pathprefix', '')
                if prefix and not prefix.endswith('/'):
                    prefix = '%s/' % prefix
                preference = 0
                if protocol == 'rtmp':
                    preference = -2
                elif 'prog' in name.lower():
                    preference = 1
                for stream in streams:
                    formats.append(
                        NFLIE.format_from_stream(stream, protocol, host,
                                                 prefix, preference, name))
        self._sort_formats(formats)
        thumbnail = None
        for q in ('xl', 'l', 'm', 's', 'xs'):
            thumbnail = video_data.get('imagePaths', {}).get(q)
            if thumbnail:
                break
        return {
            'id': video_id,
            'title': video_data.get('headline'),
            'formats': formats,
            'description': video_data.get('caption'),
            'duration': video_data.get('duration'),
            'thumbnail': thumbnail,
            'timestamp': int_or_none(video_data.get('posted'), 1000),
        }
--- a/haruhi_dl/extractor/nhk.py
+++ b/haruhi_dl/extractor/nhk.py
@ -3,51 +3,33 @@ from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
 from ..utils import urljoin
-class NhkVodIE(InfoExtractor):
+class NhkBaseIE(InfoExtractor):
-    _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
+    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/%s/%s/%s/all%s.json'
-    # Content available only for a limited period of time. Visit
+    _BASE_URL_REGEX = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand'
-    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+    _TYPE_REGEX = r'/(?P<type>video|audio)/'
    _TESTS = [{
        # clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
        'md5': '256a1be14f48d960a7e61e2532d95ec3',
        'info_dict': {
            'id': 'a95j5iza',
            'ext': 'mp4',
            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
            'timestamp': 1565965194,
            'upload_date': '20190816',
        },
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }]
    _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
-    def _real_extract(self, url):
+    def _call_api(self, m_id, lang, is_video, is_episode, is_clip):
-        lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
+        return self._download_json(
            self._API_URL_TEMPLATE % (
                'v' if is_video else 'r',
                'clip' if is_clip else 'esd',
                'episode' if is_episode else 'program',
                m_id, lang, '/all' if is_video else ''),
            m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
    def _extract_episode_info(self, url, episode=None):
        fetch_episode = episode is None
        lang, m_type, episode_id = re.match(NhkVodIE._VALID_URL, url).groups()
        if episode_id.isdigit():
            episode_id = episode_id[:4] + '-' + episode_id[4:]
        is_video = m_type == 'video'
-        episode = self._download_json(
+        if fetch_episode:
-            self._API_URL_TEMPLATE % (
+            episode = self._call_api(
-                'v' if is_video else 'r',
+                episode_id, lang, is_video, True, episode_id[:4] == '9999')[0]
                'clip' if episode_id[:4] == '9999' else 'esd',
                episode_id, lang, '/all' if is_video else ''),
            episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
        title = episode.get('sub_title_clean') or episode['sub_title']
        def get_clean_field(key):
@ -76,18 +58,121 @@ class NhkVodIE(InfoExtractor):
            'episode': title,
        }
        if is_video:
            vod_id = episode['vod_id']
            info.update({
                '_type': 'url_transparent',
                'ie_key': 'Piksel',
-                'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
+                'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + vod_id,
                'id': vod_id,
            })
        else:
-            audio = episode['audio']
+            if fetch_episode:
-            audio_path = audio['audio']
+                audio_path = episode['audio']['audio']
                info['formats'] = self._extract_m3u8_formats(
                    'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
                    episode_id, 'm4a', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                for f in info['formats']:
                    f['language'] = lang
            else:
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': NhkVodIE.ie_key(),
                    'url': url,
                })
        return info
 class NhkVodIE(NhkBaseIE):
    # Extractor for a single NHK World on-demand page (video or audio).
    # The URL pattern is the shared base regex + the shared type regex +
    # an id that is either exactly seven digits or of the form
    # "<prefix>-<8 digits>-<lowercase alphanumeric suffix>".
    _VALID_URL = r'%s%s(?P<id>\d{7}|[^/]+?-\d{8}-[0-9a-z]+)' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
    # Content available only for a limited period of time. Visit
    # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
    _TESTS = [{
        # video clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
        'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
        'info_dict': {
            'id': 'a95j5iza',
            'ext': 'mp4',
            'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
            'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
            'timestamp': 1565965194,
            'upload_date': '20190816',
        },
    }, {
        # audio clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/',
        'info_dict': {
            'id': 'r_inventions-20201104-1-en',
            'ext': 'm4a',
            'title': "Japan's Top Inventions - Miniature Video Cameras",
            'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # The remaining entries only check that the URL pattern matches.
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
        'only_matching': True,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        """Delegate to _extract_episode_info with only the URL.

        No pre-fetched episode data is passed along.
        """
        return self._extract_episode_info(url)
 class NhkVodProgramIE(NhkBaseIE):
    # Playlist extractor for an NHK World on-demand *program* page.  The URL
    # may carry an optional "type=" query value (clip, radioEpisode or
    # tvEpisode), captured as the 'episode_type' group.
    _VALID_URL = r'%s/program%s(?P<id>[0-9a-z]+)(?:.+?\btype=(?P<episode_type>clip|(?:radio|tv)Episode))?' % (NhkBaseIE._BASE_URL_REGEX, NhkBaseIE._TYPE_REGEX)
    _TESTS = [{
        # video program episodes
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
        },
        'playlist_mincount': 1,
    }, {
        # video program clips
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/japanrailway/?type=clip',
        'info_dict': {
            'id': 'japanrailway',
            'title': 'Japan Railway Journal',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/video/10yearshayaomiyazaki/',
        'only_matching': True,
    }, {
        # audio program
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/program/audio/listener/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of the program's episodes (or clips).

        The episode list is requested through _call_api as a program-level
        (non-episode) query; clips are requested only when the URL's
        episode_type is 'clip'.  Episodes without a 'url' field are skipped.
        The playlist title is the 'series' value of the first entry, or
        None when there are no entries.
        """
        mobj = re.match(self._VALID_URL, url)
        lang, m_type, program_id, episode_type = mobj.group(
            'lang', 'type', 'id', 'episode_type')

        wants_video = m_type == 'video'
        wants_clips = episode_type == 'clip'
        episodes = self._call_api(
            program_id, lang, wants_video, False, wants_clips)

        entries = []
        for episode in episodes:
            episode_path = episode.get('url')
            if episode_path:
                # The path is resolved against the program URL; the
                # already-fetched episode dict is passed along with it.
                entries.append(self._extract_episode_info(
                    urljoin(url, episode_path), episode))

        program_title = entries[0].get('series') if entries else None
        return self.playlist_result(entries, program_id, program_title)
--- a/haruhi_dl/extractor/niconico.py
+++ b/haruhi_dl/extractor/niconico.py
@ -1,20 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import json
 import datetime
 import functools
 import json
 import math
 from .common import InfoExtractor
 from ..compat import (
    compat_parse_qs,
-    compat_urlparse,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    determine_ext,
    dict_get,
    ExtractorError,
    int_or_none,
    float_or_none,
    InAdvancePagedList,
    int_or_none,
    parse_duration,
    parse_iso8601,
    remove_start,
@ -181,7 +184,7 @@ class NiconicoIE(InfoExtractor):
        if urlh is False:
            login_ok = False
        else:
-            parts = compat_urlparse.urlparse(urlh.geturl())
+            parts = compat_urllib_parse_urlparse(urlh.geturl())
            if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
                login_ok = False
        if not login_ok:
@ -292,7 +295,7 @@ class NiconicoIE(InfoExtractor):
                'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
                video_id, 'Downloading flv info')
-            flv_info = compat_urlparse.parse_qs(flv_info_webpage)
+            flv_info = compat_parse_qs(flv_info_webpage)
            if 'url' not in flv_info:
                if 'deleted' in flv_info:
                    raise ExtractorError('The video has been deleted.',
@ -437,34 +440,76 @@ class NiconicoIE(InfoExtractor):
 class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/mylist/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
            'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
        'playlist_mincount': 225,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }]
    _PAGE_SIZE = 100
    def _call_api(self, list_id, resource, query):
        """Query the nvapi mylist endpoint and return its 'mylist' object.

        list_id -- mylist id; appended to the endpoint URL and also used as
                   the id passed to _download_json
        resource -- short label interpolated into the progress note
        query -- dict of query-string parameters for the request

        A KeyError propagates when the response lacks 'data' or 'mylist'.
        """
        return self._download_json(
            'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
            # Progress note shown to the user (spelling of "metadata" fixed).
            'Downloading %s JSON metadata' % resource, query=query,
            headers={'X-Frontend-Id': 6})['data']['mylist']
    def _parse_owner(self, item):
        owner = item.get('owner') or {}
        if owner:
            return {
                'uploader': owner.get('name'),
                'uploader_id': owner.get('id'),
            }
        return {}
    def _fetch_page(self, list_id, page):
        """Yield one 'url'-type entry per video on a page of the mylist.

        The incoming page index is incremented by one before it is sent to
        the API.  Items without a video id are skipped.  Being a generator,
        the API request is only issued once iteration starts.
        """
        page_number = page + 1
        response = self._call_api(
            list_id, 'page %d' % page_number,
            {
                'page': page_number,
                'pageSize': self._PAGE_SIZE,
            })
        for entry in response['items']:
            video = entry.get('video') or {}
            video_id = video.get('id')
            if video_id:
                counts = video.get('count') or {}
                info = {
                    '_type': 'url',
                    'id': video_id,
                    'title': video.get('title'),
                    'url': 'https://www.nicovideo.jp/watch/' + video_id,
                    'description': video.get('shortDescription'),
                    'duration': int_or_none(video.get('duration')),
                    'view_count': int_or_none(counts.get('view')),
                    'comment_count': int_or_none(counts.get('comment')),
                    'ie_key': NiconicoIE.ie_key(),
                }
                # Uploader fields are added only when the video has an owner.
                info.update(self._parse_owner(video))
                yield info
    def _real_extract(self, url):
        list_id = self._match_id(url)
-        webpage = self._download_webpage(url, list_id)
+        mylist = self._call_api(list_id, 'list', {
-
+            'pageSize': 1,
-        entries_json = self._search_regex(r'Mylist\.preload\(\d+, (\[.*\])\);',
+        })
-                                          webpage, 'entries')
+        entries = InAdvancePagedList(
-        entries = json.loads(entries_json)
+            functools.partial(self._fetch_page, list_id),
-        entries = [{
+            math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
-            '_type': 'url',
+            self._PAGE_SIZE)
-            'ie_key': NiconicoIE.ie_key(),
+        result = self.playlist_result(
-            'url': ('http://www.nicovideo.jp/watch/%s' %
+            entries, list_id, mylist.get('name'), mylist.get('description'))
-                    entry['item_data']['video_id']),
+        result.update(self._parse_owner(mylist))
-        } for entry in entries]
+        return result
        return {
            '_type': 'playlist',
            'title': self._search_regex(r'\s+name: "(.*?)"', webpage, 'title'),
            'id': list_id,
            'entries': entries,
        }
--- a/haruhi_dl/extractor/ninecninemedia.py
+++ b/haruhi_dl/extractor/ninecninemedia.py
@ -5,10 +5,11 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    parse_iso8601,
    float_or_none,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_iso8601,
    try_get,
 )
@ -35,7 +36,7 @@ class NineCNineMediaIE(InfoExtractor):
                '$include': '[HasClosedCaptions]',
            })
-        if content_package.get('Constraints', {}).get('Security', {}).get('Type'):
+        if try_get(content_package, lambda x: x['Constraints']['Security']['Type']):
            raise ExtractorError('This video is DRM protected.', expected=True)
        manifest_base_url = content_package_url + 'manifest.'
@ -52,7 +53,7 @@ class NineCNineMediaIE(InfoExtractor):
        self._sort_formats(formats)
        thumbnails = []
-        for image in content.get('Images', []):
+        for image in (content.get('Images') or []):
            image_url = image.get('Url')
            if not image_url:
                continue
@ -70,7 +71,7 @@ class NineCNineMediaIE(InfoExtractor):
                    continue
                container.append(e_name)
-        season = content.get('Season', {})
+        season = content.get('Season') or {}
        info = {
            'id': content_id,
@ -79,13 +80,14 @@ class NineCNineMediaIE(InfoExtractor):
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'episode_number': int_or_none(content.get('Episode')),
            'season': season.get('Name'),
-            'season_number': season.get('Number'),
+            'season_number': int_or_none(season.get('Number')),
            'season_id': season.get('Id'),
-            'series': content.get('Media', {}).get('Name'),
+            'series': try_get(content, lambda x: x['Media']['Name']),
            'tags': tags,
            'categories': categories,
            'duration': float_or_none(content_package.get('Duration')),
            'formats': formats,
            'thumbnails': thumbnails,
        }
        if content_package.get('HasClosedCaptions'):
--- a/haruhi_dl/extractor/ninegag.py
+++ b/haruhi_dl/extractor/ninegag.py
@ -1,104 +1,130 @@
 from __future__ import unicode_literals
 import re
 from .common import InfoExtractor
-from ..utils import str_to_int
+from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
 )
 class NineGagIE(InfoExtractor):
    IE_NAME = '9gag'
-    _VALID_URL = r'https?://(?:www\.)?9gag(?:\.com/tv|\.tv)/(?:p|embed)/(?P<id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^?#/]+))?'
+    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'
    _TESTS = [{
-        'url': 'http://9gag.com/tv/p/Kk2X5/people-are-awesome-2013-is-absolutely-awesome',
+        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
-            'id': 'kXzwOKyGlSA',
+            'id': 'ae5Ag7B',
            'ext': 'mp4',
-            'description': 'This 3-minute video will make you smile and then make you feel untalented and insignificant. Anyway, you should share this awesomeness. (Thanks, Dino!)',
+            'title': 'Capybara Agility Training',
-            'title': '\"People Are Awesome 2013\" Is Absolutely Awesome',
+            'upload_date': '20191108',
-            'uploader_id': 'UCdEH6EjDKwtTe-sO2f0_1XA',
+            'timestamp': 1573237208,
-            'uploader': 'CompilationChannel',
+            'categories': ['Awesome'],
-            'upload_date': '20131110',
+            'tags': ['Weimaraner', 'American Pit Bull Terrier'],
-            'view_count': int,
+            'duration': 44,
-        },
+            'like_count': int,
-        'add_ie': ['Youtube'],
+            'dislike_count': int,
            'comment_count': int,
        }
    }, {
-        'url': 'http://9gag.com/tv/p/aKolP3',
+        # HTML escaped title
-        'info_dict': {
+        'url': 'https://9gag.com/gag/av5nvyb',
            'id': 'aKolP3',
            'ext': 'mp4',
            'title': 'This Guy Travelled 11 countries In 44 days Just To Make This Amazing Video',
            'description': "I just saw more in 1 minute than I've seen in 1 year. This guy's video is epic!!",
            'uploader_id': 'rickmereki',
            'uploader': 'Rick Mereki',
            'upload_date': '20110803',
            'view_count': int,
        },
        'add_ie': ['Vimeo'],
    }, {
        'url': 'http://9gag.com/tv/p/KklwM',
        'only_matching': True,
    }, {
        'url': 'http://9gag.tv/p/Kk2X5',
        'only_matching': True,
    }, {
        'url': 'http://9gag.com/tv/embed/a5Dmvl',
        'only_matching': True,
    }]
    _EXTERNAL_VIDEO_PROVIDER = {
        '1': {
            'url': '%s',
            'ie_key': 'Youtube',
        },
        '2': {
            'url': 'http://player.vimeo.com/video/%s',
            'ie_key': 'Vimeo',
        },
        '3': {
            'url': 'http://instagram.com/p/%s',
            'ie_key': 'Instagram',
        },
        '4': {
            'url': 'http://vine.co/v/%s',
            'ie_key': 'Vine',
        },
    }
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        post_id = self._match_id(url)
-        video_id = mobj.group('id')
+        post = self._download_json(
-        display_id = mobj.group('display_id') or video_id
+            'https://9gag.com/v1/post', post_id, query={
                'id': post_id
            })['data']['post']
-        webpage = self._download_webpage(url, display_id)
+        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)
-        post_view = self._parse_json(
+        title = unescapeHTML(post['title'])
            self._search_regex(
                r'var\s+postView\s*=\s*new\s+app\.PostView\({\s*post:\s*({.+?})\s*,\s*posts:\s*prefetchedCurrentPost',
                webpage, 'post view'),
            display_id)
-        ie_key = None
+        duration = None
-        source_url = post_view.get('sourceUrl')
+        formats = []
-        if not source_url:
+        thumbnails = []
-            external_video_id = post_view['videoExternalId']
+        for key, image in (post.get('images') or {}).items():
-            external_video_provider = post_view['videoExternalProvider']
+            image_url = url_or_none(image.get('url'))
-            source_url = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['url'] % external_video_id
+            if not image_url:
-            ie_key = self._EXTERNAL_VIDEO_PROVIDER[external_video_provider]['ie_key']
+                continue
-        title = post_view['title']
+            ext = determine_ext(image_url)
-        description = post_view.get('description')
+            image_id = key.strip('image')
-        view_count = str_to_int(post_view.get('externalView'))
+            common = {
-        thumbnail = post_view.get('thumbnail_700w') or post_view.get('ogImageUrl') or post_view.get('thumbnail_300w')
+                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                webp_url = image.get('webpUrl')
                if webp_url:
                    t = common.copy()
                    t.update({
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                    thumbnails.append(t)
                common.update({
                    'id': image_id,
                    'ext': ext,
                })
                thumbnails.append(common)
            elif ext in ('webm', 'mp4'):
                if not duration:
                    duration = int_or_none(image.get('duration'))
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                for vcodec in ('vp8', 'vp9', 'h265'):
                    c_url = image.get(vcodec + 'Url')
                    if not c_url:
                        continue
                    c_f = common.copy()
                    c_f.update({
                        'format_id': image_id + '-' + vcodec,
                        'url': c_url,
                        'vcodec': vcodec,
                    })
                    formats.append(c_f)
                common.update({
                    'ext': ext,
                    'format_id': image_id,
                })
                formats.append(common)
        self._sort_formats(formats)
        section = try_get(post, lambda x: x['postSection']['name'])
        tags = None
        post_tags = post.get('tags')
        if post_tags:
            tags = []
            for tag in post_tags:
                tag_key = tag.get('key')
                if not tag_key:
                    continue
                tags.append(tag_key)
        get_count = lambda x: int_or_none(post.get(x + 'Count'))
        return {
-            '_type': 'url_transparent',
+            'id': post_id,
            'url': source_url,
            'ie_key': ie_key,
            'id': video_id,
            'display_id': display_id,
            'title': title,
-            'description': description,
+            'timestamp': int_or_none(post.get('creationTs')),
-            'view_count': view_count,
+            'duration': duration,
-            'thumbnail': thumbnail,
+            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': get_count('upVote'),
            'dislike_count': get_count('downVote'),
            'comment_count': get_count('comments'),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }
--- a/Show more
+++ b/Show more