Compare commits

633 commits

Author SHA1 Message Date
Dominika 0526e2add4 * Change LGPL to Unlicense 2021-10-23 01:11:44 +02:00
Lauren Liberda ab1904e854 fix/speedup ci 2021-10-23 01:06:55 +02:00
Lauren Liberda f1bd7ee019 vider support 2021-10-23 01:06:55 +02:00
Lauren Liberda 0b617cb3ed [polskieradio] fix PR4 audition shit 2021-10-23 01:06:55 +02:00
Lauren Liberda 9638569ee0 [ipla] state the DRM requirement clearly 2021-10-23 01:06:55 +02:00
Lauren Liberda db05777f80 [ipla] error handling 2021-10-23 01:06:55 +02:00
Dominika Liberda ed2d212864 * version 2021.08.01 2021-10-23 01:06:55 +02:00
Lauren Liberda 5cc47c2fde [youtube] fix age gate for *some* videos 2021-10-23 01:06:55 +02:00
Lauren Liberda a9e6daf0d8 [peertube] pt 3.3+ url scheme support, fix tests, minor fixes 2021-10-23 01:06:55 +02:00
Lauren Liberda 07b309368f [niconico] dmc downloader and other stuff from yt-dlp (as of 40078a5) 2021-10-23 01:06:55 +02:00
Dominika Liberda 18b5da3114 * version 2021.06.24.1 2021-10-23 01:06:55 +02:00
Dominika Liberda 0872e0c334 * fixes crash if signature decryption code isn't packed with artifacts 2021-10-23 01:06:55 +02:00
Dominika Liberda 23a00ac4b8 * fix in release script 2021-10-23 01:06:55 +02:00
Dominika Liberda 4223117be9 * version 2021.06.24 2021-10-23 01:06:55 +02:00
Dominika Liberda 937a597095 * fixes youtube list extractor 2021-10-23 01:06:55 +02:00
Lauren Liberda 1fbc3083b5 fix app crash/tests 2021-10-23 01:06:55 +02:00
Lauren Liberda f03d9efbb8 [liveleak] remove for real 2021-10-23 01:06:55 +02:00
Lauren Liberda 679db44560 [soundcloud] prerelease client id fetching 2021-10-23 01:06:55 +02:00
Lauren Liberda 5f8b81c6e7 prerelease artifact generator, for youtube sig 2021-10-23 01:06:55 +02:00
Lauren Liberda 17436014c9 [liveleak] remove extractor 2021-10-23 01:06:55 +02:00
Lauren Liberda b5b2163730 [pornhub] Add support for pornhubthbh7ap3u.onion
Original author: dstftw <dstftw@gmail.com>
2021-10-23 01:06:55 +02:00
Sergey M․ 68dd52b3bf [pornhub] Detect geo restriction 2021-10-23 01:06:55 +02:00
Sergey M․ b717cae5d2 [pornhub] Dismiss tbr extracted from download URLs (closes #28927)
No longer reliable
2021-10-23 01:06:55 +02:00
Sergey M․ 7e9f0bdc0c [curiositystream:collection] Extend _VALID_URL (closes #26326, closes #29117) 2021-10-23 01:06:55 +02:00
Tianyi Shi 4645070227 [bilibili] Strip uploader name (#29202) 2021-10-23 01:06:55 +02:00
Logan B 87e25ce47f [umg:de] Update GraphQL API URL (#29304)
Previous one no longer resolves

Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:55 +02:00
Sergey M․ 3d4581e17e [nrk] Switch psapi URL to https (closes #29344)
Catalog calls no longer work via http
2021-10-23 01:06:55 +02:00
kikuyan b19b66e927 [postprocessor/ffmpeg] Show ffmpeg output on error (refs #22680) (#29336) 2021-10-23 01:06:55 +02:00
kikuyan 11f62f6f41 [egghead] Add support for app.egghead.io (closes #28404) (#29303)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:55 +02:00
kikuyan 2d37e073fc [appleconnect] Fix extraction (#29208) 2021-10-23 01:06:55 +02:00
kikuyan 30bc6f7e13 [orf:tvthek] Add support for MPD formats (closes #28672) (#29236) 2021-10-23 01:06:55 +02:00
Sergey M․ 7855193169 [facebook] Improve login required detection 2021-10-23 01:06:54 +02:00
Sergey M․ ba41af65d6 [youporn] Fix formats and view count extraction (closes #29216) 2021-10-23 01:06:54 +02:00
Sergey M․ 8b43ba3b9e [orf:tvthek] Fix thumbnails extraction (closes #29217) 2021-10-23 01:06:54 +02:00
Remita Amine 404f3b78b0 [formula1] fix extraction(closes #29206) 2021-10-23 01:06:54 +02:00
Lauren Liberda 3cd3edc116 [youtube] fix the fancy georestricted error 2021-10-23 01:06:54 +02:00
Dominika Liberda 09c16b979a * version 2021.06.20 2021-10-23 01:06:54 +02:00
Lauren Liberda 7ac2178507 update changelog 2021-10-23 01:06:54 +02:00
Dominika Liberda 619d36e26c * fixes agegate on youtube 2021-10-23 01:06:54 +02:00
Lauren Liberda f52386cd5c [youtube] cleanup, speed up age-gated extraction, fix videos with js-like syntax 2021-10-23 01:06:54 +02:00
Lauren Liberda ae124bda4d [options] fix playwright headlessness behavior 2021-10-23 01:06:54 +02:00
Lauren Liberda 7340d055f6 [playwright] option to force a specific browser 2021-10-23 01:06:54 +02:00
Lauren Liberda dcb83a2e63 [tiktok] fix empty video lists
I'm fucking stupid
2021-10-23 01:06:54 +02:00
Lauren Liberda 26d3345641 [playwright] simplify code 2021-10-23 01:06:54 +02:00
Dominika Liberda 5e59a0c68d * version 2021.06.01 2021-10-23 01:06:54 +02:00
Lauren Liberda fa2c96dbf7 update changelog 2021-10-23 01:06:54 +02:00
Sergey M․ 0985383759 [ard] Relax _VALID_URL and fix video ids (closes #22724, closes #29091) 2021-10-23 01:06:54 +02:00
Sergey M․ 8b33577012 [ustream] Detect https embeds (closes #29133) 2021-10-23 01:06:54 +02:00
Sergey M․ c54b1f98ec [ted] Prefer own formats over external sources (closes #29142) 2021-10-23 01:06:54 +02:00
Sergey M․ a5baa644c0 [twitch:clips] Improve extraction (closes #29149) 2021-10-23 01:06:54 +02:00
phlip 8603229d45 [twitch:clips] Add access token query to download URLs (closes #29136) 2021-10-23 01:06:54 +02:00
Remita Amine c1544c413a [vimeo] fix vimeo pro embed extraction(closes #29126) 2021-10-23 01:06:54 +02:00
Remita Amine da145ccc36 [redbulltv] fix embed data extraction(closes #28770) 2021-10-23 01:06:54 +02:00
Remita Amine c6db7563e2 [shahid] relax _VALID_URL(closes #28772, closes #28930) 2021-10-23 01:06:54 +02:00
Sergey M․ a757ab0382 [playstuff] Add extractor (closes #28901, closes #28931) 2021-10-23 01:06:54 +02:00
Sergey M․ 500d0ac319 [eroprofile] Skip test 2021-10-23 01:06:54 +02:00
Sergey M․ 39722f43cc [eroprofile] Fix extraction (closes #23200, closes #23626, closes #29008) 2021-10-23 01:06:54 +02:00
kr4ssi 8bc41b335b [vivo] Add support for vivo.st (#29009)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Sergey M․ b5e4cd23ec [generic] Add support for og:audio (closes #28311, closes #29015) 2021-10-23 01:06:54 +02:00
Sergey M․ eb8e9fe2d0 [options] Fix thumbnail option group name (closes #29042) 2021-10-23 01:06:54 +02:00
Sergey M․ 656c0b9c09 [phoenix] Fix extraction (closes #29057) 2021-10-23 01:06:54 +02:00
Lauren Liberda d638820bb4 [generic] Add support for sibnet embeds
286e01ce30
2021-10-23 01:06:54 +02:00
Sergey M․ eac67eb1d4 [vk] Add support for sibnet embeds (closes #9500) 2021-10-23 01:06:54 +02:00
Sergey M․ e12c9a7042 [generic] Add Referer header for direct videojs download URLs (closes #2879, closes #20217, closes #29053) 2021-10-23 01:06:54 +02:00
Lukas Anzinger 44e71d2673 [orf:radio] Switch download URLs to HTTPS (closes #29012) (#29046) 2021-10-23 01:06:54 +02:00
Sergey M․ a35a131b44 [blinkx] Remove extractor (closes #28941)
No longer exists.
2021-10-23 01:06:54 +02:00
catboy 6852dc1aa6 [medaltv] Relax _VALID_URL (#28884)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Jacob Chapman 0331aa9167 [YoutubeDL] Improve extract_info doc (#28946)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Sergey M․ e1096efc6a [funimation] Add support for optional lang code in URLs (closes #28950) 2021-10-23 01:06:54 +02:00
Sergey M․ 1c7d4b3685 [gdcvault] Add support for HTML5 videos 2021-10-23 01:06:54 +02:00
Sergey M․ 0368cbf93b [dispeak] DRY and update tests (closes #28970) 2021-10-23 01:06:54 +02:00
Ben Rog-Wilhelm 239c10f655 [dispeak] Improve FLV extraction (closes #13513) 2021-10-23 01:06:54 +02:00
Ben Rog-Wilhelm c835d0687b [kaltura] Improve iframe extraction (#28969)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Lauren Liberda e42a7a1fa9 [kaltura] Make embed code alternatives actually work 2021-10-23 01:06:54 +02:00
Lauren Liberda 606afd80d7 [youtube] fix videos with age gate 2021-10-23 01:06:54 +02:00
Lauren Liberda 3c0361c4dd radiokapital extractors 2021-10-23 01:06:54 +02:00
Lauren Liberda 386e1394d7 [misskey] add tests 2021-10-23 01:06:54 +02:00
Lauren Liberda 6a36a1dae6 utils: flake8 2021-10-23 01:06:54 +02:00
Lauren Liberda 643c27d1b4 misskey extractor 2021-10-23 01:06:54 +02:00
Lauren Liberda 2e14967f2a [tiktok] deduplicate videos 2021-10-23 01:06:54 +02:00
Lauren Liberda 79fff7e88d [peertube] logging in 2021-10-23 01:06:54 +02:00
Lauren Liberda bab808f45b [mastodon] support cards to external services 2021-10-23 01:06:54 +02:00
Lauren Liberda 46e0bbdd3f [mastodon] cache apps on logging in 2021-10-23 01:06:54 +02:00
Lauren Liberda 224aaf6089 changelog update 2021-10-23 01:06:54 +02:00
Sergey M․ 7d714dfeed [twitter] Improve formats extraction from vmap URL (closes #28909) 2021-10-23 01:06:54 +02:00
Sergey M․ 370d964894 [xtube] Fix formats extraction (closes #28870) 2021-10-23 01:06:54 +02:00
Sergey M․ a1b9be1a53 [svtplay] Improve extraction (closes #28507, closes #28876) 2021-10-23 01:06:54 +02:00
Sergey M․ 17b1620d62 [tv2dk] Fix extraction (closes #28888) 2021-10-23 01:06:54 +02:00
schnusch fddd529f5f [xfileshare] Add support for wolfstream.tv (#28858) 2021-10-23 01:06:54 +02:00
Sergey M․ b841786bd2 [francetvinfo] Improve video id extraction (closes #28792) 2021-10-23 01:06:54 +02:00
catboy 3191180dac [medaltv] Fix extraction (#28807)
numeric clip ids are no longer used by medal, and integer user ids are now sent as strings.
2021-10-23 01:06:54 +02:00
The Hatsune Daishi 64dca35585 [tver] Redirect all downloads to Brightcove (#28849) 2021-10-23 01:06:54 +02:00
Sergey M․ 3302ba8ad7 [test_execution] Add test for lazy extractors (refs #28780) 2021-10-23 01:06:54 +02:00
Sergey M․ 8457a6d655 [bbc] Extract full description from __INITIAL_DATA__ (refs #28774) 2021-10-23 01:06:54 +02:00
dirkf 625303a611 [bbc] Extract description and timestamp from __INITIAL_DATA__ (#28774) 2021-10-23 01:06:54 +02:00
Lauren Liberda c3f2a841bb [mastodon] oh haruhi what did I NOT do here
+ --force-use-mastodon option
+ logging in to mastodon/pleroma
+ fetching posts via different mastodon/pleroma instances to get follower-only/direct posts
+ fetching peertube videos via pleroma instances to circumvent censorship (?)
2021-10-23 01:06:54 +02:00
Lauren Liberda 9785b42850 [wppilot] add tests 2021-10-23 01:06:54 +02:00
Lauren Liberda b2b7827c17 [wppilot] reduce logging in and throw meaningful errors 2021-10-23 01:06:54 +02:00
Lauren Liberda d9638fc6a0 wp pilot extractors 2021-10-23 01:06:54 +02:00
Lauren Liberda 5ed8a6a03c yet another update on funding 2021-10-23 01:06:54 +02:00
Lauren Liberda 83e8089e97 [tvp] fix website extracting with weird urls 2021-10-23 01:06:54 +02:00
Lauren Liberda 62c7989a1b [tvn] better extraction method choosing 2021-10-23 01:06:54 +02:00
Lauren Liberda c72ceff4d4 update on donations 2021-10-23 01:06:54 +02:00
Lauren Liberda 15beff25c0 [tvp:embed] handling formats better way 2021-10-23 01:06:54 +02:00
Lauren Liberda ef77f34b31 [youtube:channel] fix multiple page extraction 2021-10-23 01:06:54 +02:00
Lauren Liberda 46cefbff8d readme update 2021-10-23 01:06:54 +02:00
Lauren Liberda dc73c8de73 [tvp] fix jp2.tvp.pl 2021-10-23 01:06:54 +02:00
Lauren Liberda 38060e3efc [mastodon] support for soapbox and audio files 2021-10-23 01:06:54 +02:00
Sergey M․ c4ccab8371 [cbsnews] Fix extraction for python <3.6 (closes #23359) 2021-10-23 01:06:54 +02:00
Sergey M․ 94d4c3d1cc [utils] Add support for experimental HTTP response status code 308 Permanent Redirect (refs #27877, refs #28768) 2021-10-23 01:06:54 +02:00
quyleanh a7c395a00d [pluralsight] Extend anti-throttling timeout (#28712) 2021-10-23 01:06:54 +02:00
Aaron Lipinski 1759eea81f [maoritv] Add new extractor(closes #24552) 2021-10-23 01:06:54 +02:00
Remita Amine fc5095c1d2 [mtv] Fix Viacom A/B Testing Video Player extraction(closes #28703) 2021-10-23 01:06:54 +02:00
Sergey M․ e2f80b2756 [pornhub] Extract DASH and HLS formats from get_media end point (closes #28698) 2021-10-23 01:06:54 +02:00
Remita Amine 0f178530c5 [cbssports] fix extraction(closes #28682) 2021-10-23 01:06:54 +02:00
Remita Amine 74b132c3fc [jamendo] fix track extraction(closes #28686) 2021-10-23 01:06:54 +02:00
Remita Amine a1a447a265 [curiositystream] fix format extraction(closes #26845, closes #28668) 2021-10-23 01:06:54 +02:00
Lauren Liberda 4fa8ad7c6c compat simplecookie again because reasons 2021-10-23 01:06:54 +02:00
Sergey M․ 0e2e352931 [compat] Use more conventional name for compat SimpleCookie 2021-10-23 01:06:54 +02:00
guredora 2ca5e38e9f [line] add support for live.line.me (closes #17205)(closes #28658) 2021-10-23 01:06:54 +02:00
Lauren Liberda b4e5845dd3 added compat_SimpleCookie for compatibility with ytdl 2021-10-23 01:06:54 +02:00
Remita Amine 9c768e0dae [compat] add compat_SimpleCookie 2021-10-23 01:06:54 +02:00
Lauren Liberda 33db90c0b3 [vimeo] extraction improvements
originally by Remita Amine <remitamine@gmail.com>
2021-10-23 01:06:54 +02:00
RomanEmelyanov 4e795ff1ae [youku] Update ccode(closes #17852, closes #28447, closes #28460) (#28648) 2021-10-23 01:06:54 +02:00
Remita Amine ecfbf64895 [extractor/common] fix _get_cookies method for python 2(#20673, #23256, #20326, closes #28640) 2021-10-23 01:06:54 +02:00
Remita Amine 56e1b36377 [screencastomatic] fix extraction(closes #11976, closes #24489) 2021-10-23 01:06:54 +02:00
Allan Daemon 7f09191fd0 [palcomp3] Add new extractor(closes #13120) 2021-10-23 01:06:54 +02:00
Vid efab7d7dc4 [arnes] Add new extractor(closes #28483) 2021-10-23 01:06:54 +02:00
Adrian Heine 61a370b0cd [magentamusik360] Add new extractor 2021-10-23 01:06:54 +02:00
Lauren Liberda e8519119ce [core] merge formats by codecs 2021-10-23 01:06:54 +02:00
Lauren Liberda e6a069b3e2 [senat.pl] support for live videos 2021-10-23 01:06:54 +02:00
Lauren Liberda eac7ec5743 [sejm.pl] support live streams 2021-10-23 01:06:54 +02:00
Lauren Liberda 8513698017 + castos extractors 2021-10-23 01:06:54 +02:00
Lauren Liberda 085ce91c53 spryciarze.pl extractors 2021-10-23 01:06:54 +02:00
Lauren Liberda 5a5c43b647 json_dl: better author extraction 2021-10-23 01:06:54 +02:00
Lauren Liberda ab5d9dabf7 [spreaker] embedded player support 2021-10-23 01:06:54 +02:00
Lauren Liberda f5914a436e [spreaker] new url schemes 2021-10-23 01:06:54 +02:00
Lauren Liberda 5e117b0baf senat.pl extractor 2021-10-23 01:06:54 +02:00
Lauren Liberda e330f170bb [sejm.pl] extracting ism formats, small changes to work with senat 2021-10-23 01:06:54 +02:00
Lauren Liberda 8324350f32 [sejm.pl] multiple cameras and PJM translator 2021-10-23 01:06:54 +02:00
Lauren Liberda 059505a9ff + sejm.gov.pl archival video extractor 2021-10-23 01:06:54 +02:00
Lauren Liberda 8fe478ae13 improve documentation on subtitles 2021-10-23 01:06:54 +02:00
Lauren Liberda 8a84a62b70 [tvp] support for tvp.info vue pages 2021-10-23 01:06:54 +02:00
Lauren Liberda e293203eba [cda] fix premium videos for premium users (?) 2021-10-23 01:06:54 +02:00
Lauren Liberda 6ea512d062 [tvn24] refactor nextjs frontend handling
mitigating HTTP 404 response issues
2021-10-23 01:06:54 +02:00
Lauren Liberda 486c20162c - [ninateka] remove extractor [*]
ninateka uses DRM protection now
2021-10-23 01:06:54 +02:00
Lauren Liberda 68078811a6 [tvp:series] error handling, fallback to web 2021-10-23 01:06:54 +02:00
Lauren Liberda e9a6e6819c copykitku: get ready for merging other fork changes 2021-10-23 01:06:54 +02:00
Dominika Liberda 99be437b0b * version 2021.04.01 2021-10-23 01:06:54 +02:00
Lauren Liberda 9c3071f67e [vlive] merge all updates from ytdl 2021-10-23 01:06:54 +02:00
Sergey M․ dd32f5079e [francetvinfo] Improve video id extraction (closes #28584) 2021-10-23 01:06:54 +02:00
Chris Hranj a55d01caa2 [instagram] Improve title extraction and extract duration (#28469)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Lauren Liberda 1acc012d12 [youtube] better consent workaround 2021-10-23 01:06:54 +02:00
Dominika Liberda cd6352284d * version 2021.03.30 2021-10-23 01:06:54 +02:00
Lauren Liberda 59e06bd4ba [makefile] use python3 2021-10-23 01:06:54 +02:00
Lauren Liberda b7d3834471 [youtube] consent shit workaround (fuck google)
Co-authored-by: Dominika Liberda <ja@sdomi.pl>
2021-10-23 01:06:54 +02:00
Remita Amine 3121701f4a [sbs] add support for ondemand watch URLs(closes #28566) 2021-10-23 01:06:54 +02:00
Remita Amine 516f230395 [picarto] fix live stream extraction(closes #28532) 2021-10-23 01:06:54 +02:00
Remita Amine 729c5ed0af [vimeo] fix unlisted video extraction(closes #28414) 2021-10-23 01:06:54 +02:00
Remita Amine a3bb4b9cc3 [ard] improve clip id extraction(#22724)(closes #28528) 2021-10-23 01:06:54 +02:00
Roman Sebastian Karwacik ce34e3f5d3 [zoom] Add new extractor(closes #16597, closes #27002, closes #28531) 2021-10-23 01:06:54 +02:00
The Hatsune Daishi 72ec72e477 [extractor] escape forgotten dot for hostnames in regular expression (#28530) 2021-10-23 01:06:54 +02:00
Remita Amine ba823664f9 [bbc] fix BBC IPlayer Episodes/Group extraction(closes #28360) 2021-10-23 01:06:54 +02:00
Remita Amine ed103246ab [zingmp3] fix extraction(closes #11589, closes #16409, closes #16968, closes #27205) 2021-10-23 01:06:54 +02:00
Martin Ström 9d1c745fac [vgtv] Add support for new tv.aftonbladet.se URL schema (#28514)
Co-authored-by: Sergey M <dstftw@gmail.com>
2021-10-23 01:06:54 +02:00
Lauren Liberda 235b606437 [tiktok] detect private videos 2021-10-23 01:06:53 +02:00
Lauren Liberda ec794dacb2 --ie-key cli option 2021-10-23 01:06:53 +02:00
Lauren Liberda 53c1660468 fix dw:article, refactor dw 2021-10-23 01:06:53 +02:00
Lauren Liberda bb9c83c613 + patronite audio extractor 2021-10-23 01:06:53 +02:00
Dominika Liberda c35da27819 * version 2021.03.21 2021-10-23 01:06:53 +02:00
Lauren Liberda 8e9915aa09 changelog update 2021-10-23 01:06:53 +02:00
Lauren Liberda 6567bd00fe [youtube] meaningful error for age-gated no-embed videos 2021-10-23 01:06:53 +02:00
Lauren Liberda 55fa7b48c4 - removed tvnplayer extractor 2021-10-23 01:06:53 +02:00
Sergey M․ 6d4a1a1f1b [yandexmusic:playlist] Request missing tracks in chunks (closes #27355, closes #28184) 2021-10-23 01:06:53 +02:00
Sergey M․ b8f1f10baa [yandexmusic:album] Simplify 2021-10-23 01:06:53 +02:00
Sergey M․ 51b98e969c [yandexmusic] Add support for music.yandex.com (closes #27425) 2021-10-23 01:06:53 +02:00
Sergey M․ 78c346bb86 [yandexmusic] DRY _VALID_URL base 2021-10-23 01:06:53 +02:00
Sergey M․ 1607edbfe5 [yandexmusic:album] Improve album title extraction (closes #27418) 2021-10-23 01:06:53 +02:00
Sergey M․ 56a1c8a862 [yandexmusic] Refactor and add support for artist's tracks and albums (closes #11887, closes #22284) 2021-10-23 01:06:53 +02:00
Lauren Liberda bfa15434b6 [peertube] improve thumbnail extraction
Original author: remitamine
2021-10-23 01:06:53 +02:00
Lauren Liberda 7bc8f716ac [vimeo:album] Fix extraction for albums with a number of videos that is a multiple of the page size
Original author: dstftw
2021-10-23 01:06:53 +02:00
Remita Amine 0d8471c5cd [vvvvid] fix kenc format extraction(closes #28473) 2021-10-23 01:06:53 +02:00
Remita Amine a19efaf4f2 [mlb] fix video extraction(#21241) 2021-10-23 01:06:53 +02:00
Sergey M․ 2395913191 [svtplay] Improve extraction (closes #28448) 2021-10-23 01:06:53 +02:00
Remita Amine 92838c24a7 [applepodcasts] fix extraction(closes #28445) 2021-10-23 01:06:53 +02:00
Remita Amine 139349577f [rtve] improve extraction
- extract all formats
- fix RTVE Infantil extraction(closes #24851)
- extract is_live and series
2021-10-23 01:06:53 +02:00
Sergey M․ fe7f2f4665 [southpark] Fix extraction and add support for southparkstudios.com (closes #26763, closes #28413) 2021-10-23 01:06:53 +02:00
Remita Amine b4565ba80a [sportdeutschland] fix extraction(closes #21856)(closes #28425) 2021-10-23 01:06:53 +02:00
Remita Amine 62e95de9b8 [pinterest] reduce the number of HLS format requests 2021-10-23 01:06:53 +02:00
Remita Amine a14cdc8911 [tver] improve title extraction(closes #28418) 2021-10-23 01:06:53 +02:00
Remita Amine 1ae0b09ce7 [fujitv] fix HLS formats extension(closes #28416) 2021-10-23 01:06:53 +02:00
Remita Amine 5a7b034588 [shahid] fix format extraction(closes #28383) 2021-10-23 01:06:53 +02:00
Sergey M․ 5302d7deb5 [bandcamp] Extract release_timestamp 2021-10-23 01:06:53 +02:00
Sergey M․ 7cd709a8d6 Introduce release_timestamp meta field (refs #28386) 2021-10-23 01:06:53 +02:00
Lauren Liberda 184e14106d [pornhub] Detect flagged videos
Original author: dstftw
2021-10-23 01:06:53 +02:00
Sergey M․ 6342cb9395 [pornhub] Extract formats from get_media end point (#28395) 2021-10-23 01:06:53 +02:00
Remita Amine 0f5a738cb8 [bilibili] fix video info extraction(closes #28341) 2021-10-23 01:06:53 +02:00
Remita Amine c43af05b3f [cbs] add support for Paramount+ (closes #28342) 2021-10-23 01:06:53 +02:00
Remita Amine 3f6c70d7fd [trovo] Add Origin header to VOD formats(closes #28346) 2021-10-23 01:06:53 +02:00
Remita Amine 3f8ceb92d6 [voxmedia] fix volume embed extraction(closes #28338) 2021-10-23 01:06:53 +02:00
Remita Amine c906fbd026 [9c9media] fix extraction for videos with multiple ContentPackages(closes #28309) 2021-10-23 01:06:53 +02:00
Remita Amine be01b75623 [bbc] correct caught exception type 2021-10-23 01:06:53 +02:00
dirkf a13d30df60 [bbc] add support for BBC Reel videos(closes #21870, closes #23660, closes #28268) 2021-10-23 01:06:53 +02:00
Sergey M․ 9e2d1d79af [zdf] Rework extractors (closes #11606, closes #13473, closes #17354, closes #21185, closes #26711, closes #27068, closes #27930, closes #28198, closes #28199, closes #28274)

* Generalize unique video ids for zdf based extractors
* Improve extraction
* Fix 3sat and phoenix
2021-10-23 01:06:53 +02:00
Lauren Liberda 8763d41b2e fix the patch hook 2021-10-23 01:06:53 +02:00
Remita Amine 87d2884a39 [stretchinternet] Fix extraction(closes #28297) 2021-10-23 01:06:53 +02:00
Remita Amine 21755dfd0b [urplay] fix episode data extraction(closes #28292) 2021-10-23 01:06:53 +02:00
Remita Amine f7f18a1f95 [bandaichannel] Add new extractor(closes #21404) 2021-10-23 01:06:53 +02:00
Lauren Liberda 76dd2ae2c6 [tvp:embed] extracting video subtitles 2021-10-23 01:06:53 +02:00
Lauren Liberda f38b6982dd fix m3u8 parsing test 2021-10-23 01:06:53 +02:00
Lauren Liberda 485abb04a8 fix possible crash 2021-10-23 01:06:53 +02:00
Lauren Liberda 84412f41fa support for vtt subtitles in m3u8 manifests 2021-10-23 01:06:53 +02:00
Lauren Liberda d506825a5c [pulsevideo] unduplicating formats 2021-10-23 01:06:53 +02:00
Lauren Liberda 98a2f0c8fe [polskieradio] radiokierowcow.pl extractor 2021-10-23 01:06:53 +02:00
Lauren Liberda 01e9b4552b [youtube] some formats are now just static 2021-10-23 01:06:53 +02:00
Lauren Liberda 28d1709c91 [youtube] better signature handling for DASH formats 2021-10-23 01:06:53 +02:00
Lauren Liberda e150971ea7 [generic] extracting mpd manifests properly 2021-10-23 01:06:53 +02:00
Lauren Liberda c8a9e64511 + bittorrent magnet extractor 2021-10-23 01:06:53 +02:00
Lauren Liberda b36bfac24e [generic] detecting bittorrent manifest files 2021-10-23 01:06:53 +02:00
Lauren Liberda 21c370bf29 [peertube] bittorrent formats 2021-10-23 01:06:53 +02:00
Lauren Liberda 81f0034a12 initial bittorrent support 2021-10-23 01:06:53 +02:00
Lauren Liberda 503f2b988e [tiktok] hashtag and music extractors 2021-10-23 01:06:53 +02:00
Lauren Liberda 9fb4a5decb [onnetwork] refactor 2021-10-23 01:06:53 +02:00
Lauren Liberda 6856478350 [polskieradio] podcast support 2021-10-23 01:06:53 +02:00
Lauren Liberda 5bb9c5e53e [youtube] more descriptive geo-lock messages (with countries) 2021-10-23 01:06:53 +02:00
Timothy Wynn 79002a5092 Update go.py 2021-10-23 01:06:53 +02:00
Lauren Liberda 5d4293f103 removed a lot of deprecated platform support code 2021-10-23 01:06:53 +02:00
Lauren Liberda e4ad9f9329 new exe build script 2021-10-23 01:06:53 +02:00
Lauren Liberda c5a07adbd2 [playwright] more verbose errors if --verbose 2021-10-23 01:06:53 +02:00
Lauren Liberda ec881dd98d [youtube] signature function caching 2021-10-23 01:06:53 +02:00
Lauren Liberda 99ae610f74 fix links to ytdl issues 2021-10-23 01:06:53 +02:00
Lauren Liberda b02f30e9e9 pypy tests 2021-10-23 01:06:53 +02:00
Lauren Liberda 2930f4f593 videotarget extractor 2021-10-23 01:06:53 +02:00
Lauren Liberda 5c09f8a7db acast player extractor 2021-10-23 01:06:53 +02:00
Dominika Liberda 1275ec3347 version 2021.03.01 2021-10-23 01:06:53 +02:00
Lauren Liberda 5f73fa1c26 [peertube] playlist, channel and account extractor 2021-10-23 01:06:53 +02:00
Lauren Liberda d52e39ac13 [cda] logging in with a user account 2021-10-23 01:06:53 +02:00
Laura Liberda bf9bb174e5 remove some unused devscripts/docs 2021-10-23 01:06:53 +02:00
Dominika Liberda be6253988a version 2021.02.27 2021-10-23 01:06:53 +02:00
Laura Liberda 9d7059a4a9 add --use-proxy-sites option 2021-10-23 01:06:53 +02:00
Laura Liberda 12e6f64462 nitter extractor 2021-10-23 01:06:53 +02:00
bopol 521dc1f82c [nitter] Add new extractor 2021-10-23 01:06:53 +02:00
Laura Liberda fe439b89c4 updated changelog for 2021.02.26 2021-10-23 01:06:53 +02:00
Laura Liberda 7989d4c448 [ipla] reformat code 2021-10-23 01:06:53 +02:00
Laura Liberda 50d15ce421 remove now-invalid unicode_literals test 2021-10-23 01:06:53 +02:00
Dominika Liberda 724948f4b2 version 2021.02.26 2021-10-23 01:06:53 +02:00
Dominika Liberda 39fbd9c21b new youtube crypto 2021-10-23 01:06:53 +02:00
Laura Liberda 79278413a9 make sure py2 throws a deprecation notice 2021-10-23 01:06:53 +02:00
Laura Liberda 2748cc857a changelog 2021-10-23 01:06:53 +02:00
Laura Liberda d06a708d58 fix crash in generic extractor 2021-10-23 01:06:53 +02:00
Laura Liberda 0b4878715c fix hdl tests 2021-10-23 01:06:53 +02:00
Alexander Seiler 43b1927fe4 [srgssr] improve extraction
- extract subtitle
- fix extraction for new videos
- update srf download domains

closes #14717
closes #14725
closes #27231
closes #28238
2021-10-23 01:06:53 +02:00
Remita Amine aba0fad66e [vvvvid] reduce season request payload size 2021-10-23 01:06:53 +02:00
nixxo 785d9930cb [vvvvid] extract series sublists playlist_title (#27601) (#27618) 2021-10-23 01:06:53 +02:00
Remita Amine fe62eeb47b [dplay] Extract Ad-Free uplynk URLs(#28160) 2021-10-23 01:06:53 +02:00
Remita Amine 9c66db7689 [wat] detect DRM protected videos(closes #27958) 2021-10-23 01:06:53 +02:00
Remita Amine 0ed4a821b8 [tf1] improve extraction(closes #27980)(closes #28040) 2021-10-23 01:06:53 +02:00
Sergey M․ 4a1a901bd3 [tmz] Fix and improve extraction (closes #24603, closes #24687, closes #28211) 2021-10-23 01:06:53 +02:00
Remita Amine 8b3f0fb289 [gedidigital] improve asset id matching 2021-10-23 01:06:53 +02:00
nixxo 8d3f96a92c [gedidigital] Add new extractor(closes #7347)(closes #26946) 2021-10-23 01:06:53 +02:00
Sergey M․ e8c278f355 [apa] Improve extraction (closes #27750) 2021-10-23 01:06:53 +02:00
Adrian Heine f5a691070c [apa] Fix extraction 2021-10-23 01:06:53 +02:00
Sergey M․ 56a54d1838 [youporn] Skip test 2021-10-23 01:06:53 +02:00
piplongrun 8c0b1ad436 [youporn] Extract duration (#28019)
Co-authored-by: Sergey M <dstftw@gmail.com>
2021-10-23 01:06:53 +02:00
Isaac-the-Man e55149a2fc [samplefocus] Add new extractor(closes #27763) 2021-10-23 01:06:53 +02:00
Remita Amine 3393281f14 [vimeo] add support for unlisted video source format extraction 2021-10-23 01:06:53 +02:00
Remita Amine 129037f139 [viki] improve extraction(closes #26522)(closes #28203)
- extract uploader_url and episode_number
- report login required error
- extract 480p formats
- fix API v4 calls
2021-10-23 01:06:53 +02:00
Remita Amine e391150eaf [ninegag] unescape title(#28201) 2021-10-23 01:06:53 +02:00
Remita Amine af5374fccc [dplay] add support for de.hgtv.com (closes #28182) 2021-10-23 01:06:53 +02:00
Remita Amine 69758c2be1 [dplay] Add support for discoveryplus.com (closes #24698) 2021-10-23 01:06:53 +02:00
dmsummers e5c17a8125 [simplecast] Add new extractor(closes #24107) 2021-10-23 01:06:53 +02:00
Max 4a816731df [postprocessor/embedthumbnail] Recognize atomicparsley binary in lowercase (#28112) 2021-10-23 01:06:53 +02:00
Stephen Stair 451ed35cf9 [storyfire] Add new extractor(closes #25628)(closes #26349) 2021-10-23 01:06:53 +02:00
Remita Amine 71097456a0 [zhihu] Add new extractor(closes #28177) 2021-10-23 01:06:53 +02:00
Remita Amine 00a34e93d5 [ccma] fix timestamp parsing in python 2 2021-10-23 01:06:53 +02:00
Remita Amine b90eb0c932 [videopress] add support for video.wordpress.com 2021-10-23 01:06:53 +02:00
Remita Amine 9a7824ce14 [kakao] improve info extraction and detect geo restriction(closes #26577) 2021-10-23 01:06:53 +02:00
Remita Amine 8353016232 [xboxclips] fix extraction(closes #27151) 2021-10-23 01:06:53 +02:00
Sergey M․ 568e70c287 [ard] Improve formats extraction (closes #28155) 2021-10-23 01:06:53 +02:00
Kevin Velghe bbf256a986 [canvas] Add new extractor for Dagelijkse Kost (#28119) 2021-10-23 01:06:53 +02:00
Remita Amine d1cb2f14ee [ign] fix extraction(closes #24771) 2021-10-23 01:06:53 +02:00
Sergey M․ 046fbce8fb [xhamster] Extract formats from xplayer settings and extract filesizes (closes #28114) 2021-10-23 01:06:53 +02:00
Sergey M․ 627d01e28f [archiveorg] Fix and improve extraction (closes #21330, closes #23586, closes #25277, closes #26780, closes #27109, closes #27236, closes #28063) 2021-10-23 01:06:53 +02:00
Adrian Heine né Lang 87d88a3ce1 [urplay] Fix extraction (closes #28073) (#28074) 2021-10-23 01:06:53 +02:00
Adrian Heine né Lang 0b4bded8d8 [azmedien] Fix extraction (#28064) 2021-10-23 01:06:53 +02:00
Sergey M․ b37f3b7703 [pornhub] Implement lazy playlist extraction 2021-10-23 01:06:53 +02:00
Sergey M efbbc4fb90 [pornhub] Add placeholder netrc machine 2021-10-23 01:06:53 +02:00
Sergey M․ a15fb8f91b [svtplay] Fix video id extraction (closes #28058) 2021-10-23 01:06:53 +02:00
Sergey M 7393b46d28 [pornhub] Add support for authentication (closes #18797, closes #21416, closes #24294) 2021-10-23 01:06:53 +02:00
Sergey M․ 789d99a4f9 [pornhub:user] Add support for URLs unavailable via /videos page and improve paging (closes #27853) 2021-10-23 01:06:53 +02:00
Remita Amine 3ceec04914 [bravotv] add support for oxygen.com(closes #13357)(closes #22500) 2021-10-23 01:06:53 +02:00
Guillem Vela 77a502d82b [ccma] improve metadata extraction(closes #27994)
- extract age_limit, alt_title, categories, series and episode_number
- fix timestamp multiple subtitles extraction
2021-10-23 01:06:53 +02:00
Remita Amine 41b390b21e [egghead] fix typo 2021-10-23 01:06:53 +02:00
Viren Rajput 97becd9c42 [egghead] update API domain(closes #28038) 2021-10-23 01:06:53 +02:00
Remita Amine db09c2ce61 [vidzi] remove extractor(closes #12629) 2021-10-23 01:06:53 +02:00
Remita Amine 5eab5ac665 [vidio] improve metadata extraction 2021-10-23 01:06:53 +02:00
Adrian Heine né Lang ac7d81f386 [AENetworks] update AENetworksShowIE test playlist id (#27851) 2021-10-23 01:06:52 +02:00
nixxo 4de0c0695a [vvvvid] add support for youtube embeds (#27825) 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang d78a905c93 [awaan] Extract uploader id (#27963) 2021-10-23 01:06:52 +02:00
Remita Amine 90854501c3 [medialaan] add support for DPG Media MyChannels based websites
closes #14871
closes #15597
closes #16106
closes #16489
2021-10-23 01:06:52 +02:00
Remita Amine d7156eead8 [abcnews] fix extraction(closes #12394)(closes #27920) 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang 9f1e642faf [AMP] Fix upload_date and timestamp extraction (#27970) 2021-10-23 01:06:52 +02:00
Remita Amine e653b73d7b [tv4] relax _VALID_URL(closes #27964) 2021-10-23 01:06:52 +02:00
Remita Amine 72ccf1ea95 [tv2] improve MTV Uutiset Article extraction 2021-10-23 01:06:52 +02:00
tpikonen 4297f217fd [tv2] Add support for mtvuutiset.fi (#27744) 2021-10-23 01:06:52 +02:00
Remita Amine 21a21fdd12 [adn] improve login warning reporting 2021-10-23 01:06:52 +02:00
Remita Amine 6f32443da8 [zype] fix uplynk id extraction(closes #27956) 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang 10e0bc3eeb [ADN] Implement login (#27937)
closes #17091
closes #27841
2021-10-23 01:06:52 +02:00
Sergey M․ e752371a7e [franceculture] Make thumbnail optional (closes #18807) 2021-10-23 01:06:52 +02:00
Aurélien Grosdidier b68a24ee06 [franceculture] Fix extraction (closes #27891) (#27903)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:52 +02:00
Sergey M․ 774a9a4f95 [options] Clarify --extract-audio help string (closes #27878) 2021-10-23 01:06:52 +02:00
Sergey M․ a17fa78858 Introduce --output-na-placeholder (closes #27896) 2021-10-23 01:06:52 +02:00
aarubui e6f46e50df [njpwworld] fix extraction (#27890) 2021-10-23 01:06:52 +02:00
Remita Amine b6aeee6944 [comedycentral] fix extraction(closes #27905) 2021-10-23 01:06:52 +02:00
Remita Amine 89d8d831ab [wat] remove unused variable 2021-10-23 01:06:52 +02:00
Remita Amine a4afcb7a2f [wat] fix format extraction(closes #27901) 2021-10-23 01:06:52 +02:00
Remita Amine 0ca38e7fb2 [americastestkitchen] improve season extraction 2021-10-23 01:06:52 +02:00
Brian Marks e4d12c6925 [americastestkitchen] Add support for downloading entire seasons (#27861) 2021-10-23 01:06:52 +02:00
Remita Amine 9cd160bcf0 [trovo] Add new extractor(closes #26125) 2021-10-23 01:06:52 +02:00
Remita Amine 78d9c01473 [aol] add support for yahoo videos(closes #26650) 2021-10-23 01:06:52 +02:00
Remita Amine 0aca265418 [yahoo] fix single video extraction 2021-10-23 01:06:52 +02:00
Remita Amine 2287106362 [ninegag] improve extraction 2021-10-23 01:06:52 +02:00
DrWursterich 554ca47216 [9gag] Fix Extraction (#23022) 2021-10-23 01:06:52 +02:00
Brian Marks 23ed695eaa [americastestkitchen] Improve metadata extraction for ATK episodes (#27860) 2021-10-23 01:06:52 +02:00
Remita Amine 6e0744819c [aljazeera] fix extraction(closes #20911)(closes #27779) 2021-10-23 01:06:52 +02:00
Remita Amine 81e163b024 [minds] improve extraction 2021-10-23 01:06:52 +02:00
Tatsh ca4bfa3da3 [Minds] Add new extractor (#17934) 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang 58fa22f684 [ard] Fix title and description extraction and update tests (#27761) 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang f808eef2f3 [aenetworks] Fix test (#27847) 2021-10-23 01:06:52 +02:00
Remita Amine 6b6c8bf1f0 [spotify] Add new extractor for Spotify Podcasts(closes #27443) 2021-10-23 01:06:52 +02:00
Sergey M․ 79a5e77e79 [mixcloud:playlist:base] Fix video id extraction in flat playlist mode (refs #27787) 2021-10-23 01:06:52 +02:00
Sergey M․ 6f17f97383 [animeondemand] Add support for lazy playlist extraction (closes #27829) 2021-10-23 01:06:52 +02:00
Sergey M․ fff26f2340 [YoutubeDL] Protect from infinite recursion due to recursively nested playlists (closes #27833) 2021-10-23 01:06:52 +02:00
Remita Amine 2dcad362e8 [twitter] Add tests for more cards 2021-10-23 01:06:52 +02:00
Sergey M․ 720ee011e2 [youporn] Restrict fallback download URL (refs #27822) 2021-10-23 01:06:52 +02:00
Sergey M․ 4b782259f2 [youporn] Improve height and tbr extraction (refs #23659, refs #20425) 2021-10-23 01:06:52 +02:00
Sergey M․ 1855ad87cd [youporn] Fix extraction (closes #27822) 2021-10-23 01:06:52 +02:00
Sergey M․ cd5366e41d [twitter] Add support for unified cards (closes #27826) 2021-10-23 01:06:52 +02:00
main() 6fdbcc1c59 [twitch] Set OAuth token for GraphQL requests using auth-token cookie (#27790)

Co-authored-by: remitamine <remitamine@gmail.com>
2021-10-23 01:06:52 +02:00
Aaron Zeng 8ab5adca22 [YoutubeDL] Ignore failure to create existing directory (#27811) 2021-10-23 01:06:52 +02:00
Sergey M․ 6ec432f9e4 [YoutubeDL] Raise syntax error for format selection expressions with multiple + operators (closes #27803) 2021-10-23 01:06:52 +02:00
Aarni Koskela 9fb1f50a15 [Mixcloud] Harmonize ID generation from lists with full ID generation (#27787)

Mixcloud IDs are generated as `username_slug` when the full ID dict has been
downloaded.  When downloading a list (e.g. uploads, favorites, ...), the temporary
ID is just the `slug`.  This made e.g. archive file usage require the download
of stream metadata before the download can be rejected as already downloaded.

This commit attempts to get the uploader username during the GraphQL query, so the
temporary IDs are generated similarly.
2021-10-23 01:06:52 +02:00
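A minimal sketch of the ID scheme this commit message describes; the dict keys (owner, username, slug) are illustrative assumptions, not necessarily Mixcloud's actual GraphQL schema:

    # Hedged sketch of the Mixcloud ID harmonization described above;
    # field names are assumptions for illustration only.
    def full_video_id(username, slug):
        # Fully extracted Mixcloud videos get 'username_slug' IDs.
        return '%s_%s' % (username, slug)

    def flat_entry_id(item):
        # If the list query also returns the owner's username, a flat
        # playlist entry can build the same 'username_slug' ID up front,
        # so an archive file can reject it without fetching metadata.
        username = (item.get('owner') or {}).get('username')
        return full_video_id(username, item['slug']) if username else item['slug']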
Remita Amine e5ef69ca71 [cspan] improve info extraction(closes #27791) 2021-10-23 01:06:52 +02:00
Remita Amine ca8ab55941 [adn] improve info extraction 2021-10-23 01:06:52 +02:00
Adrian Heine né Lang 258cd9f44e [ADN] Fix extraction (#27732)
Closes #26963.
2021-10-23 01:06:52 +02:00
Sergey M․ 12ca40436c [twitch] Improve login error extraction 2021-10-23 01:06:52 +02:00
Sergey M․ 82b487f429 [twitch] Fix authentication (refs #27743) 2021-10-23 01:06:52 +02:00
Remita Amine 7501f94648 [threeqsdn] Improve extraction(closes #21058) 2021-10-23 01:06:52 +02:00
0l-l0 1adc5f4c47 [peertube] Extract files also from streamingPlaylists (#27728)
JSON objects with an empty "files" tag seem to be a valid PeerTube API
response. In those cases the "files" arrays contained in the
"streamingPlaylists" members can be used instead.
closes #26002
closes #27586
2021-10-23 01:06:52 +02:00
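A short sketch of that fallback, using the response keys named in the commit message (everything else is an assumption, not PeerTube's full API):

    # An empty 'files' array is a valid PeerTube API response; fall back
    # to the files nested inside each streamingPlaylists entry.
    def collect_files(video):
        files = list(video.get('files') or [])
        if not files:
            for playlist in video.get('streamingPlaylists') or []:
                files.extend(playlist.get('files') or [])
        return files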
Remita Amine 9c305e65fb [khanacademy] fix extraction(closes #2887)(closes #26803) 2021-10-23 01:06:52 +02:00
Remita Amine 2c106247f3 [spike] Update Paramount Network feed URL(closes #27715) 2021-10-23 01:06:52 +02:00
nixxo a447f25d5c [rai] improve subtitles extraction (#27705)
closes #27698
2021-10-23 01:06:52 +02:00
Remita Amine 3caed2e161 [canvas] Match only supported VRT NU URLs(#27707) 2021-10-23 01:06:52 +02:00
Remita Amine 1c3eda7037 [extractors] add BibelTVIE import 2021-10-23 01:06:52 +02:00
Remita Amine 84d85be122 [bibeltv] Add new extractor(closes #14361) 2021-10-23 01:06:52 +02:00
Remita Amine 245f1d834f [bfmtv] Add new extractor(closes #16053)(closes #26615) 2021-10-23 01:06:52 +02:00
Remita Amine da1bf36474 [sbs] Add support for ondemand play and news embed URLs(closes #17650)(closes #27629) 2021-10-23 01:06:52 +02:00
Sergey M․ 16ff1e6bdf [twitch] Refactor 2021-10-23 01:06:52 +02:00
Sergey M․ ced3ed1c25 [twitch] Drop legacy kraken API v5 code altogether 2021-10-23 01:06:52 +02:00
Sergey M․ 0b6351b649 [twitch:vod] Switch to GraphQL for video metadata 2021-10-23 01:06:52 +02:00
Remita Amine acac4f3f2c [canvas] Fix VRT NU extraction(closes #26957)(closes #27053) 2021-10-23 01:06:52 +02:00
Sergey M․ 3ef8a0fb3d [twitch] Improve access token extraction and remove unused code (closes #27646) 2021-10-23 01:06:52 +02:00
23rd 8ad9ab6bf1 [twitch] Switch access token to GraphQL and refactor. 2021-10-23 01:06:52 +02:00
nixxo 57f417b3cd [rai] Detect ContentItem in iframe (closes #12652) (#27673)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-10-23 01:06:52 +02:00
Remita Amine f24ce9e12a [ketnet] fix extraction(closes #27662) 2021-10-23 01:06:52 +02:00
Remita Amine 330c49718a [dplay] Add suport Discovery+ domains(closes #27680) 2021-10-23 01:06:52 +02:00
Sergey M․ 5984740aed [motherless] Fix review issues and improve extraction (closes #26495, closes #27450) 2021-10-23 01:06:52 +02:00
cladmi b3e644ee0d [motherless] Fix recent videos upload date extraction (closes #27661)
Less than a week old videos use a '20h ago' or '1d ago' format.

I kept the support for 'Ago' with uppercase start as it was already in the code.
2021-10-23 01:06:52 +02:00
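An illustrative parser for those relative forms, as a hedged sketch rather than the extractor's actual code:

    import datetime
    import re

    def upload_date_from_relative(text):
        # Handles '20h ago' / '1d ago', keeping uppercase 'Ago' support.
        m = re.match(r'(\d+)([hd])\s+[Aa]go', text)
        if not m:
            return None
        count, unit = int(m.group(1)), m.group(2)
        delta = datetime.timedelta(hours=count) if unit == 'h' else datetime.timedelta(days=count)
        return (datetime.datetime.utcnow() - delta).strftime('%Y%m%d')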
Kevin O'Connor aa88061b86 [downloader/hls] Disable decryption in tests (#27660)
Tests truncate the download to 10241 bytes, which is not divisible by 16
and cannot be decrypted. Tests don't really care about the decrypted
content, just that the data they retrieved is the expected data.
Therefore, it's fine to just return the encrypted data to tests.

See: #27621 and #27620
2021-10-23 01:06:52 +02:00
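A hedged sketch of that behaviour; test_mode and aes_cbc_decrypt stand in for the real downloader option and crypto helper:

    def fragment_content(data, key, iv, test_mode):
        # Test downloads stop at 10241 bytes, which is not a multiple of
        # the 16-byte AES block size, so decryption would fail; tests only
        # compare the bytes retrieved, so returning the encrypted data is fine.
        if test_mode:
            return data
        return aes_cbc_decrypt(data, key, iv)  # hypothetical helper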
Yurii H 598916af95 [iheart] Update test description value (#27037)
the description has no HTML tags now.
2021-10-23 01:06:52 +02:00
Remita Amine 59d82ec275 [nrk] fix extraction for videos without a legalAge rating 2021-10-23 01:06:52 +02:00
Remita Amine 0a8305247c [iheart] clean HTML tags from episode description 2021-10-23 01:06:52 +02:00
Remita Amine 4d731a9917 [iheart] remove print statement 2021-10-23 01:06:52 +02:00
Remita Amine c23155171b [googleplus] Remove Extractor(closes #4955)(closes #7400) 2021-10-23 01:06:52 +02:00
Remita Amine 5a6c27fd90 [applepodcasts] Add new extractor(#25918) 2021-10-23 01:06:52 +02:00
Remita Amine 12049cc72a [googlepodcasts] Add new extractor 2021-10-23 01:06:52 +02:00
Remita Amine 0c069db053 [iheart] Add new extractor for iHeartRadio(#27037) 2021-10-23 01:06:52 +02:00
Remita Amine 49dc695b8b [acast] clean podcast URLs 2021-10-23 01:06:52 +02:00
Remita Amine 888a8d4c64 [stitcher] clean podcast URLs 2021-10-23 01:06:52 +02:00
Remita Amine 95a9d868f6 [utils] add a function to clean podcast URLs 2021-10-23 01:06:52 +02:00
Sergey M․ 97cdea36f2 [xfileshare] Add support for aparat.cam (closes #27651) 2021-10-23 01:06:52 +02:00
Sergey M․ d0177a6f30 [nrktv] Add subtitles test 2021-10-23 01:06:52 +02:00
Remita Amine 881049eef5 [twitter] Add support for summary card(closes #25121) 2021-10-23 01:06:52 +02:00
Remita Amine a39405150b [twitter] try to use a Generic fallback for unknown twitter cards(closes #25982) 2021-10-23 01:06:52 +02:00
Remita Amine f6560b19c4 [stitcher] Add support for shows and show metadata extraction(closes #20510) 2021-10-23 01:06:52 +02:00
Remita Amine 7db31b6628 [stv] improve episode id extraction(closes #23083) 2021-10-23 01:06:52 +02:00
Sergey M․ c09836e567 [nrk] Fix age limit extraction 2021-10-23 01:06:52 +02:00
Sergey M․ 74e9abdd95 [nrk] Improve series metadata extraction (closes #27473) 2021-10-23 01:06:52 +02:00
Sergey M․ 0047ca78b2 [nrk] PEP 8 2021-10-23 01:06:52 +02:00
Sergey M․ 6aa2ce5a53 [nrk] Improve episode and season number extraction 2021-10-23 01:06:52 +02:00
Sergey M․ dd3fe3a68e [nrktv] Fix tests 2021-10-23 01:06:52 +02:00
Sergey M․ 28011fbf5d [nrk] Improve series metadata extraction 2021-10-23 01:06:52 +02:00
Sergey M․ d1cc5993a9 [nrk] Extract subtitles 2021-10-23 01:06:52 +02:00
Sergey M․ ee3e906f25 [nrk] Fix age limit extraction 2021-10-23 01:06:52 +02:00
Sergey M․ 2f5c7ee1f9 [nrk] Inline _extract_from_playback 2021-10-23 01:06:52 +02:00
Sergey M․ 3042ced564 [nrk] Improve video id extraction 2021-10-23 01:06:52 +02:00
Sergey M․ f78fbf70e7 [nrk] Add more shortcut tests 2021-10-23 01:06:52 +02:00
Sergey M․ 6ed6badbe2 [nrk] Improve extraction (closes #27634, closes #27635)
+ Add support for mp3 formats
* Generalize and delegate all item extractors to nrk, beware ie key breakages
+ Add support for podcasts
+ Generalize nrk shortcut form to support all kind of ids
2021-10-23 01:06:52 +02:00
Sergey M․ f224645ac3 [nrktv] Switch to playback endpoint
mediaelement endpoint is no longer in use.
2021-10-23 01:06:52 +02:00
Remita Amine 67215062ea [vvvvid] fix season metadata extraction(#18130) 2021-10-23 01:06:52 +02:00
Remita Amine ca8965d28b [stitcher] fix extraction(closes #20811)(closes #27606) 2021-10-23 01:06:52 +02:00
Remita Amine 91dd4819e8 [acast] fix extraction(closes #21444)(closes #27612)(closes #27613) 2021-10-23 01:06:52 +02:00
Remita Amine 7e985046a0 [arcpublishing] add missing staticmethod decorator 2021-10-23 01:06:52 +02:00
Remita Amine 4336684bca [arcpublishing] Add new extractor
closes #2298
closes #9340
closes #17200
2021-10-23 01:06:52 +02:00
Remita Amine f8af3b480f [sky] add support for Sports News articles and Brightcove videos(closes #13054) 2021-10-23 01:06:52 +02:00
Remita Amine 6fec676bf2 [vvvvid] skip unplayable episodes and extract akamai formats(closes #27599) 2021-10-23 01:06:52 +02:00
Remita Amine 9014fa950e [yandexvideo] fix extraction for Python 3.4 2021-10-23 01:06:52 +02:00
Sergey M․ b8ebc9bd91 [redditr] Fix review issues and extract source thumbnail (closes #27503) 2021-10-23 01:06:52 +02:00
ozburo b83833f1f4 [redditr] Extract all thumbnails 2021-10-23 01:06:52 +02:00
Remita Amine 4f49d50c61 [vvvvid] improve info extraction 2021-10-23 01:06:52 +02:00
nixxo 8188302a03 [vvvvid] add playlists support (#27574)
closes #18130
2021-10-23 01:06:52 +02:00
Remita Amine 2c8306ce89 [yandexdisk] extract info from webpage
the public API does not return metadata when download limit is reached
2021-10-23 01:06:52 +02:00
Remita Amine 5643182c7c [yandexdisk] fix extraction(closes #17861)(closes #27131) 2021-10-23 01:06:52 +02:00
Remita Amine ba2ee853d0 [yandexvideo] use old api call as fallback 2021-10-23 01:06:52 +02:00
Remita Amine 7dc64629d5 [yandexvideo] fix extraction(closes #25000) 2021-10-23 01:06:52 +02:00
Remita Amine d918ab8191 [utils] accept only supported protocols in url_or_none 2021-10-23 01:06:52 +02:00
Remita Amine f16294dd19 [YoutubeDL] Allow format filtering using audio language(#16209) 2021-10-23 01:06:52 +02:00
Remita Amine 93c2a90d46 [nbc] Remove CSNNE extractor 2021-10-23 01:06:52 +02:00
Remita Amine 21652ea04f [nbc] fix NBCSport VPlayer URL extraction(closes #16640) 2021-10-23 01:06:52 +02:00
Remita Amine 6a185bd70e [aenetworks] fix HistoryPlayerIE tests 2021-10-23 01:06:52 +02:00
Remita Amine 78c85eee12 [aenetworks] add support for biography.com (closes #3863) 2021-10-23 01:06:52 +02:00
Remita Amine f40b51b20b [uktvplay] match new video URLs(closes #17909) 2021-10-23 01:06:52 +02:00
Remita Amine 03ec618950 [sevenplay] detect API errors 2021-10-23 01:06:52 +02:00
Remita Amine 8215e72574 [tenplay] fix format extraction(closes #26653) 2021-10-23 01:06:52 +02:00
Remita Amine de72e99bd4 [brightcove] raise ExtractorError for DRM protected videos(closes #23467)(closes #27568) 2021-10-23 01:06:52 +02:00
Remita Amine 107ca3cbb4 [aparat] Fix extraction
closes #22285
closes #22611
closes #23348
closes #24354
closes #24591
closes #24904
closes #25418
closes #26070
closes #26350
closes #26738
closes #27563
2021-10-23 01:06:52 +02:00
Remita Amine 5a2545d9e1 [brightcove] remove sonyliv specific code 2021-10-23 01:06:52 +02:00
Remita Amine c75135ffe9 [piksel] improve format extraction 2021-10-23 01:06:52 +02:00
Remita Amine 4a4b71739b [zype] Add support for uplynk videos 2021-10-23 01:06:52 +02:00
Remita Amine c80af1ed8b [toggle] add support for live.mewatch.sg (closes #27555) 2021-10-23 01:06:52 +02:00
JamKage 4174222eb4 [go] Added support for FXNetworks (#26826)
Co-authored-by: James Kirrage <james.kirrage@mortgagegym.com>

closes #13972
closes #22467
closes #23754
2021-10-23 01:06:52 +02:00
Sergey M․ 2edeaf4d05 [teachable] Improve embed detection (closes #26923) 2021-10-23 01:06:52 +02:00
Remita Amine 4e37a9ea6c [mitele] fix free video extraction(#24624)(closes #25827)(closes #26757) 2021-10-23 01:06:52 +02:00
Remita Amine 1c1e20603b [telecinco] fix extraction 2021-10-23 01:06:52 +02:00
Sergey M acd96ffe3c [youtube] Update invidious.snopyta.org (#22667)
Co-authored-by: sofutru <54445344+sofutru@users.noreply.github.com>
2021-10-23 01:06:52 +02:00
Remita Amine a684623f86 [amcnetworks] improve auth only video detection(closes #27548) 2021-10-23 01:06:52 +02:00
Laura Liberda 571c02ab38 VHX embeds
https://github.com/ytdl-org/youtube-dl/issues/27546
2021-10-23 01:06:52 +02:00
Sergey M․ c90c6e0db7 [instagram] Fix test 2021-10-23 01:06:52 +02:00
Sergey M․ 0309b4a494 [instagram] Fix comment count extraction 2021-10-23 01:06:52 +02:00
Sergey M․ 6f05e08af3 [instagram] Add support for reel URLs (closes #26234, closes #26250) 2021-10-23 01:06:52 +02:00
Remita Amine 16f8b4442c [bbc] switch to media selector v6
closes #23232
closes #23933
closes #26303
closes #26432
closes #26821
closes #27538
2021-10-23 01:06:52 +02:00
Sergey M․ 36d012f932 [instagram] Improve thumbnail extraction 2021-10-23 01:06:52 +02:00
Sergey M․ 6e617eb2e8 [instagram] Improve extraction (closes #22880) 2021-10-23 01:06:51 +02:00
Andrew Udvare 22c3b77c77 [instagram] Fix extraction when authenticated (closes #27422) 2021-10-23 01:06:51 +02:00
Sergey M․ ede86727f3 [spankbang] Remove unused import 2021-10-23 01:06:51 +02:00
Sergey M․ 1cb545ef2a [spankbang:playlist] Fix extraction (closes #24087) 2021-10-23 01:06:51 +02:00
Sergey M․ de08d2ebb6 [spankbang] Add support for playlist videos 2021-10-23 01:06:51 +02:00
Sergey M․ c7a0d780b6 [pornhub] Fix review issues (closes #27393) 2021-10-23 01:06:51 +02:00
JChris246 6ba48137c2 [pornhub] Fix lq formats extraction (closes #27386) 2021-10-23 01:06:51 +02:00
Sergey M․ e1b44f86a8 [bongacams] Add extractor (closes #27440) 2021-10-23 01:06:51 +02:00
Remita Amine 1d866fe2b3 [theweatherchannel] fix extraction (closes #25930)(closes #26051) 2021-10-23 01:06:51 +02:00
Remita Amine ac0b651c35 [sprout] correct typo 2021-10-23 01:06:51 +02:00
Remita Amine 7246d188ec [sprout] Add support for Universal Kids (closes #22518) 2021-10-23 01:06:51 +02:00
Remita Amine 866f1a801d [theplatform] allow passing geo bypass countries from other extractors 2021-10-23 01:06:51 +02:00
Remita Amine 93e7f9943c [ctv] Add new extractor (closes #27525) 2021-10-23 01:06:51 +02:00
Remita Amine a8abf2770e [9c9media] improve info extraction 2021-10-23 01:06:51 +02:00
Remita Amine 522b9ee05b [sonyliv] fix title for movies 2021-10-23 01:06:51 +02:00
Remita Amine 6baf86c39d [sonyliv] fix extraction(closes #25667) 2021-10-23 01:06:51 +02:00
Remita Amine 14a2647111 [streetvoice] fix extraction(closes #27455)(closes #27492) 2021-10-23 01:06:51 +02:00
Remita Amine 97e449e183 [facebook] add support for watchparty pages(closes #27507) 2021-10-23 01:06:51 +02:00
Remita Amine ff3bc594e7 [cbslocal] fix video extraction 2021-10-23 01:06:51 +02:00
Remita Amine 50377f35a0 [brightcove] add another method to extract policyKey 2021-10-23 01:06:51 +02:00
Sergey M․ 15058fcdea [mewatch] Relax _VALID_URL (closes #27506) 2021-10-23 01:06:51 +02:00
Remita Amine 25f997a417 [anvato] remove NFLTokenGenerator
until a better solution is introduced that:
- works with lazy_extractors
- allows for 3rd party token generators
2021-10-23 01:06:51 +02:00
Remita Amine 23a5bcc4df [tastytrade] Remove Extractor(closes #25716)
covered by GenericIE via BrightcoveNewIE
2021-10-23 01:06:51 +02:00
Remita Amine 3e93d39835 [niconico] fix playlist extraction(closes #27428) 2021-10-23 01:06:51 +02:00
Remita Amine bfd375ccb7 [everyonesmixtape] Remove Extractor 2021-10-23 01:06:51 +02:00
Remita Amine 4dd69eb6fd [kanalplay] Remove Extractor 2021-10-23 01:06:51 +02:00
Remita Amine 8f1118f75d [nba] rewrite extractor 2021-10-23 01:06:51 +02:00
Remita Amine adbb3cdd89 [turner] improve info extraction 2021-10-23 01:06:51 +02:00
Remita Amine 4969a2783c [common] remove unwanted query params from unsigned akamai manifest URLs 2021-10-23 01:06:51 +02:00
Sergey M․ 86cf5a2aa7 [generic] Improve RSS age limit extraction 2021-10-23 01:06:51 +02:00
renalid e1b808fa40 [generic] Fix RSS itunes thumbnail extraction (#27405) 2021-10-23 01:06:51 +02:00
Trevor Nelson 1da4c42b9f [redditr] Extract duration (#27426) 2021-10-23 01:06:51 +02:00
Remita Amine aaabef0220 [anvato] Disable NFLTokenGenerator(closes #27449) 2021-10-23 01:06:51 +02:00
Remita Amine 4eab6d5637 [zaq1] Remove extractor 2021-10-23 01:06:51 +02:00
Remita Amine 76995ec854 [asiancrush] fix extraction and add support for retrocrush.tv
closes #25577
closes #25829
2021-10-23 01:06:51 +02:00
Remita Amine 7b2415f4f9 [nfl] fix extraction(closes #22245) 2021-10-23 01:06:51 +02:00
Remita Amine c2c2b20b39 [anvato] update ANVACK table and add experimental token generator for NFL 2021-10-23 01:06:51 +02:00
Remita Amine a1952d3a0b [sky] relax SkySports URL regex (closes #27435) 2021-10-23 01:06:51 +02:00
Remita Amine 4bc794225a [tv5unis] Add new extractor(closes #22399)(closes #24890) 2021-10-23 01:06:51 +02:00
Remita Amine a78d792adb [videomore] add support more.tv (closes #27088) 2021-10-23 01:06:51 +02:00
Remita Amine b2d4c847ed [nhk:program] Add support for audio programs and program clips 2021-10-23 01:06:51 +02:00
Matthew Rayermann 7e6e4c9a29 [nhk] Add support for NHK video programs (#27230) 2021-10-23 01:06:51 +02:00
Sergey M․ 93dd9a4b58 [test_InfoExtractor] PEP 8 2021-10-23 01:06:51 +02:00
Sergey M․ f0db042d42 [mdr] Bypass geo restriction 2021-10-23 01:06:51 +02:00
Sergey M․ b331a819bc [mdr] Improve extraction (closes #24346, closes #26873) 2021-10-23 01:06:51 +02:00
Sergey M․ c700190a05 [eporner] Fix view count extraction and make optional (closes #23306) 2021-10-23 01:06:51 +02:00
Sergey M․ 1060552241 [extractor/common] Improve JSON-LD interaction statistic extraction (refs #23306) 2021-10-23 01:06:51 +02:00
Sergey M․ c7e6e03982 [eporner] Fix embed test URL 2021-10-23 01:06:51 +02:00
spvkgn aa655226dd [eporner] Fix hash extraction and extend _VALID_URL (#27396)
Co-authored-by: Sergey M <dstftw@gmail.com>
2021-10-23 01:06:51 +02:00
Remita Amine 948c21b310 [slideslive] use m3u8 entry protocol for m3u8 formats(closes #27400) 2021-10-23 01:06:51 +02:00
Remita Amine 90b9baceb9 [downloader/hls] delegate manifests with media initialization to ffmpeg 2021-10-23 01:06:51 +02:00
Remita Amine 2257ec4792 [twitcasting] fix format extraction and improve info extraction(closes #24868) 2021-10-23 01:06:51 +02:00
Sergey M․ 09824dd983 [extractor/common] Document duration meta field for playlists 2021-10-23 01:06:51 +02:00
Sergey M․ d5b9b7ffc1 [linuxacademy] Fix authentication and extraction (closes #21129, closes #26223, closes #27402) 2021-10-23 01:06:51 +02:00
Remita Amine 096b76fb8f [itv] clean description from HTML tags (closes #27399) 2021-10-23 01:06:51 +02:00
Remita Amine 26a7c4416c [hotstar] fix and improve extraction
- fix format extraction (closes #26690)
- extract thumbnail URL (closes #16079, closes #20412)
- support country specific playlist URLs (closes #23496)
- select the last id in video URL (closes #26412)
2021-10-23 01:06:51 +02:00
toniz4 a16737d123 [youtube] Add some invidious instances (#27373)
Co-authored-by: Cássio <heyitscassio@cock.li>
2021-10-23 01:06:51 +02:00
Sergey M․ 940c972e32 [ruutu] Extract more metadata and detect non-free videos (closes #21154) 2021-10-23 01:06:51 +02:00
Sergey M․ 9551a110d5 [ruutu] Authenticate format URLs (closes #21031, closes #26782) 2021-10-23 01:06:51 +02:00
Sergey M․ ee2b2e978d [ruutu] Add support for static.nelonenmedia.fi (closes #25412) 2021-10-23 01:06:51 +02:00
Sergey M․ 0f76bf3b16 [ruutu] Extend _VALID_URL (closes #24839) 2021-10-23 01:06:51 +02:00
Remita Amine 1f42f0d662 [facebook] Add support archived live video URLs(closes #15859) 2021-10-23 01:06:51 +02:00
Sergey M․ 85b2a459dd [wdr] Extend subtitles extraction and improve overall extraction (closes #22672, closes #22723) 2021-10-23 01:06:51 +02:00
Remita Amine ce1265ba8b [facebook] add support for videos attached to Relay based story pages(#10795) 2021-10-23 01:06:51 +02:00
Sergey M․ 271de5e36b [wdr:page] Add support for kinder.wdr.de (closes #27350) 2021-10-23 01:06:51 +02:00
Remita Amine 7178c3e070 [facebook] Add another regex for handleServerJS 2021-10-23 01:06:51 +02:00
Remita Amine 80e135e246 [facebook] fix embed page extraction 2021-10-23 01:06:51 +02:00
compujo d96c2cd5bc [YoutubeDL] Improve deducing of thumbnail filenames (closes #26010) (#…
…27244)
2021-10-23 01:06:51 +02:00
Remita Amine a494221ffa [facebook] add support for Relay post pages(closes #26935) 2021-10-23 01:06:51 +02:00
Remita Amine 3a0bdc1456 [facebook] proper support for watch videos(closes #22795)(#27062) 2021-10-23 01:06:51 +02:00
Remita Amine 6970a136ec Revert "[facebook] add support for watch videos(closes #22795)"
This reverts commit dc65041c224497f46b2984df02c234ce54bdedfd.
2021-10-23 01:06:51 +02:00
Remita Amine ab0118de82 [facebook] add support for watch videos(closes #22795) 2021-10-23 01:06:51 +02:00
Remita Amine cfa38c154d [facebook] add support for group posts with multiple videos(closes #1…
…9131)
2021-10-23 01:06:51 +02:00
Remita Amine 885a3a9383 [itv] remove old extraction method and fix series metadata extraction
closes #23177
closes #26897
2021-10-23 01:06:51 +02:00
Remita Amine 131b5bbf6e [facebook] redirect Mobile URLs to Desktop URLs
closes #24831
closes #25624
2021-10-23 01:06:51 +02:00
Remita Amine 7d43250e35 [facebook] Add support for Relay based pages(closes #26823) 2021-10-23 01:06:51 +02:00
Remita Amine 9ba5d1f2c4 [facebook] try to reduce unnecessary tahoe requests 2021-10-23 01:06:51 +02:00
Remita Amine 61022d86b9 [facebook] remove hardcoded chrome user-agent
closes #18974
closes #25411
closes #26958
closes #27329
2021-10-23 01:06:51 +02:00
Andrey Smirnoff b0635eb9af [smotri] Remove extractor (#27358) 2021-10-23 01:06:51 +02:00
Remita Amine d64b39da56 [beampro] Remove Extractor
closes #17290
closes #22871
closes #23020
closes #23061
closes #26099
2021-10-23 01:06:51 +02:00
EntranceJew 45ee5c8ba2 [tubitv] Extract release year (#27317) 2021-10-23 01:06:51 +02:00
Remita Amine 51433a1efa [amcnetworks] Fix free content extraction(closes #20354) 2021-10-23 01:06:51 +02:00
Remita Amine 8a311118c5 [telequebec] Fix Extraction and Add Support for video.telequebec.tv
closes #25733
closes #26883
closes #27339
2021-10-23 01:06:51 +02:00
Remita Amine d2f6235840 [generic] comment out a test now covered by AmericasTestKitchenIE 2021-10-23 01:06:51 +02:00
Remita Amine 03f1dda5da [tvplay:home] Fix extraction(closes #21153) 2021-10-23 01:06:51 +02:00
Remita Amine aad69ec1b5 [americastestkitchen] Fix Extraction and add support for Cook's Count…
…ry and Cook's Illustrated

closes #17234
closes #27322
2021-10-23 01:06:51 +02:00
Sergey M․ 695d097a1b [slideslive] Add support for yoda service videos and extract subtitle…
…s (closes #27323)
2021-10-23 01:06:51 +02:00
Sergey M․ e895ca0d9f [extractor/generic] Remove unused import 2021-10-23 01:06:51 +02:00
Remita Amine b89074356b [aenetworks] Fix extraction
- Fix Fastly format extraction
- Add support for play and watch subdomains
- Extract series metadata

closes #23363
closes #23390
closes #26795
closes #26985
2021-10-23 01:06:51 +02:00
Sergey M․ a0b9b17798 [extractor/common] Extract timestamp from Last-Modified header 2021-10-23 01:06:51 +02:00
Sergey M․ 10b7025196 [generic] Extract RSS video itunes metadata 2021-10-23 01:06:51 +02:00
Sergey M․ 021738dfa7 [generic] Extract RSS video timestamp 2021-10-23 01:06:51 +02:00
renalid d3f521bbdc [generic] Extract RSS video description (#27177) 2021-10-23 01:06:51 +02:00
Remita Amine d6457b9613 [nrk] reduce requests for Radio series 2021-10-23 01:06:51 +02:00
Remita Amine 2120179b61 [nrk] reduce the number of instalments requests 2021-10-23 01:06:51 +02:00
Remita Amine 392743d1e4 [nrk] improve format extraction 2021-10-23 01:06:51 +02:00
Remita Amine 917f2ec68d [nrk] improve extraction
- improve format extraction for old akamai formats
- update some of the tests
- add is_live value to entry info dict
- request instalments only when they're available
- fix skole extraction
2021-10-23 01:06:51 +02:00
Sergey M․ 9c92a54488 [peertube] Extract fps 2021-10-23 01:06:51 +02:00
Sergey M․ c23e70b2e7 [peertube] Recognize audio-only formats (closes #27295) 2021-10-23 01:06:51 +02:00
Sergey M․ c11211efce [teachable:course] Improve extraction (closes #24507, closes #27286) 2021-10-23 01:06:51 +02:00
Sergey M․ 427dfe7ca9 [nrk] Improve error extraction 2021-10-23 01:06:51 +02:00
Sergey M․ 24342b07db [nrktv] Relax _VALID_URL 2021-10-23 01:06:51 +02:00
Sergey M․ 3d5bbf574b [nrktv:series] Improve extraction (closes #21926) 2021-10-23 01:06:51 +02:00
Sergey M․ 8b78e040c0 [nrktv:series] Improve extraction 2021-10-23 01:06:51 +02:00
Sergey M․ d98d4ea999 [nrktv:season] Improve extraction 2021-10-23 01:06:51 +02:00
Remita Amine 3935b7503d [nrk] fix call to moved method 2021-10-23 01:06:51 +02:00
Remita Amine c3f178555c [nrk] fix typo 2021-10-23 01:06:51 +02:00
Remita Amine 9cd7a23c87 [nrk] improve format extraction and geo-restriction detection (closes…
#24221)
2021-10-23 01:06:51 +02:00
Sergey M․ 3012eb0bee [pornhub] Handle HTTP errors gracefully (closes #26414) 2021-10-23 01:06:51 +02:00
Sergey M․ 9647acd865 [nrktv] Relax _VALID_URL (closes #27299, closes #26185) 2021-10-23 01:06:51 +02:00
Remita Amine 5657492d8b [zdf] extract webm formats(closes #26659) 2021-10-23 01:06:51 +02:00
Matthew Rayermann a2c136b114 [nhk] Add audio clip test to NHK extractor (#27269) 2021-10-23 01:06:51 +02:00
Remita Amine aae5e2a9ae [gamespot] Extract DASH and HTTP formats 2021-10-23 01:06:51 +02:00
Remita Amine f1544fba1b [extractor/common] improve Akamai HTTP format extraction 2021-10-23 01:06:51 +02:00
Remita Amine 3e720fb782 [tver] correct episode_number key 2021-10-23 01:06:51 +02:00
Remita Amine b4790d23c3 [extractor/common] improve Akamai HTTP format extraction
- Allow m3u8 manifest without an additional audio format
- Fix extraction for qualities starting with a number
Solution provided by @nixxo based on: https://stackoverflow.com/a/5984688
2021-10-23 01:06:51 +02:00
Remita Amine 7eaf5dfb74 [tver] Add new extractor (closes #26662)(closes #27284) 2021-10-23 01:06:51 +02:00
Remita Amine a542d171f2 [extractors] Add QubIE import 2021-10-23 01:06:51 +02:00
Remita Amine 25b8a45235 [tva] Add support for qub.ca (closes #27235) 2021-10-23 01:06:51 +02:00
Remita Amine cb2a719249 [toggle] Detect DRM protected videos (closes #16479)(closes #20805) 2021-10-23 01:06:51 +02:00
Remita Amine 1016c56cd3 [toggle] Add support for new MeWatch URLs (closes #27256) 2021-10-23 01:06:51 +02:00
Sergey M․ 00fc0dea8c [cspan] Extract info from jwplayer data (closes #3672, closes #3734, …
…closes #10638, closes #13030, closes #18806, closes #23148, closes #24461, closes #26171, closes #26800, closes #27263)
2021-10-23 01:06:51 +02:00
Roman Beránek db8e1543b1 [cspan] Pass Referer header with format's video URL (#26032) (closes …
…#25729)
2021-10-23 01:06:51 +02:00
Remita Amine cf0fbf895a [mediaset] add support for movie URLs(closes #27240) 2021-10-23 01:06:51 +02:00
Sergey M․ c7c2b1d972 [yandexmusic:track] Fix extraction (closes #26449, closes #26669, clo…
…ses #26747, closes #26748, closes #26762)
2021-10-23 01:06:51 +02:00
Michael Munch 176b13bab2 [drtv] Extend _VALID_URL (#27243) 2021-10-23 01:06:51 +02:00
bopol af3b23c63d [ina] Add support for mobile URLs (#27229) 2021-10-23 01:06:51 +02:00
Sergey M․ 06cf4cef12 [YoutubeDL] Write static debug to stderr and respect quiet for dynami…
…c debug (closes #14579, closes #22593)

TODO: logging and verbosity need major refactoring (refs #10894)
2021-10-23 01:06:51 +02:00
Adrian Heine né Lang c968fd04de [videa] Adapt to updates (#26301)
closes #25973, closes #25650.
2021-10-23 01:06:51 +02:00
Remita Amine c3d9771ac6 [spreaker] fix SpreakerShowIE test URL 2021-10-23 01:06:51 +02:00
Sergey M․ 0f12f211cf [spreaker] Add extractor (closes #13480, closes #13877) 2021-10-23 01:06:51 +02:00
Remita Amine fd0c0b9663 [viki] fix video API request(closes #27184) 2021-10-23 01:06:51 +02:00
Remita Amine 8ebadf3f79 [bbc] fix BBC Three clip extraction 2021-10-23 01:06:51 +02:00
Remita Amine 67f2b570ef [bbc] fix BBC News videos extraction 2021-10-23 01:06:51 +02:00
Remita Amine c7e8522059 [medaltv] improve extraction 2021-10-23 01:06:51 +02:00
Joshua Lochner 7fff37c758 [medaltv] Add new extractor (#27149) 2021-10-23 01:06:51 +02:00
Sergey M․ 42acea347e [downloader/fragment] Set final file's mtime according to last fragme…
…nt's Last-Modified header (closes #11718, closes #18384, closes #27138)
2021-10-23 01:06:51 +02:00
Sergey M․ 8315bccf62 [nrk] Fix extraction 2021-10-23 01:06:51 +02:00
Remita Amine 84e0c2cca9 [pinterest] Add support for large collections(more than 25 pins) 2021-10-23 01:06:51 +02:00
Remita Amine 2aa4d9aab1 [franceinter] flake8 2021-10-23 01:06:51 +02:00
renalid aa8cbb45e3 [franceinter] add thumbnail url (#27153)
Co-authored-by: remitamine <remitamine@gmail.com>
2021-10-23 01:06:51 +02:00
Remita Amine 6908e58791 [box] Add new extractor(#5949) 2021-10-23 01:06:51 +02:00
Jia Rong Yee 711ef6c996 [nytimes] Add new cooking.nytimes.com extractor (#27143)
* [nytimes] support cooking.nytimes.com, resolves #27112

Co-authored-by: remitamine <remitamine@gmail.com>
2021-10-23 01:06:51 +02:00
Remita Amine f20b6d8dd4 [rumble] add support for embed pages(#10785) 2021-10-23 01:06:51 +02:00
Remita Amine 2fdf3447c1 [skyit] add support for multiple Sky Italia websites(closes #26629) 2021-10-23 01:06:51 +02:00
Remita Amine 1dd494ee31 [extractor/common] add generic support for akamai http format extraction 2021-10-23 01:06:51 +02:00
Sergey M․ 0ffe0d8bbd [pinterest] Add extractor (closes #25747) 2021-10-23 01:06:51 +02:00
Sergey M․ 63d1574bc8 [svtplay] Fix test title 2021-10-23 01:06:51 +02:00
Sergey M․ c315d7caf0 [svtplay] Add support for svt.se/barnkanalen (closes #24817) 2021-10-23 01:06:51 +02:00
Mattias Wadman b6c66dbc79 [svt] Extract timestamp and thumbnail in more cases (#27130)
Add timestamp, set to "valid from", which I think can be seen as the publish time.
Add thumbnail in more cases; it seems it was previously only done in the embedded-data case for some reason.
Switch the svtplay test URL to an existing video, and one with no expiry date.
Also add an additional thumbnail URL test regex.
2021-10-23 01:06:51 +02:00
Remita Amine 4e68d816b3 [infoq] fix format extraction(closes #25984) 2021-10-23 01:06:51 +02:00
renalid eb38071416 [francetv] Update to fix thumbnail URL issue (#27120)
Fix the thumbnail URL. The issue had been there for many years and was never fixed. It's done! :-)

Example: https://www.france.tv/france-2/de-gaulle-l-eclat-et-le-secret/de-gaulle-l-eclat-et-le-secret-saison-1/2035247-solitude.html

Broken thumbnail URL previously generated: http://pluzz.francetv.fr/staticftv/ref_emissions/2020-11-02/EMI_1104da66f533cc7dc5d0d07a181a18c2e2fe1d81_20201014122553940.jpg

Correct thumbnail URL now: https://sivideo.webservices.francetelevisions.fr/staticftv/ref_emissions/2020-11-02/EMI_1104da66f533cc7dc5d0d07a181a18c2e2fe1d81_20201014122553940.jpg
2021-10-23 01:06:51 +02:00
Sergey M․ 21c9c8293f [downloader/http] Fix crash during urlopen caused by missing reason o…
…f URLError
2021-10-23 01:06:51 +02:00
Laura Liberda 1250d58b96 improve copykitku patch hook 2021-10-23 01:06:51 +02:00
Sergey M․ 0b84ab95e5 [YoutubeDL] Fix --ignore-errors for playlists with generator-based en…
…tries of url_transparent (closes #27064)
2021-10-23 01:06:50 +02:00
Remita Amine bf3df2dd79 [discoverynetworks] add support new TLC/DMAX URLs(closes #27100) 2021-10-23 01:06:50 +02:00
Remita Amine 1b9cc0baff [rai] fix protocol relative relinker URLs(closes #22766) 2021-10-23 01:06:50 +02:00
Remita Amine 9c2afc03ee [rai] fix unavailable video format detection 2021-10-23 01:06:50 +02:00
Remita Amine 081b5611f1 [rai] improve extraction 2021-10-23 01:06:50 +02:00
Leonardo Taccari 1f784e32ec [rai] Fix extraction for recent raiplay.it updates (#27077)
- Remove first test of RaiPlayIE: it is no longer available
- Make RaiPlayIE extension-agnostic (passing possible `.json' URLs is now
  supported too)
- Adjust RaiPlayLiveIE to recent raiplay.it updates.  Passing it as
  `url_transparent' is no longer supported (there is no longer an accessible
  ContentItem)
- Adjust RaiPlayPlaylistIE to recent raiplay.it updates and instruct it about
  ContentSet-s.
- Update a RaiIE test and remove two tests that are no longer available

Thanks to @remitamine for the review!
2021-10-23 01:06:50 +02:00
Remita Amine ab19e26c5d [viki] improve format extraction 2021-10-23 01:06:50 +02:00
beefchop 7f6aa287d7 [viki] fix stream extraction from mpd (#27092)
Co-authored-by: beefchop <beefchop@users.noreply.github.com>
2021-10-23 01:06:50 +02:00
Remita Amine 0a76a87779 [amara] improve extraction 2021-10-23 01:06:50 +02:00
Joost Verdoorn 3182075ad6 [Amara] Add new extractor (#20618)
* [Amara] Add new extractor
2021-10-23 01:06:50 +02:00
Remita Amine a9fd199643 [vimeo:album] fix extraction(closes #27079) 2021-10-23 01:06:50 +02:00
Remita Amine dc9feaf10e [mtv] fix mgid extraction(closes #26841) 2021-10-23 01:06:50 +02:00
Sergey M․ 4f7945880e [youporn] Fix upload date extraction and make comment count optional …
…(closes #26986)
2021-10-23 01:06:50 +02:00
Sergey M․ 7910bb7586 [arte] Rework extractors
* Reimplement embed and playlist extractors to delegate to the single entry-point artetv extractor
  Beware: download archive extractor keys may break.
* Improve embeds detection (closes #27057)
- Remove obsolete code
2021-10-23 01:06:50 +02:00
Sergey M․ 60ee706bbd [arte] Extract m3u8 formats (closes #27061) 2021-10-23 01:06:50 +02:00
Remita Amine fe97445dc3 [mgtv] fix format extraction(closes #26415) 2021-10-23 01:06:50 +02:00
Sergey M․ 00a35be6f7 [extractor/common] Output error for invalid URLs in _is_valid_url (re…
…fs #21400, refs #24151, refs #25617, refs #25618, refs #25586, refs #26068, refs #27072)
2021-10-23 01:06:50 +02:00
Remita Amine 276aef5dde [francetv] improve info extraction 2021-10-23 01:06:50 +02:00
gdzx c8a6cd5640 [francetv] Add fallback video url extraction (#27047)
Fall back to another API endpoint when no video formats are found.

Closes ytdl-org#22561
2021-10-23 01:06:50 +02:00
Sergey M․ d445ef16e7 [spiegel] Fix extraction (closes #24206, closes #24767)
Code picked from PR #24767 since the original repo is not available due to a takedown.
2021-10-23 01:06:50 +02:00
Remita Amine f91186eb70 [malltv] fix extraction(closes #27035) 2021-10-23 01:06:50 +02:00
Remita Amine d420d23ad2 [bandcamp] extract playlist_description(closes #22684) 2021-10-23 01:06:50 +02:00
Remita Amine e23e51ca94 [urplay] fix extraction(closes #26828) 2021-10-23 01:06:50 +02:00
Remita Amine 02f491e375 [lrt] fix extraction with empty tags(closes #20264) 2021-10-23 01:06:50 +02:00
Sergey M․ 71d074ba46 [ndr:embed:base] Extract subtitles (closes #25447, closes #26106) 2021-10-23 01:06:50 +02:00
Sergey M․ cb9623206e [servus] Add support for pm-wissen.com (closes #25869) 2021-10-23 01:06:50 +02:00
Sergey M․ 4b3da7ce47 [servus] Fix extraction (closes #26872, closes #26967, closes #26983,…
closes #27000)
2021-10-23 01:06:50 +02:00
Sergey M․ 23c968301c [xtube] Fix extraction (closes #26996) 2021-10-23 01:06:50 +02:00
Sergey M․ 64e706884a [utils] Skip ! prefixed code in js_to_json 2021-10-23 01:06:50 +02:00
Remita Amine 41ba58cea2 [lrt] fix extraction 2021-10-23 01:06:50 +02:00
Remita Amine 099936f4e6 [condenast] fix extraction and extract subtitles 2021-10-23 01:06:50 +02:00
Remita Amine 3ac79c2585 [bandcamp] fix extraction 2021-10-23 01:06:50 +02:00
Remita Amine fc645d1052 [rai] fix RaiPlay extraction 2021-10-23 01:06:50 +02:00
Remita Amine 065a8e40de [usanetwork] fix extraction 2021-10-23 01:06:50 +02:00
Remita Amine 8ff536d5be [nbc] fix NBCNews/Today/MSNBC extraction 2021-10-23 01:06:50 +02:00
Edward Betts 3a7feb93dc [devscripts/make_lazy_extractors] Correct a spelling mistake (#26991) 2021-10-23 01:06:50 +02:00
Remita Amine d23d09c709 [cnbc] fix extraction 2021-10-23 01:06:50 +02:00
97 changed files with 3978 additions and 2081 deletions

2
.github/FUNDING.yml vendored Normal file
View file

@ -0,0 +1,2 @@
github: selfisekai
ko_fi: selfisekai

1
.gitignore vendored
View file

@ -15,6 +15,7 @@ haruhi-dl.1
haruhi-dl.bash-completion
haruhi-dl.fish
haruhi_dl/extractor/lazy_extractors.py
haruhi_dl/extractor_artifacts/
haruhi-dl
haruhi-dl.exe
haruhi-dl.tar.gz

View file

@ -1,5 +1,6 @@
default:
before_script:
- sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
- apk add bash
- pip install nose

104
ChangeLog
View file

@ -1,3 +1,107 @@
version 2021.08.01
Extractor
* [youtube] fixed agegate
* [niconico] dmc downloader from youtube-dlp
* [peertube] new URL schemas

version 2021.06.20
Core
* [playwright] fixed headlessness
+ [playwright] option to force a specific browser
Extractor
* [tiktok] fix empty video lists
* [youtube] fix and speed-up age-gate circumvention
* [youtube] fix videos with JS-like syntax

version 2021.06.01
Core
* merging formats by codecs
* [json_ld] better author extraction
+ --force-use-mastodon option
* support for HTTP 308 redirects
+ [test_execution] add test for lazy extractors
* Improve extract_info doc
* [options] Fix thumbnail option group name
Extractor
* [tvp:series] fallback to web
- [ninateka] remove extractor
* [tvn24] refactor handling next.js frontend
* [cda] fix premium videos for premium users (?)
* [tvp] support for tvp.info vue.js pages
+ [sejm.gov.pl] new extractors
+ [senat.gov.pl] new extractors
* [spreaker] new url schemes
* [spreaker] support for embedded player
+ [spryciarze.pl] new extractors
+ [castos] new extractors
+ [magentamusik360] new extractor
+ [arnes] new extractor
+ [palcomp3] new extractor
* [screencastomatic] fix extraction
* [youku] update ccode
+ [line] support live.line.me
* [curiositystream] fix format extraction
* [jamendo] fix track extraction
* [pornhub] extracting DASH and HLS formats
* [mtv] fix Viacom A/B testing video player
+ [maoritv] new extractor
* [pluralsight] extend anti-throttling timeout
* [mastodon] support for soapbox and audio files
* [tvp] fix jp2.tvp.pl
* [youtube:channel] fix multiple page extraction
* [tvp:embed] handling formats better way
* [tvn] better extraction method choosing
* [tvp] fix tvp:website extracting with weird urls
+ [wppilot] new extractors
+ [mastodon] logging in to mastodon/pleroma
+ [mastodon] fetching posts via different instances
+ [mastodon] fetching peertube videos via pleroma instances
* [bbc] extract full description from __INITIAL_DATA__
* [tver] redirect all downloads to Brightcove
* [medaltv] fix extraction
* [francetvinfo] improve video id extraction
* [xfileshare] support for wolfstream.tv
* [tv2dk] fix extraction
* [svtplay] improve extraction
* [xtube] fix formats extraction
* [twitter] improve formats extraction from vmap URL
* [mastodon] cache apps on logging in
* [mastodon] support cards to external services
* [peertube] logging in
* [tiktok] deduplicate videos
+ [misskey] new extractor
+ [radiokapital] new extractors
* [youtube] fix videos with age gate
* [kaltura] Make embed code alternatives actually work
* [kaltura] Improve iframe extraction
* [dispeak] Improve FLV extraction
* [dispeak] DRY and update tests
* [gdcvault] Add support for HTML5 videos
* [funimation] Add support for optional lang code in URLs
* [medaltv] Relax _VALID_URL
- [blinkx] Remove extractor
* [orf:radio] Switch download URLs to HTTPS
+ [generic] Add Referer header for direct videojs download URLs
+ [vk] Add support for sibnet embeds
+ [generic] Add support for sibnet embeds
* [phoenix] Fix extraction
* [generic] Add support for og:audio
* [vivo] Add support for vivo.st
* [eroprofile] Fix extraction
* [playstuff] Add extractor
* [shahid] relax _VALID_URL
* [redbulltv] fix embed data extraction
* [vimeo] fix vimeo pro embed extraction
* [twitch:clips] Add access token query to download URLs
* [twitch:clips] Improve extraction
* [ted] Prefer own formats over external sources
* [ustream] Detect https embeds
* [ard] Relax _VALID_URL and fix video ids

version 2021.04.01
Core
- Removed Herobrine

181
LICENSE
View file

@ -1,165 +1,24 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
This is free and unencumbered software released into the public domain.
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.
For more information, please refer to <http://unlicense.org/>

View file

@ -2,10 +2,11 @@
[![build status](https://img.shields.io/gitlab/pipeline/laudom/haruhi-dl/master?gitlab_url=https%3A%2F%2Fgit.sakamoto.pl&style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/pipelines)
[![PyPI Downloads](https://img.shields.io/pypi/dm/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
[![License: LGPL 3.0 or later](https://img.shields.io/pypi/l/haruhi-dl?style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/blob/master/README.md)
[![Sasin stole 70 million PLN](https://img.shields.io/badge/Sasin-stole%2070%20million%20PLN-orange?style=flat-square)](https://www.planeta.pl/Wiadomosci/Polityka/Ile-kosztowaly-karty-wyborcze-Sasin-do-wiezienia-Wybory-odwolane)
[![Trans rights!](https://img.shields.io/badge/Trans-rights!-5BCEFA?style=flat-square)](http://transfuzja.org/en/artykuly/trans_people_in_poland/situation.htm)
# This project has ended. Our forces have moved into contributing to [yt-dlp](https://github.com/yt-dlp/yt-dlp).
This is a fork of [youtube-dl](https://yt-dl.org/), focused on bringing a fast, steady stream of updates. We'll do our best to merge patches to any site, not only youtube.
Our main repository is on our GitLab: https://git.sakamoto.pl/laudompat/haruhi-dl
@ -14,30 +15,79 @@ A Microsoft GitHub mirror exists as well: https://github.com/haruhi-dl/haruhi-dl
## Installing
haruhi-dl is available on PyPI: [![version on PyPI](https://img.shields.io/pypi/v/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
System-specific ways:
- [Windows .exe files](https://git.sakamoto.pl/laudompat/haruhi-dl/-/releases) ([mirror](https://github.com/haruhi-dl/haruhi-dl/releases)) - just unpack and run the exe file in cmd/powershell! (ffmpeg/rtmpdump not included, playwright extractors won't work)
- [Arch Linux (AUR)](https://aur.archlinux.org/packages/haruhi-dl/) - `yay -S haruhi-dl` (managed by mlunax)
- [macOS (homebrew)](https://formulae.brew.sh/formula/haruhi-dl) - `brew install haruhi-dl` (managed by Homebrew)
haruhi-dl is also available on PyPI: [![version on PyPI](https://img.shields.io/pypi/v/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
Install release from PyPI on Python 3.x:
```sh
$ python3 -m pip install --upgrade haruhi-dl
```
Install from master (unstable) on Python 3.x:
```sh
$ python3 -m pip install --upgrade git+https://git.sakamoto.pl/laudompat/haruhi-dl.git
```
**Python 2 support is dropped and we recommend to switch to Python 3**, though it may still work.
**Python 2 support is dropped, use Python 3.**
## Usage
```sh
$ haruhi-dl "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
```
That's it! You just got rickrolled!
Full manual with all options:
```sh
$ haruhi-dl --help
```
## Differences from youtube-dl
_This is not a complete list._
- Extracting and downloading video with subtitles from m3u8 (HLS) - this also includes subtitles from Twitter and some other services
- Support for BitTorrent protocol (only used when explicitly enabled by user with `--allow-p2p` or `--prefer-p2p`; aria2c required)
- Specific way to handle selfhosted services (untied to specific providers/domains, like PeerTube, Funkwhale, Mastodon)
- Specific way to handle content proxy sites (like Nitter for Twitter)
- Merging formats by codecs instead of file extensions, if possible (you'd rather have your AV1+opus downloads from YouTube as .webm than .mkv, wouldn't you?)
- New/improved/fixed extractors:
- PeerTube (extracting playlists, channels and user accounts, optionally downloading with BitTorrent)
- Funkwhale
- TikTok (extractors for user profiles, hashtags and music - all except single video and music with `--no-playlist` require Playwright)
- cda.pl
- Ipla
- Weibo (DASH formats)
- LinkedIn (videos from user posts)
- Acast
- Mastodon (including Pleroma, Gab Social, Soapbox)
- Ring Publishing (aka PulsEmbed, PulseVideo, OnetMVP; Ringier Axel Springer)
- TVP (support for TVPlayer2, client-rendered sites and TVP ABC, refactored some extractors to use mobile JSON API)
- TVN24 (support for main page, Fakty and magazine frontend)
- PolskieRadio
- Agora (wyborcza.pl video, wyborcza.pl/wysokieobcasy.pl/audycje.tokfm.pl podcasts, tuba.fm)
- sejm.gov.pl/senat.gov.pl
- Some improvements with handling JSON-LD
## Bug reports
Please send the bug details to <bug@haruhi.download> or on [Microsoft GitHub](https://github.com/haruhi-dl/haruhi-dl/issues).
## Contributing
If you want to contribute, send us a diff to <contribute@haruhi.download>, or submit a Pull Request on [our mirror at Microsoft GitHub](https://github.com/haruhi-dl/haruhi-dl).
The project has ended. As an alternative, use [yt-dlp](https://github.com/yt-dlp/yt-dlp) - we're going to contribute there from now on :3
## Donations
If my contributions helped you, please consider sending me a small tip.
[![Buy Me a Coffee at ko-fi.com](https://cdn.ko-fi.com/cdn/kofi1.png?v=2)](https://ko-fi.com/selfisekai)

View file

@ -1,10 +1,6 @@
#!/usr/bin/env python3
#!/usr/bin/env python
import sys
import haruhi_dl
if __name__ == '__main__':
if sys.version_info[0] == 2:
sys.exit('haruhi-dl no longer works on Python 2, use Python 3 instead')
else:
import haruhi_dl
haruhi_dl.main()
haruhi_dl.main()

View file

@ -1,6 +1,6 @@
#!/bin/bash
data="$(curl -s "https://www.youtube.com/s/player/$1/player_ias.vflset/en_GB/base.js")"
func="$(grep -P '[a-z]\=a\.split.*a\.join' <<< "$data")"
func="$(grep -P '[a-z]\=a\.split\([\"'"'"']{2}.*a\.join' <<< "$data")"
echo "full extracted function: $func"
obfuscatedName="$(grep -Poh '\(""\);[A-Za-z]+' <<< "$func" | sed -s 's/("");//')"

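For reference, a rough Python equivalent of the tightened grep above (a sketch only: PLAYER_ID is a placeholder, and it assumes the player JS keeps the a.split("")…a.join shape the script targets):

```python
# fetch the player JS and locate the signature-scrambling function (sketch)
import re
import urllib.request

url = 'https://www.youtube.com/s/player/PLAYER_ID/player_ias.vflset/en_GB/base.js'
player_js = urllib.request.urlopen(url).read().decode('utf-8')

# same pattern as the grep: a variable assigned from a.split('')/a.split("") ... a.join
match = re.search(r"[a-z]=a\.split\([\"']{2}.*a\.join", player_js)
if match:
    print('full extracted function:', match.group(0))
```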
View file

@ -5,6 +5,17 @@
module.exports = function patchHook(patchContent) {
[
[/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
// fork: https://github.com/blackjack4494/yt-dlc
[/youtube_dlc/g, 'haruhi_dl'],
[/youtube-dlc/g, 'haruhi-dl'],
[/ytdlc/g, 'hdl'],
[/yt-dlc/g, 'hdl'],
// fork: https://github.com/yt-dlp/yt-dlp
[/yt_dlp/g, 'haruhi_dl'],
[/yt-dlp/g, 'haruhi-dl'],
[/ytdlp/g, 'hdl'],
[/youtube_dl/g, 'haruhi_dl'],
[/youtube-dl/g, 'haruhi-dl'],
[/youtubedl/g, 'haruhidl'],
@ -16,6 +27,8 @@ module.exports = function patchHook(patchContent) {
// prevent from linking to non-existent repository
[/github\.com\/(?:yt|h)dl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
[/github\.com\/rg3\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
[/github\.com\/blackjack4494\/hdl/g, 'github.com/blackjack4494/yt-dlc'],
[/github\.com\/hdl\/hdl/g, 'github.com/yt-dlp/yt-dlp'],
// prevent changing the smuggle URLs (for compatibility with ytdl)
[/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));

View file

@ -0,0 +1,32 @@
# this is intended to speed up some extractors,
# which sometimes need to extract data that doesn't change very often,
# but does so at unpredictable times, like youtube's signature "crypto" or soundcloud's client id
import os
from os.path import dirname as dirn
import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
from haruhi_dl import HaruhiDL
from haruhi_dl.utils import (
ExtractorError,
)
hdl = HaruhiDL(params={
'quiet': True,
})
artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
if not os.path.exists(artifact_dir):
os.mkdir(artifact_dir)
for ie_name in (
'Youtube',
'Soundcloud',
):
ie = hdl.get_info_extractor(ie_name)
try:
file_contents = ie._generate_prerelease_file()
with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w') as file:
file.write(file_contents)
except ExtractorError as err:
print(err)

View file

@ -1,141 +1,24 @@
#!/bin/bash
# IMPORTANT: the following assumptions are made
# * the GH repo is on the origin remote
# * the gh-pages branch is named so locally
# * the git config user.signingkey is properly set
# You will need
# pip install coverage nose rsa wheel
# TODO
# release notes
# make hash on local files
set -e
skip_tests=true
gpg_sign_commits=""
buildserver='localhost:8142'
while true
do
case "$1" in
--run-tests)
skip_tests=false
shift
;;
--gpg-sign-commits|-S)
gpg_sign_commits="-S"
shift
;;
--buildserver)
buildserver="$2"
shift 2
;;
--*)
echo "ERROR: unknown option $1"
exit 1
;;
*)
break
;;
esac
done
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
version="$1"
major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
echo "$version does not start with today's date!"
exit 1
if [[ "$(basename $(pwd))" == 'devscripts' ]]; then
cd ..
fi
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
useless_files=$(find haruhi_dl -type f -not -name '*.py')
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in haruhi_dl: $useless_files"; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
v="$(date "+%Y.%m.%d")"
read -p "Is ChangeLog up to date? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
/bin/echo -e "\n### First of all, testing..."
make clean
if $skip_tests ; then
echo 'SKIPPING TESTS'
else
nosetests --verbose --with-coverage --cover-package=haruhi_dl --cover-html test --stop || exit 1
if [[ "$(grep "'$v" haruhi_dl/version.py)" != '' ]]; then #' is this the first release of the day?
if [[ "$(grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' haruhi_dl/version.py)" != '' ]]; then # so, 2nd or nth?
v="$v.$(($(cat haruhi_dl/version.py | grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' | grep -Poh '[0-9]+$')+1))"
else
v="$v.1"
fi
fi
/bin/echo -e "\n### Changing version in version.py..."
sed -i "s/__version__ = '.*'/__version__ = '$version'/" haruhi_dl/version.py
sed "s/__version__ = '.*'/__version__ = '$v'/g" -i haruhi_dl/version.py
/bin/echo -e "\n### Changing version in ChangeLog..."
sed -i "s/<unreleased>/$version/" ChangeLog
/bin/echo -e "\n### Committing documentation, templates and haruhi_dl/version.py..."
make README.md CONTRIBUTING.md issuetemplates supportedsites
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md haruhi_dl/version.py ChangeLog
git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."
git tag -s -m "Release $version" "$version"
git show "$version"
read -p "Is it good, can I push? (y/n) " -n 1
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
echo
MASTER=$(git rev-parse --abbrev-ref HEAD)
git push origin $MASTER:master
git push origin "$version"
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
make haruhi-dl haruhi-dl.tar.gz
read -p "VM running? (y/n) " -n 1
wget "http://$buildserver/build/ytdl-org/haruhi-dl/haruhi-dl.exe?rev=$REV" -O haruhi-dl.exe
mkdir -p "build/$version"
mv haruhi-dl haruhi-dl.exe "build/$version"
mv haruhi-dl.tar.gz "build/$version/haruhi-dl-$version.tar.gz"
RELEASE_FILES="haruhi-dl haruhi-dl.exe haruhi-dl-$version.tar.gz"
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
ROOT=$(pwd)
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
#ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
/bin/echo -e "\n### Now switching to gh-pages..."
git clone --branch gh-pages --single-branch . build/gh-pages
(
set -e
ORIGIN_URL=$(git config --get remote.origin.url)
cd build/gh-pages
"$ROOT/devscripts/gh-pages/add-version.py" $version
"$ROOT/devscripts/gh-pages/update-feed.py"
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
"$ROOT/devscripts/gh-pages/generate-download.py"
"$ROOT/devscripts/gh-pages/update-copyright.py"
"$ROOT/devscripts/gh-pages/update-sites.py"
git add *.html *.html.in update
git commit $gpg_sign_commits -m "release $version"
git push "$ROOT" gh-pages
git push "$ORIGIN_URL" gh-pages
)
rm -rf build
make pypi-files
echo "Uploading to PyPi ..."
python setup.py sdist bdist_wheel upload
make clean
/bin/echo -e "\n### DONE!"
python3 setup.py build_lazy_extractors
python3 devscripts/prerelease_codegen.py
rm -R build dist
python3 setup.py sdist bdist_wheel
python3 -m twine upload dist/*
devscripts/wine-py2exe.sh setup.py

View file

@ -778,15 +778,28 @@ class HaruhiDL(object):
def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
If 'download', also downloads the videos.
extra_info is a dict containing the extra values to add to each result
'''
"""
Return a list with a dictionary for each video extracted.
Arguments:
url -- URL to extract
Keyword arguments:
download -- whether to download videos during extraction
ie_key -- extractor key hint
extra_info -- dictionary containing the extra values to add to each result
process -- whether to resolve all unresolved references (URLs, playlist items),
must be True for download to work.
force_generic_extractor -- force using the generic extractor
"""
if not ie_key and force_generic_extractor:
ie_key = 'Generic'
force_use_mastodon = self.params.get('force_use_mastodon')
if not ie_key and force_use_mastodon:
ie_key = 'MastodonSH'
if not ie_key:
ie_key = self.params.get('ie_key')
@ -796,7 +809,7 @@ class HaruhiDL(object):
ies = self._ies
for ie in ies:
if not ie.suitable(url):
if not force_use_mastodon and not ie.suitable(url):
continue
ie = self.get_info_extractor(ie.ie_key())
@ -1947,8 +1960,32 @@ class HaruhiDL(object):
def compatible_formats(formats):
video, audio = formats
# Check extension
# Check extensions and codecs
video_ext, audio_ext = video.get('ext'), audio.get('ext')
video_codec, audio_codec = video.get('vcodec'), audio.get('acodec')
if video_codec and audio_codec:
COMPATIBLE_CODECS = {
'mp4': (
# fourcc (m3u8, mpd)
'av01', 'hevc', 'avc1', 'mp4a',
# whatever the ism does
'h264', 'aacl',
),
'webm': (
'av01', 'vp9', 'vp8', 'opus', 'vrbs',
# these are in the webm spec, so putting it here to be sure
'vp9x', 'vp8x',
),
}
video_codec = video_codec[:4].lower()
audio_codec = audio_codec[:4].lower()
for ext in COMPATIBLE_CODECS:
if all(codec in COMPATIBLE_CODECS[ext]
for codec in (video_codec, audio_codec)):
info_dict['ext'] = ext
return True
if video_ext and audio_ext:
COMPATIBLE_EXTS = (
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
@ -1957,7 +1994,6 @@ class HaruhiDL(object):
for exts in COMPATIBLE_EXTS:
if video_ext in exts and audio_ext in exts:
return True
# TODO: Check acodec/vcodec
return False
filename_real_ext = os.path.splitext(filename)[1][1:]

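The codec-based merging added above is self-contained enough to read in isolation. A distilled sketch (COMPATIBLE_CODECS is copied from the hunk; merged_ext is a made-up name for illustration):

```python
# decide the merged container from the codec fourccs alone (sketch)
COMPATIBLE_CODECS = {
    'mp4': ('av01', 'hevc', 'avc1', 'mp4a', 'h264', 'aacl'),
    'webm': ('av01', 'vp9', 'vp8', 'opus', 'vrbs', 'vp9x', 'vp8x'),
}

def merged_ext(vcodec, acodec):
    # the code above compares only the first four characters, lower-cased
    vcodec, acodec = vcodec[:4].lower(), acodec[:4].lower()
    for ext, codecs in COMPATIBLE_CODECS.items():
        if vcodec in codecs and acodec in codecs:
            return ext
    return None

print(merged_ext('av01.0.08M.08', 'opus'))     # -> 'webm'
print(merged_ext('avc1.64001f', 'mp4a.40.2'))  # -> 'mp4'
```

This is why an AV1+opus download from YouTube ends up as .webm rather than .mkv, as the README above advertises.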
View file

@ -176,6 +176,10 @@ def _real_main(argv=None):
opts.max_sleep_interval = opts.sleep_interval
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
if opts.force_generic_extractor and opts.force_use_mastodon:
parser.error('force either generic extractor or Mastodon')
if opts.force_playwright_browser not in ('firefox', 'chromium', 'webkit', None):
parser.error('invalid browser forced, must be one of: firefox, chromium, webkit')
def parse_retries(retries):
if retries in ('inf', 'infinite'):
@ -348,6 +352,7 @@ def _real_main(argv=None):
'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'force_generic_extractor': opts.force_generic_extractor,
'force_use_mastodon': opts.force_use_mastodon,
'ie_key': opts.ie_key,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
@ -421,6 +426,7 @@ def _real_main(argv=None):
'headless_playwright': opts.headless_playwright,
'sleep_interval': opts.sleep_interval,
'max_sleep_interval': opts.max_sleep_interval,
'force_playwright_browser': opts.force_playwright_browser,
'external_downloader': opts.external_downloader,
'list_thumbnails': opts.list_thumbnails,
'playlist_items': opts.playlist_items,

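The two new switches can also be set as params when embedding the library. A minimal sketch, with the parameter names taken from the hunk above:

```python
from haruhi_dl import HaruhiDL

hdl = HaruhiDL(params={
    'force_use_mastodon': True,             # like --force-use-mastodon on the CLI
    'force_playwright_browser': 'firefox',  # must be firefox, chromium or webkit
})
```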
View file

@ -28,6 +28,7 @@ import urllib.response as compat_urllib_response
import http.cookiejar as compat_cookiejar
compat_cookiejar_Cookie = compat_cookiejar.Cookie
import http.cookies as compat_cookies
compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
import html.entities as compat_html_entities
compat_html_entities_html5 = compat_html_entities.html5
import http.client as compat_http_client
@ -155,6 +156,7 @@ __all__ = [
'compat_cookiejar',
'compat_cookiejar_Cookie',
'compat_cookies',
'compat_cookies_SimpleCookie',
'compat_ctypes_WINFUNCTYPE',
'compat_etree_Element',
'compat_etree_fromstring',

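What the new alias gives extractors, in short: SimpleCookie is the standard-library parser for Set-Cookie-style strings, now reachable through the compat layer. A quick sketch:

```python
from haruhi_dl.compat import compat_cookies_SimpleCookie

cookie = compat_cookies_SimpleCookie()
cookie.load('session=abc123; Path=/; Secure')
print(cookie['session'].value)  # -> 'abc123'
```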
View file

@ -1,5 +1,18 @@
from __future__ import unicode_literals
from ..utils import (
determine_protocol,
)
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
info_copy = info_dict.copy()
if protocol:
info_copy['protocol'] = protocol
return get_suitable_downloader(info_copy, *args, **kwargs)
# Some of these require _get_real_downloader
from .common import FileDownloader
from .f4m import F4mFD
from .hls import HlsFD
@ -8,16 +21,13 @@ from .rtmp import RtmpFD
from .dash import DashSegmentsFD
from .rtsp import RtspFD
from .ism import IsmFD
from .niconico import NiconicoDmcFD
from .external import (
get_external_downloader,
Aria2cFD,
FFmpegFD,
)
from ..utils import (
determine_protocol,
)
PROTOCOL_MAP = {
'rtmp': RtmpFD,
'm3u8_native': HlsFD,
@ -28,6 +38,7 @@ PROTOCOL_MAP = {
'http_dash_segments': DashSegmentsFD,
'ism': IsmFD,
'bittorrent': Aria2cFD,
'niconico_dmc': NiconicoDmcFD,
}

View file

@ -0,0 +1,55 @@
# coding: utf-8
from __future__ import unicode_literals
import threading
from .common import FileDownloader
from ..downloader import _get_real_downloader
from ..extractor.niconico import NiconicoIE
from ..compat import compat_urllib_request
class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
FD_NAME = 'niconico_dmc'
def real_download(self, filename, info_dict):
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.hdl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
fd = _get_real_downloader(info_dict, params=self.params)(self.hdl, self.params)
success = download_complete = False
timer = [None]
heartbeat_lock = threading.Lock()
heartbeat_url = heartbeat_info_dict['url']
heartbeat_data = heartbeat_info_dict['data'].encode()
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
def heartbeat():
try:
compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
except Exception:
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
with heartbeat_lock:
if not download_complete:
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
timer[0].start()
heartbeat_info_dict['ping']()
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
try:
heartbeat()
if type(fd).__name__ == 'HlsFD':
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
success = fd.real_download(filename, info_dict)
finally:
if heartbeat_lock:
with heartbeat_lock:
timer[0].cancel()
download_complete = True
return success

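The keep-alive pattern above, distilled into a standalone sketch (start_heartbeat is a made-up helper name; as in the downloader, a timer reschedules itself until the caller signals completion):

```python
import threading
import urllib.request

def start_heartbeat(url, data, interval=30):
    stop = threading.Event()

    def beat():
        try:
            urllib.request.urlopen(url, data=data)
        except Exception:
            pass  # a missed ping is not fatal; the next one may succeed
        if not stop.is_set():
            timer = threading.Timer(interval, beat)
            timer.daemon = True
            timer.start()

    beat()
    return stop  # call stop.set() once the download finishes
```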
View file

@ -9,10 +9,10 @@ from ..utils import (
class AppleConnectIE(InfoExtractor):
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
_TEST = {
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
_TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'md5': 'e7c38568a01ea45402570e6029206723',
'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v',
@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710',
'timestamp': 1436545535,
},
}
}, {
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return {
'id': video_id,

View file

@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE):
class ARDIE(InfoExtractor):
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
_TESTS = [{
# available till 7.01.2022
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
'info_dict': {
'display_id': 'maischberger-die-woche',
'id': '100',
'id': 'maischberger-die-woche-video100',
'display_id': 'maischberger-die-woche-video100',
'ext': 'mp4',
'duration': 3687.0,
'title': 'maischberger. die woche vom 7. Januar 2021',
@ -264,16 +264,25 @@ class ARDIE(InfoExtractor):
'thumbnail': r're:^https?://.*\.jpg$',
},
}, {
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
'only_matching': True,
}, {
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
'only_matching': True,
}, {
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
'only_matching': True,
}, {
'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
'only_matching': True,
}, {
'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('display_id')
display_id = mobj.group('id')
player_url = mobj.group('mainurl') + '~playerXml.xml'
doc = self._download_xml(player_url, display_id)
@ -324,7 +333,7 @@ class ARDIE(InfoExtractor):
self._sort_formats(formats)
return {
'id': mobj.group('id'),
'id': xpath_text(video_node, './videoId', default=display_id),
'formats': formats,
'display_id': display_id,
'title': video_node.find('./title').text,

View file

@ -0,0 +1,101 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
float_or_none,
int_or_none,
parse_iso8601,
remove_start,
)
class ArnesIE(InfoExtractor):
IE_NAME = 'video.arnes.si'
IE_DESC = 'Arnes Video'
_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
_TESTS = [{
'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
'info_dict': {
'id': 'a1qrWTOQfVoU',
'ext': 'mp4',
'title': 'Linearna neodvisnost, definicija',
'description': 'Linearna neodvisnost, definicija',
'license': 'PRIVATE',
'creator': 'Polona Oblak',
'timestamp': 1585063725,
'upload_date': '20200324',
'channel': 'Polona Oblak',
'channel_id': 'q6pc04hw24cj',
'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
'duration': 596.75,
'view_count': int,
'tags': ['linearna_algebra'],
'start_time': 10,
}
}, {
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
'only_matching': True,
}, {
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
'only_matching': True,
}, {
'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
'only_matching': True,
}, {
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
'only_matching': True,
}, {
'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
'only_matching': True,
}]
_BASE_URL = 'https://video.arnes.si'
def _real_extract(self, url):
video_id = self._match_id(url)
video = self._download_json(
self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
title = video['title']
formats = []
for media in (video.get('media') or []):
media_url = media.get('url')
if not media_url:
continue
formats.append({
'url': self._BASE_URL + media_url,
'format_id': remove_start(media.get('format'), 'FORMAT_'),
'format_note': media.get('formatTranslation'),
'width': int_or_none(media.get('width')),
'height': int_or_none(media.get('height')),
})
self._sort_formats(formats)
channel = video.get('channel') or {}
channel_id = channel.get('url')
thumbnail = video.get('thumbnailUrl')
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnail': self._BASE_URL + thumbnail,
'description': video.get('description'),
'license': video.get('license'),
'creator': video.get('author'),
'timestamp': parse_iso8601(video.get('creationTime')),
'channel': channel.get('name'),
'channel_id': channel_id,
'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
'duration': float_or_none(video.get('duration'), 1000),
'view_count': int_or_none(video.get('views')),
'tags': video.get('hashtags'),
'start_time': int_or_none(compat_parse_qs(
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
}

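A minimal usage sketch against the first test URL above, assuming the HaruhiDL API mirrors youtube-dl's:

```python
from haruhi_dl import HaruhiDL

hdl = HaruhiDL(params={'quiet': True, 'skip_download': True})
info = hdl.extract_info('https://video.arnes.si/watch/a1qrWTOQfVoU?t=10')
print(info['title'], info['start_time'])  # Linearna neodvisnost, definicija 10
```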
View file

@ -11,6 +11,7 @@ from ..compat import (
compat_etree_Element,
compat_HTTPError,
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
compat_urlparse,
)
@ -25,8 +26,10 @@ from ..utils import (
js_to_json,
parse_duration,
parse_iso8601,
strip_or_none,
try_get,
unescapeHTML,
unified_timestamp,
url_or_none,
urlencode_postdata,
urljoin,
@ -761,8 +764,17 @@ class BBCIE(BBCCoUkIE):
'only_matching': True,
}, {
# custom redirection to www.bbc.com
# also, video with window.__INITIAL_DATA__
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
'only_matching': True,
'info_dict': {
'id': 'p02xzws1',
'ext': 'mp4',
'title': "Pluto may have 'nitrogen glaciers'",
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
'thumbnail': r're:https?://.+/.+\.jpg',
'timestamp': 1437785037,
'upload_date': '20150725',
},
}, {
# single video article embedded with data-media-vpid
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
@ -1164,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
continue
formats, subtitles = self._download_media_selector(item_id)
self._sort_formats(formats)
item_desc = None
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
if blocks:
summary = []
for block in blocks:
text = try_get(block, lambda x: x['model']['text'], compat_str)
if text:
summary.append(text)
if summary:
item_desc = '\n\n'.join(summary)
item_time = None
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
if try_get(meta, lambda x: x['label']) == 'Published':
item_time = unified_timestamp(meta.get('timestamp'))
break
entries.append({
'id': item_id,
'title': item_title,
'thumbnail': item.get('holdingImageUrl'),
'formats': formats,
'subtitles': subtitles,
'timestamp': item_time,
'description': strip_or_none(item_desc),
})
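For orientation, a sketch of the payload shape the summary/metadata lookups above expect (structure inferred from the try_get paths; all values invented):

# Hypothetical media item matching the try_get paths above.
media = {
    'summary': {'blocks': [
        {'model': {'text': 'First paragraph.'}},
        {'model': {'text': 'Second paragraph.'}},
    ]},
    'metadata': {'items': [
        {'label': 'Published', 'timestamp': '25 July 2015'},
    ]},
}
# item_desc -> 'First paragraph.\n\nSecond paragraph.'
# item_time -> unified_timestamp('25 July 2015')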
for resp in (initial_data.get('data') or {}).values():
name = resp.get('name')

View file

@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
webpage)
if uploader_mobj:
info.update({
'uploader': uploader_mobj.group('name'),
'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'),
})
if not info.get('uploader'):

View file

@ -1,86 +0,0 @@
from __future__ import unicode_literals
import json
from .common import InfoExtractor
from ..utils import (
remove_start,
int_or_none,
)
class BlinkxIE(InfoExtractor):
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
IE_NAME = 'blinkx'
_TEST = {
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
'md5': '337cf7a344663ec79bf93a526a2e06c7',
'info_dict': {
'id': 'Da0Gw3xc',
'ext': 'mp4',
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
'uploader': 'IGN News',
'upload_date': '20150217',
'timestamp': 1424215740,
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
'duration': 47.743333,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
display_id = video_id[:8]
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
+ 'video=%s' % video_id)
data_json = self._download_webpage(api_url, display_id)
data = json.loads(data_json)['api']['results'][0]
duration = None
thumbnails = []
formats = []
for m in data['media']:
if m['type'] == 'jpg':
thumbnails.append({
'url': m['link'],
'width': int(m['w']),
'height': int(m['h']),
})
elif m['type'] == 'original':
duration = float(m['d'])
elif m['type'] == 'youtube':
yt_id = m['link']
self.to_screen('Youtube video detected: %s' % yt_id)
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
elif m['type'] in ('flv', 'mp4'):
vcodec = remove_start(m['vcodec'], 'ff')
acodec = remove_start(m['acodec'], 'ff')
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
tbr = vbr + abr if vbr and abr else None
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
formats.append({
'format_id': format_id,
'url': m['link'],
'vcodec': vcodec,
'acodec': acodec,
'abr': abr,
'vbr': vbr,
'tbr': tbr,
'width': int_or_none(m.get('w')),
'height': int_or_none(m.get('h')),
})
self._sort_formats(formats)
return {
'id': display_id,
'fullid': video_id,
'title': data['title'],
'formats': formats,
'uploader': data['channel_name'],
'timestamp': data['pubdate_epoch'],
'description': data.get('description'),
'thumbnails': thumbnails,
'duration': duration,
}

View file

@ -0,0 +1,91 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
parse_duration,
)
import re
class CastosHostedIE(InfoExtractor):
_VALID_URL = r'https?://[^/.]+\.castos\.com/(?:player|episodes)/(?P<id>[\da-zA-Z-]+)'
IE_NAME = 'castos:hosted'
_TESTS = [{
'url': 'https://audience.castos.com/player/408278',
'info_dict': {
'id': '408278',
'ext': 'mp3',
},
}, {
'url': 'https://audience.castos.com/episodes/improve-your-podcast-production',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage, **kw):
return [mobj.group(1) for mobj
in re.finditer(
r'<iframe\b[^>]+(?<!-)src="(https?://[^/.]+\.castos\.com/player/\d+)',
webpage)]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
series = self._html_search_regex(
r'<div class="show">\s+<strong>([^<]+)</strong>', webpage, 'series name')
title = self._html_search_regex(
r'<div class="episode-title">([^<]+)</div>', webpage, 'episode title')
audio_url = self._html_search_regex(
r'<audio class="clip">\s+<source\b[^>]+src="(https?://[^"]+)"', webpage, 'audio url')
duration = parse_duration(self._search_regex(
r'<time id="duration">(\d\d(?::\d\d)+)</time>', webpage, 'duration'))
return {
'id': video_id,
'title': title,
'url': audio_url,
'duration': duration,
'series': series,
'episode': title,
}
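Note the (?<!-)src lookbehind in _extract_urls above: it keeps lazy-loading data-src attributes from matching. A quick sketch (HTML invented):

# Sketch: the lookbehind skips data-src, matches plain src.
import re

pattern = r'<iframe\b[^>]+(?<!-)src="(https?://[^/.]+\.castos\.com/player/\d+)'
html = '<iframe data-src="lazy" src="https://audience.castos.com/player/408278">'
print(re.findall(pattern, html))
# ['https://audience.castos.com/player/408278']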
class CastosSSPIE(InfoExtractor):
@classmethod
def _extract_entries(cls, webpage, **kw):
entries = []
for found in re.finditer(
r'(?s)<div class="castos-player[^"]*"[^>]*data-episode="(\d+)-[a-z\d]+">(.+?</nav>)\s*</div>',
webpage):
video_id, entry = found.group(1, 2)
def search_entry(regex):
res = re.search(regex, entry)
if res:
return res.group(1)
series = search_entry(r'<div class="show">\s+<strong>([^<]+)</strong>')
title = search_entry(r'<div class="episode-title">([^<]+)</div>')
audio_url = search_entry(
r'<audio class="clip[^"]*">\s+<source\b[^>]+src="(https?://[^"]+)"')
duration = parse_duration(
search_entry(r'<time id="duration[^"]*">(\d\d(?::\d\d)+)</time>'))
if not title or not audio_url:
continue
entries.append({
'id': video_id,
'title': title,
'url': audio_url,
'duration': duration,
'series': series,
'episode': title,
})
return entries

View file

@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
def _real_extract(self, url):
item = self._parse_json(zlib.decompress(compat_b64decode(
compat_urllib_parse_unquote(self._match_id(url))),
-zlib.MAX_WBITS), None)['video']['items'][0]
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
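The one-line change above appends .decode('utf-8') because zlib.decompress returns bytes while _parse_json expects text. A standalone sketch of the same pipeline (the helper name is illustrative):

# Sketch of the embed-id decoding path, with the bytes -> str step.
import json
import zlib
from base64 import b64decode
from urllib.parse import unquote

def decode_embed_item(embed_id):
    # -zlib.MAX_WBITS: raw deflate stream, no zlib/gzip header
    raw = zlib.decompress(b64decode(unquote(embed_id)), -zlib.MAX_WBITS)
    return json.loads(raw.decode('utf-8'))['video']['items'][0]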

View file

@ -1,38 +1,113 @@
from __future__ import unicode_literals
from .cbs import CBSBaseIE
import re
# from .cbs import CBSBaseIE
from .common import InfoExtractor
from ..utils import (
int_or_none,
try_get,
)
class CBSSportsIE(CBSBaseIE):
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
# class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor):
IE_NAME = 'cbssports:embed'
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
(?:
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
pcid%3D(?P<pcid>\d+)
)'''
_TESTS = [{
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
'info_dict': {
'id': '1214315075735',
'ext': 'mp4',
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
'timestamp': 1524111457,
'upload_date': '20180419',
'uploader': 'CBSI-NEW',
},
'params': {
# m3u8 download
'skip_download': True,
}
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
'only_matching': True,
}, {
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
'only_matching': True,
}]
def _extract_video_info(self, filter_query, video_id):
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
# def _extract_video_info(self, filter_query, video_id):
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
def _real_extract(self, url):
uuid, pcid = re.match(self._VALID_URL, url).groups()
query = {'id': uuid} if uuid else {'pcid': pcid}
video = self._download_json(
'https://www.cbssports.com/api/content/video/',
uuid or pcid, query=query)[0]
video_id = video['id']
title = video['title']
metadata = video.get('metaData') or {}
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
formats = self._extract_m3u8_formats(
metadata['files'][0]['url'], video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal=False)
self._sort_formats(formats)
image = video.get('image')
thumbnails = None
if image:
image_path = image.get('path')
if image_path:
thumbnails = [{
'url': image_path,
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
'filesize': int_or_none(image.get('size')),
}]
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': video.get('description'),
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
'duration': int_or_none(metadata.get('duration')),
}
class CBSSportsBaseIE(InfoExtractor):
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
webpage, 'video id')
return self._extract_video_info('byId=%s' % video_id, video_id)
iframe_url = self._search_regex(
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
webpage, 'embed url')
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
class CBSSportsIE(CBSSportsBaseIE):
IE_NAME = 'cbssports'
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
'info_dict': {
'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
'ext': 'mp4',
'title': 'Cover 3: Stanford Spring Gleaning',
'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
'timestamp': 1617218398,
'upload_date': '20210331',
'duration': 502,
},
}]
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
IE_NAME = '247sports'
_VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
_TESTS = [{
'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
'info_dict': {
'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
'ext': 'mp4',
'title': '2021 QB Jake Garcia senior highlights through five games',
'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
'timestamp': 1607114223,
'upload_date': '20201204',
'duration': 208,
},
}]

View file

@ -126,9 +126,6 @@ class CDAIE(CDABaseExtractor):
metadata = self._download_json(
self._BASE_URL + '/video/' + video_id, video_id, headers=headers)['video']
if metadata.get('premium') is True and metadata.get('premium_free') is not True:
raise ExtractorError('This video is only available for premium users.', expected=True)
uploader = try_get(metadata, lambda x: x['author']['login'])
# anonymous uploader
if uploader == 'anonim':
@ -136,6 +133,8 @@ class CDAIE(CDABaseExtractor):
formats = []
for quality in metadata['qualities']:
if not quality['file']:
continue
formats.append({
'url': quality['file'],
'format': quality['title'],
@ -144,6 +143,13 @@ class CDAIE(CDABaseExtractor):
'filesize': quality.get('length'),
})
if not formats:
if metadata.get('premium') is True and metadata.get('premium_free') is not True:
raise ExtractorError('This video is only available for premium users.', expected=True)
raise ExtractorError('No video qualities found', video_id=video_id)
self._sort_formats(formats)
return {
'id': video_id,
'title': metadata['title'],

View file

@ -17,7 +17,7 @@ import math
from ..compat import (
compat_cookiejar_Cookie,
compat_cookies,
compat_cookies_SimpleCookie,
compat_etree_Element,
compat_etree_fromstring,
compat_getpass,
@ -70,6 +70,7 @@ from ..utils import (
str_or_none,
str_to_int,
strip_or_none,
try_get,
unescapeHTML,
unified_strdate,
unified_timestamp,
@ -204,6 +205,14 @@ class InfoExtractor(object):
* downloader_options A dictionary of downloader options as
described in FileDownloader
Internally, extractors can include subtitles in the format
list, in this format:
* _subtitle The subtitle object, in the same format
as in subtitles field
* _key The tag for the provided subtitle
This is never included in the output JSON, but moved
into the subtitles field.
url: Final video URL.
ext: Video filename extension.
format: The video format, defaults to ext (used for --get-format)
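A sketch of the _subtitle/_key convention documented in the hunk above (shape assumed from the description):

# Internal-only format entry; merged into subtitles before output.
formats = []
formats.append({
    '_subtitle': {'url': 'https://example.invalid/subs.en.vtt', 'ext': 'vtt'},
    '_key': 'en',  # tag under which the entry lands in 'subtitles'
})
# Before output this entry is removed from formats and merged into
# info_dict['subtitles']['en'].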
@ -247,11 +256,15 @@ class InfoExtractor(object):
subtitles: The available subtitles as a dictionary in the format
{tag: subformats}. "tag" is usually a language code, and
"subformats" is a list sorted from lower to higher
preference, each element is a dictionary with the "ext"
entry and one of:
preference, each element is a dictionary,
which must contain one of these values:
* "data": The subtitles file contents
* "url": A URL pointing to the subtitles file
"ext" will be calculated from URL if missing
These values, if missing, are guessed from other data,
analogously to the formats data:
* "ext" - subtitle extension name (vtt, srt, ...)
* "proto" - download protocol (https, http, m3u8, ...)
* "http_headers"
automatic_captions: Like 'subtitles', used by the YoutubeIE for
automatically generated captions
duration: Length of the video in seconds, as an integer or float.
@ -1275,6 +1288,23 @@ class InfoExtractor(object):
continue
info[count_key] = interaction_count
def extract_author(e):
if not e:
return None
if not e.get('author'):
return None
e = e['author']
if isinstance(e, str):
info['uploader'] = e
elif isinstance(e, dict):
etype = e.get('@type')
if etype in ('Person', 'Organization'):
info.update({
'uploader': e.get('name'),
'uploader_id': e.get('identifier'),
'uploader_url': try_get(e, lambda x: x['url']['url'], str),
})
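The new extract_author accepts both schema.org author shapes; two minimal inputs it handles (field names from the code, values invented):

# String author -> uploader only.
author_as_string = {'author': 'Some Uploader'}
# Person/Organization object -> uploader, uploader_id, uploader_url.
author_as_object = {'author': {
    '@type': 'Person',
    'name': 'Some Uploader',
    'identifier': 'uploader123',
    'url': {'url': 'https://example.invalid/u/uploader123'},  # nested, matching the try_get path
}}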
media_object_types = ('MediaObject', 'VideoObject', 'AudioObject', 'MusicVideoObject')
def extract_media_object(e):
@ -1292,7 +1322,6 @@ class InfoExtractor(object):
'thumbnails': thumbnails,
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
'uploader': str_or_none(e.get('author')),
'filesize': float_or_none(e.get('contentSize')),
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
@ -1300,6 +1329,7 @@ class InfoExtractor(object):
'view_count': int_or_none(e.get('interactionCount')),
})
extract_interaction_statistic(e)
extract_author(e)
for e in json_ld:
if '@context' in e:
@ -2952,10 +2982,10 @@ class InfoExtractor(object):
self._downloader.cookiejar.set_cookie(cookie)
def _get_cookies(self, url):
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
req = sanitized_Request(url)
self._downloader.cookiejar.add_cookie_header(req)
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
def _apply_first_set_cookie_header(self, url_handle, cookie):
"""

View file

@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, error), expected=True)
def _call_api(self, path, video_id):
def _call_api(self, path, video_id, query=None):
headers = {}
if self._auth_token:
headers['X-Auth-Token'] = self._auth_token
result = self._download_json(
self._API_BASE_URL + path, video_id, headers=headers)
self._API_BASE_URL + path, video_id, headers=headers, query=query)
self._handle_errors(result)
return result['data']
@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
_TEST = {
'url': 'https://app.curiositystream.com/video/2',
'md5': '262bb2f257ff301115f1973540de8983',
'info_dict': {
'id': '2',
'ext': 'mp4',
'title': 'How Did You Develop The Internet?',
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
}
},
'params': {
'format': 'bestvideo',
# m3u8 download
'skip_download': True,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
media = self._call_api('media/' + video_id, video_id)
title = media['title']
formats = []
for encoding in media.get('encodings', []):
m3u8_url = encoding.get('master_playlist_url')
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
for encoding_format in ('m3u8', 'mpd'):
media = self._call_api('media/' + video_id, video_id, query={
'encodingsNew': 'true',
'encodingsFormat': encoding_format,
})
for encoding in media.get('encodings', []):
playlist_url = encoding.get('master_playlist_url')
if encoding_format == 'm3u8':
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
formats.extend(self._extract_m3u8_formats(
playlist_url, video_id, 'mp4',
m3u8_id='hls', fatal=False))
elif encoding_format == 'mpd':
formats.extend(self._extract_mpd_formats(
playlist_url, video_id, mpd_id='dash', fatal=False))
encoding_url = encoding.get('url')
file_url = encoding.get('file_url')
if not encoding_url and not file_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
f = {
'width': int_or_none(encoding.get('width')),
'height': int_or_none(encoding.get('height')),
'vbr': int_or_none(encoding.get('video_bitrate')),
'abr': int_or_none(encoding.get('audio_bitrate')),
'filesize': int_or_none(encoding.get('size_in_bytes')),
'vcodec': encoding.get('video_codec'),
'acodec': encoding.get('audio_codec'),
'container': encoding.get('container_type'),
}
for f_url in (encoding_url, file_url):
if not f_url:
continue
fmt = f.copy()
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
if rtmp:
fmt.update({
'url': rtmp.group('url'),
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'ext': 'flv',
'format_id': 'rtmp',
})
else:
fmt.update({
'url': f_url,
'format_id': 'http',
})
formats.append(fmt)
self._sort_formats(formats)
title = media['title']
subtitles = {}
for closed_caption in media.get('closed_captions', []):
sub_url = closed_caption.get('file')
@ -132,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
IE_NAME = 'curiositystream:collection'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
_TESTS = [{
'url': 'https://app.curiositystream.com/collection/2',
'info_dict': {
@ -140,10 +153,13 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
'title': 'Curious Minds: The Internet',
'description': 'How is the internet shaping our lives in the 21st Century?',
},
'playlist_mincount': 17,
'playlist_mincount': 16,
}, {
'url': 'https://curiositystream.com/series/2',
'only_matching': True,
}, {
'url': 'https://curiositystream.com/collections/36',
'only_matching': True,
}]
def _real_extract(self, url):
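The rtmp branch retained above splits playpath-style URLs into the pieces the RTMP downloader needs; a standalone sketch with an invented URL:

# Sketch: how the rtmp regex splits url / app / playpath.
import re

f_url = 'rtmpe://media.example.com/ondemand/app/mp4:clips/intro.mp4'
rtmp = re.search(
    r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$',
    f_url)
print(rtmp.group('url'))       # rtmpe://media.example.com/ondemand/app
print(rtmp.group('app'))       # ondemand/app
print(rtmp.group('playpath'))  # mp4:clips/intro.mp4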

View file

@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
# From http://www.gdcvault.com/play/1013700/Advanced-Material
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
'only_matching': True,
}, {
# From https://gdcvault.com/play/1016624, empty speakerVideo
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
'info_dict': {
'id': '201210-822101_1349794556671DDDD',
'ext': 'flv',
'title': 'Pre-launch - Preparing to Take the Plunge',
},
}, {
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
'only_matching': True,
}]
def _parse_mp4(self, metadata):
@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor):
'vcodec': 'none',
'format_id': audio.get('code'),
})
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(slide_video_path, '.flv'),
'ext': 'flv',
'format_note': 'slide deck video',
'quality': -2,
'preference': -2,
'format_id': 'slides',
})
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(speaker_video_path, '.flv'),
'ext': 'flv',
'format_note': 'speaker video',
'quality': -1,
'preference': -1,
'format_id': 'speaker',
})
for video_key, format_id, preference in (
('slide', 'slides', -2), ('speaker', 'speaker', -1)):
video_path = xpath_text(metadata, './%sVideo' % video_key)
if not video_path:
continue
formats.append({
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
'play_path': remove_end(video_path, '.flv'),
'ext': 'flv',
'format_note': '%s video' % video_key,
'quality': preference,
'preference': preference,
'format_id': format_id,
})
return formats
def _real_extract(self, url):

View file

@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
_TEST = {
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
'id': '72',
'id': '432655',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
}
}, {
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
'only_matching': True,
}]
def _real_extract(self, url):
playlist_id = self._match_id(url)
@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
}, {
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'only_matching': True,
}]
def _real_extract(self, url):

View file

@ -6,7 +6,7 @@ from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlencode
from ..utils import (
ExtractorError,
unescapeHTML
merge_dicts,
)
@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
'title': 'sexy babe softcore',
'thumbnail': r're:https?://.*\.jpg',
'age_limit': 18,
}
},
'skip': 'Video not found',
}, {
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
'md5': '1baa9602ede46ce904c431f5418d8916',
@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
webpage, 'video id', default=None)
video_url = unescapeHTML(self._search_regex(
r'<source src="([^"]+)', webpage, 'video url'))
title = self._html_search_regex(
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
thumbnail = self._search_regex(
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
webpage, 'thumbnail', fatal=False)
(r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
webpage, 'title')
return {
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
return merge_dicts(info, {
'id': video_id,
'display_id': display_id,
'url': video_url,
'title': title,
'thumbnail': thumbnail,
'age_limit': 18,
}
})

View file

@ -83,6 +83,7 @@ from .arte import (
ArteTVEmbedIE,
ArteTVPlaylistIE,
)
from .arnes import ArnesIE
from .asiancrush import (
AsianCrushIE,
AsianCrushPlaylistIE,
@ -142,7 +143,6 @@ from .bleacherreport import (
BleacherReportIE,
BleacherReportCMSIE,
)
from .blinkx import BlinkxIE
from .bloomberg import BloombergIE
from .bokecc import BokeCCIE
from .bongacams import BongaCamsIE
@ -183,6 +183,7 @@ from .carambatv import (
CarambaTVPageIE,
)
from .cartoonnetwork import CartoonNetworkIE
from .castos import CastosHostedIE
from .cbc import (
CBCIE,
CBCPlayerIE,
@ -201,7 +202,11 @@ from .cbsnews import (
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE
from .cbssports import (
CBSSportsEmbedIE,
CBSSportsIE,
TwentyFourSevenSportsIE,
)
from .ccc import (
CCCIE,
CCCPlaylistIE,
@ -625,7 +630,11 @@ from .limelight import (
LimelightChannelIE,
LimelightChannelListIE,
)
from .line import LineTVIE
from .line import (
LineTVIE,
LineLiveIE,
LineLiveChannelIE,
)
from .linkedin import (
LinkedInPostIE,
LinkedInLearningIE,
@ -634,10 +643,6 @@ from .linkedin import (
from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
from .liveleak import (
LiveLeakIE,
LiveLeakEmbedIE,
)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
@ -653,6 +658,7 @@ from .lynda import (
LyndaCourseIE
)
from .m6 import M6IE
from .magentamusik360 import MagentaMusik360IE
from .mailru import (
MailRuIE,
MailRuMusicIE,
@ -664,6 +670,7 @@ from .mangomolo import (
MangomoloLiveIE,
)
from .manyvids import ManyVidsIE
from .maoritv import MaoriTVIE
from .markiza import (
MarkizaIE,
MarkizaPageIE,
@ -701,6 +708,7 @@ from .minds import (
from .ministrygrid import MinistryGridIE
from .minoto import MinotoIE
from .miomio import MioMioIE
from .misskey import MisskeySHIE
from .mit import TechTVMITIE, OCWMITIE
from .mitele import MiTeleIE
from .mixcloud import (
@ -819,7 +827,6 @@ from .nick import (
NickRuIE,
)
from .niconico import NiconicoIE, NiconicoPlaylistIE
from .ninateka import NinatekaIE
from .ninecninemedia import NineCNineMediaIE
from .ninegag import NineGagIE
from .ninenow import NineNowIE
@ -916,6 +923,11 @@ from .packtpub import (
PacktPubIE,
PacktPubCourseIE,
)
from .palcomp3 import (
PalcoMP3IE,
PalcoMP3ArtistIE,
PalcoMP3VideoIE,
)
from .pandoratv import PandoraTVIE
from .parliamentliveuk import ParliamentLiveUKIE
from .patreon import PatreonIE
@ -955,6 +967,7 @@ from .platzi import (
from .playfm import PlayFMIE
from .playplustv import PlayPlusTVIE
from .plays import PlaysTVIE
from .playstuff import PlayStuffIE
from .playtvak import PlaytvakIE
from .playvid import PlayvidIE
from .playwire import PlaywireIE
@ -1018,6 +1031,10 @@ from .radiode import RadioDeIE
from .radiojavan import RadioJavanIE
from .radiobremen import RadioBremenIE
from .radiofrance import RadioFranceIE
from .radiokapital import (
RadioKapitalIE,
RadioKapitalShowIE,
)
from .rai import (
RaiPlayIE,
RaiPlayLiveIE,
@ -1108,7 +1125,12 @@ from .scte import (
SCTECourseIE,
)
from .seeker import SeekerIE
from .sejmpl import (
SejmPlIE,
SejmPlVideoIE,
)
from .senateisvp import SenateISVPIE
from .senatpl import SenatPlIE
from .sendtonews import SendtoNewsIE
from .servus import ServusIE
from .sevenplus import SevenPlusIE
@ -1208,6 +1230,10 @@ from .spreaker import (
)
from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .spryciarze import (
SpryciarzeIE,
SpryciarzePageIE,
)
from .srgssr import (
SRGSSRIE,
SRGSSRPlayIE,
@ -1378,7 +1404,10 @@ from .tvc import (
from .tver import TVerIE
from .tvigle import TvigleIE
from .tvland import TVLandIE
from .tvn24 import TVN24IE
from .tvn24 import (
TVN24IE,
TVN24NuviIE,
)
from .tvnet import TVNetIE
from .tvnoe import TVNoeIE
from .tvnow import (
@ -1484,6 +1513,7 @@ from .videomore import (
)
from .videopress import VideoPressIE
from .videotarget import VideoTargetIE
from .vider import ViderIE
from .vidio import VidioIE
from .vidlii import VidLiiIE
from .vidme import (
@ -1590,6 +1620,10 @@ from .weibo import (
from .weiqitv import WeiqiTVIE
from .wistia import WistiaIE
from .worldstarhiphop import WorldStarHipHopIE
from .wppilot import (
WPPilotIE,
WPPilotChannelsIE,
)
from .wppl import WpPlIE
from .wsj import (
WSJIE,

View file

@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
raise ExtractorError(
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
expected=True)
elif '>You must log in to continue' in webpage:
elif any(p in webpage for p in (
'>You must log in to continue',
'id="login_form"',
'id="loginbutton"')):
self.raise_login_required()
if not video_data and '/watchparty/' in url:

View file

@ -5,29 +5,23 @@ from .common import InfoExtractor
class Formula1IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
_TESTS = [{
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
'md5': '8c79e54be72078b26b89e0e111c0502b',
_VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
_TEST = {
'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
'info_dict': {
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
'id': '6060988138001',
'ext': 'mp4',
'title': 'Race highlights - Spain 2016',
'timestamp': 1463332814,
'upload_date': '20160515',
'uploader_id': '6057949432001',
},
'params': {
# m3u8 download
'skip_download': True,
},
'add_ie': ['Ooyala'],
}, {
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
'only_matching': True,
}]
'add_ie': ['BrightcoveNew'],
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
ooyala_embed_code = self._search_regex(
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
bc_id = self._match_id(url)
return self.url_result(
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)

View file

@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
}, {
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
'only_matching': True,
}, {
# "<figure id=" pattern (#28792)
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
'only_matching': True,
}]
def _real_extract(self, url):
@ -400,7 +404,7 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
r'id-video=([^@]+@[^"]+)',
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
webpage, 'video id')
return self._make_url_result(video_id)

View file

@ -16,7 +16,7 @@ from ..utils import (
class FunimationIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'funimation'
_TOKEN = None
@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
}, {
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
'only_matching': True,
}, {
# with lang code
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
'only_matching': True,
}]
def _login(self):

View file

@ -6,6 +6,7 @@ from .common import InfoExtractor
from .kaltura import KalturaIE
from ..utils import (
HEADRequest,
remove_start,
sanitized_Request,
smuggle_url,
urlencode_postdata,
@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
'format': 'mp4-408',
},
},
{
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
'url': 'https://www.gdcvault.com/play/1025699',
'info_dict': {
'id': '0_zagynv0a',
'ext': 'mp4',
'title': 'Tech Toolbox',
'upload_date': '20190408',
'uploader_id': 'joe@blazestreaming.com',
'timestamp': 1554764629,
},
'params': {
'skip_download': True,
},
},
{
# HTML5 video
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
'only_matching': True,
},
]
def _login(self, webpage_url, display_id):
@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor):
xml_name = self._html_search_regex(
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
start_page, 'xml filename')
start_page, 'xml filename', default=None)
if not xml_name:
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
info.update({
'title': remove_start(self._search_regex(
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
'title', default=None) or self._og_search_title(
start_page, default=None), 'GDC Vault - '),
'id': video_id,
'display_id': display_id,
})
return info
embed_url = '%s/xml/%s' % (xml_root, xml_name)
ie_key = 'DigitallySpeaking'

View file

@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE
from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .kaltura import KalturaIE
@ -136,6 +135,12 @@ from .pulsembed import PulsEmbedIE
from .arcpublishing import ArcPublishingIE
from .medialaan import MedialaanIE
from .simplecast import SimplecastIE
from .spreaker import SpreakerIE
from .castos import (
CastosHostedIE,
CastosSSPIE,
)
from .vk import VKIE
class GenericIE(InfoExtractor):
@ -1634,34 +1639,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409',
},
},
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
},
'add_ie': [LiveLeakIE.ie_key()],
'params': {
'force_generic_extractor': True,
},
},
# Another LiveLeak embed pattern (#13336)
{
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
'info_dict': {
'id': '2eb_1496309988',
'ext': 'mp4',
'title': 'Thief robs place where everyone was armed',
'description': 'md5:694d73ee79e535953cf2488562288eee',
'uploader': 'brazilwtf',
},
'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@ -2301,6 +2278,43 @@ class GenericIE(InfoExtractor):
},
'playlist_mincount': 52,
},
{
# Spreaker embed
'url': 'https://socjalizm.fm/jak-bedzie-w-socjalizmie/praca/',
'info_dict': {
'id': '44098221',
'ext': 'mp3',
'title': 'Jak będzie w socjalizmie? Praca.',
'uploader': 'Socjalizm FM',
'description': 'md5:d2833c41296a996153353890c329e1af',
'upload_date': '20210329',
'uploader_id': '13705223',
'timestamp': 1617024666,
},
},
{
# Castos (hosted) player
'url': 'https://castos.com/enhanced-podcast-player/',
'info_dict': {
'id': '210448',
'ext': 'mp3',
'title': '4 Ways To Create A Video Podcast (And Why You Should Try It)',
},
},
{
# Castos Super Simple Podcasting (WordPress plugin, selfhosted)
'url': 'https://pzbn.pl/4-heated-terf-moment/',
'info_dict': {
'id': '38',
'ext': 'mp3',
'title': '#4: Heated TERF moment',
},
},
{
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
'only_matching': True,
},
]
def report_following_redirect(self, new_url):
@ -2701,7 +2715,6 @@ class GenericIE(InfoExtractor):
SoundcloudEmbedIE,
TuneInBaseIE,
JWPlatformIE,
LiveLeakIE,
DBTVIE,
VideaIE,
TwentyMinutenIE,
@ -2739,6 +2752,8 @@ class GenericIE(InfoExtractor):
ArcPublishingIE,
MedialaanIE,
SimplecastIE,
SpreakerIE,
CastosHostedIE,
):
try:
ie_key = embie.ie_key()
@ -3201,6 +3216,15 @@ class GenericIE(InfoExtractor):
if pulsembed_entries:
return self.playlist_result(pulsembed_entries, video_id, video_title)
castos_ssp_entries = CastosSSPIE._extract_entries(webpage)
if castos_ssp_entries:
return self.playlist_result(castos_ssp_entries, video_id, video_title)
# Look for sibnet embedded player
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
if sibnet_urls:
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
# Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries:
@ -3264,6 +3288,9 @@ class GenericIE(InfoExtractor):
'url': src,
'ext': (mimetype2ext(src_type)
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
'http_headers': {
'Referer': full_response.geturl(),
},
})
if formats:
self._sort_formats(formats)
@ -3332,7 +3359,7 @@ class GenericIE(InfoExtractor):
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(

View file

@ -8,6 +8,7 @@ from .common import InfoExtractor
from ..utils import (
int_or_none,
url_or_none,
ExtractorError,
)
@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
'Content-type': 'application/json'
}
res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
if not res.get('result'):
if res['error']['code'] == 13404:
raise ExtractorError('Video requires DRM protection', expected=True)
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
return res['result']['mediaItem']
def get_url(self, media_id, source_id):
@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
}
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
if not res.get('result'):
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
return res['result']['url']
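For reference, the RPC error shape the new checks assume (field names from the code; values invented):

# Hypothetical failed response: no 'result' key present.
res = {
    'error': {
        'code': 13404,  # treated above as the DRM-protection case
        'message': 'NOT_ALLOWED',
        'data': {'userMessage': 'This video requires DRM'},
    },
}
# -> ExtractorError combining 'message' and 'userMessage'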

View file

@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
'id': '196219',
'display_id': 'stories-from-emona-i',
'ext': 'flac',
'title': 'Maya Filipič - Stories from Emona I',
'artist': 'Maya Filipič',
# 'title': 'Maya Filipič - Stories from Emona I',
'title': 'Stories from Emona I',
# 'artist': 'Maya Filipič',
'track': 'Stories from Emona I',
'duration': 210,
'thumbnail': r're:^https?://.*\.jpg',
'timestamp': 1217438117,
'upload_date': '20080730',
'license': 'by-nc-nd',
'view_count': int,
'like_count': int,
'average_rating': int,
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
}
}, {
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
'only_matching': True,
}]
def _call_api(self, resource, resource_id):
path = '/api/%ss' % resource
rand = compat_str(random.random())
return self._download_json(
'https://www.jamendo.com' + path, resource_id, query={
'id[]': resource_id,
}, headers={
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
})[0]
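A standalone sketch of the X-Jam-Call signature computed above (same algorithm, illustrative path):

import hashlib
import random

path = '/api/tracks'
rand = str(random.random())
sig = '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
# sha1(path + rand) in hex, with the random nonce echoed back after '*'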
def _real_extract(self, url):
track_id, display_id = self._VALID_URL_RE.match(url).groups()
webpage = self._download_webpage(
'https://www.jamendo.com/track/' + track_id, track_id)
models = self._parse_json(self._html_search_regex(
r"data-bundled-models='([^']+)",
webpage, 'bundled models'), track_id)
track = models['track']['models'][0]
# webpage = self._download_webpage(
# 'https://www.jamendo.com/track/' + track_id, track_id)
# models = self._parse_json(self._html_search_regex(
# r"data-bundled-models='([^']+)",
# webpage, 'bundled models'), track_id)
# track = models['track']['models'][0]
track = self._call_api('track', track_id)
title = track_name = track['name']
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
artist = get_model('artist')
artist_name = artist.get('name')
if artist_name:
title = '%s - %s' % (artist_name, title)
album = get_model('album')
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
# artist = get_model('artist')
# artist_name = artist.get('name')
# if artist_name:
# title = '%s - %s' % (artist_name, title)
# album = get_model('album')
formats = [{
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
urls = []
thumbnails = []
for _, covers in track.get('cover', {}).items():
for covers in (track.get('cover') or {}).values():
for cover_id, cover_url in covers.items():
if not cover_url or cover_url in urls:
continue
@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
})
tags = []
for tag in track.get('tags', []):
for tag in (track.get('tags') or []):
tag_name = tag.get('name')
if not tag_name:
continue
tags.append(tag_name)
stats = track.get('stats') or {}
license = track.get('licenseCC') or []
return {
'id': track_id,
@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
'title': title,
'description': track.get('description'),
'duration': int_or_none(track.get('duration')),
'artist': artist_name,
# 'artist': artist_name,
'track': track_name,
'album': album.get('name'),
# 'album': album.get('name'),
'formats': formats,
'license': '-'.join(track.get('licenseCC', [])) or None,
'license': '-'.join(license) if license else None,
'timestamp': int_or_none(track.get('dateCreated')),
'view_count': int_or_none(stats.get('listenedAll')),
'like_count': int_or_none(stats.get('favorited')),
@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
}
class JamendoAlbumIE(InfoExtractor):
class JamendoAlbumIE(JamendoIE):
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
_TEST = {
_TESTS = [{
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
'info_dict': {
'id': '121486',
@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
'params': {
'playlistend': 2
}
}
def _call_api(self, resource, resource_id):
path = '/api/%ss' % resource
rand = compat_str(random.random())
return self._download_json(
'https://www.jamendo.com' + path, resource_id, query={
'id[]': resource_id,
}, headers={
'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
})[0]
}]
def _real_extract(self, url):
album_id = self._match_id(url)
@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
album_name = album.get('name')
entries = []
for track in album.get('tracks', []):
for track in (album.get('tracks') or []):
track_id = track.get('id')
if not track_id:
continue

View file

@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
def _extract_urls(webpage, url=None):
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
finditer = (
re.finditer(
list(re.finditer(
r"""(?xs)
kWidget\.(?:thumb)?[Ee]mbed\(
\{.*?
@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
""", webpage)
or re.finditer(
""", webpage))
or list(re.finditer(
r'''(?xs)
(?P<q1>["'])
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
)
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
''', webpage)
or re.finditer(
''', webpage))
or list(re.finditer(
r'''(?xs)
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
(?:(?!(?P=q1)).)*
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
(?:(?!(?P=q1)).)*
(?P=q1)
''', webpage)
''', webpage))
)
urls = []
for mobj in finditer:
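The list(...) wrappers added above fix a short-circuiting bug: re.finditer returns an iterator, which is truthy even when it yields nothing, so the `or` chain never fell through to the later patterns. A minimal demonstration:

import re

matches = re.finditer(r'x', 'no match here')
print(bool(matches))        # True -- an iterator is truthy even when empty
print(bool(list(matches)))  # False -- list() restores the intended fallthrough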

View file

@ -4,7 +4,13 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import js_to_json
from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
js_to_json,
str_or_none,
)
class LineTVIE(InfoExtractor):
@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
'view_count': video_info.get('meta', {}).get('count'),
}
class LineLiveBaseIE(InfoExtractor):
_API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'
def _parse_broadcast_item(self, item):
broadcast_id = compat_str(item['id'])
title = item['title']
is_live = item.get('isBroadcastingNow')
thumbnails = []
for thumbnail_id, thumbnail_url in (item.get('thumbnailURLs') or {}).items():
if not thumbnail_url:
continue
thumbnails.append({
'id': thumbnail_id,
'url': thumbnail_url,
})
channel = item.get('channel') or {}
channel_id = str_or_none(channel.get('id'))
return {
'id': broadcast_id,
'title': self._live_title(title) if is_live else title,
'thumbnails': thumbnails,
'timestamp': int_or_none(item.get('createdAt')),
'channel': channel.get('name'),
'channel_id': channel_id,
'channel_url': 'https://live.line.me/channels/' + channel_id if channel_id else None,
'duration': int_or_none(item.get('archiveDuration')),
'view_count': int_or_none(item.get('viewerCount')),
'comment_count': int_or_none(item.get('chatCount')),
'is_live': is_live,
}
class LineLiveIE(LineLiveBaseIE):
_VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
_TESTS = [{
'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
'info_dict': {
'id': '16331360',
'title': '振りコピ講座😙😙😙',
'ext': 'mp4',
'timestamp': 1617095132,
'upload_date': '20210330',
'channel': '白川ゆめか',
'channel_id': '4867368',
'view_count': int,
'comment_count': int,
'is_live': False,
}
}, {
# archiveStatus == 'DELETED'
'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
'only_matching': True,
}]
def _real_extract(self, url):
channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
broadcast = self._download_json(
self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
broadcast_id)
item = broadcast['item']
info = self._parse_broadcast_item(item)
protocol = 'm3u8' if info['is_live'] else 'm3u8_native'
formats = []
for k, v in (broadcast.get(('live' if info['is_live'] else 'archived') + 'HLSURLs') or {}).items():
if not v:
continue
if k == 'abr':
formats.extend(self._extract_m3u8_formats(
v, broadcast_id, 'mp4', protocol,
m3u8_id='hls', fatal=False))
continue
f = {
'ext': 'mp4',
'format_id': 'hls-' + k,
'protocol': protocol,
'url': v,
}
if not k.isdigit():
f['vcodec'] = 'none'
formats.append(f)
if not formats:
archive_status = item.get('archiveStatus')
if archive_status != 'ARCHIVED':
raise ExtractorError('This video has been ' + (archive_status or 'deleted').lower(), expected=True)
self._sort_formats(formats)
info['formats'] = formats
return info
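A sketch of the broadcast payload consumed above (key names from the code; values invented):

# Hypothetical archived broadcast.
broadcast = {
    'item': {'id': 16331360, 'title': '...', 'isBroadcastingNow': False,
             'archiveStatus': 'ARCHIVED'},
    'archivedHLSURLs': {
        'abr': 'https://example.invalid/abr.m3u8',  # adaptive master playlist
        '720': 'https://example.invalid/720.m3u8',  # digit key: video rendition
        'aac': 'https://example.invalid/aac.m3u8',  # non-digit key -> vcodec 'none'
    },
}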
class LineLiveChannelIE(LineLiveBaseIE):
_VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
_TEST = {
'url': 'https://live.line.me/channels/5893542',
'info_dict': {
'id': '5893542',
'title': 'いくらちゃん',
'description': 'md5:c3a4af801f43b2fac0b02294976580be',
},
'playlist_mincount': 29
}
def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
while True:
for row in (archived_broadcasts.get('rows') or []):
share_url = str_or_none(row.get('shareURL'))
if not share_url:
continue
info = self._parse_broadcast_item(row)
info.update({
'_type': 'url',
'url': share_url,
'ie_key': LineLiveIE.ie_key(),
})
yield info
if not archived_broadcasts.get('hasNextPage'):
return
archived_broadcasts = self._download_json(
self._API_BASE_URL + channel_id + '/archived_broadcasts',
channel_id, query={
'lastId': info['id'],
})
def _real_extract(self, url):
channel_id = self._match_id(url)
channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
return self.playlist_result(
self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
channel_id, channel.get('title'), channel.get('information'))

View file

@ -1,191 +0,0 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import int_or_none
class LiveLeakIE(InfoExtractor):
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
_TESTS = [{
'url': 'http://www.liveleak.com/view?i=757_1364311680',
'md5': '0813c2430bea7a46bf13acf3406992f4',
'info_dict': {
'id': '757_1364311680',
'ext': 'mp4',
'description': 'extremely bad day for this guy..!',
'uploader': 'ljfriel2',
'title': 'Most unlucky car accident',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
'info_dict': {
'id': 'f93_1390833151',
'ext': 'mp4',
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
'uploader': 'ARD_Stinkt',
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
# Prochan embed
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
'md5': '42c6d97d54f1db107958760788c5f48f',
'info_dict': {
'id': '4f7_1392687779',
'ext': 'mp4',
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
'uploader': 'CapObveus',
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
'age_limit': 18,
},
'skip': 'Video is dead',
}, {
# Covers https://github.com/ytdl-org/youtube-dl/pull/5983
# Multiple resolutions
'url': 'http://www.liveleak.com/view?i=801_1409392012',
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
'info_dict': {
'id': '801_1409392012',
'ext': 'mp4',
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
'uploader': 'bony333',
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
'thumbnail': r're:^https?://.*\.jpg$'
}
}, {
# Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
'url': 'http://m.liveleak.com/view?i=763_1473349649',
'add_ie': ['Youtube'],
'info_dict': {
'id': '763_1473349649',
'ext': 'mp4',
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
'uploader': 'Ziz',
'upload_date': '20160908',
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
},
'params': {
'skip_download': True,
},
}, {
'url': 'https://www.liveleak.com/view?i=677_1439397581',
'info_dict': {
'id': '677_1439397581',
'title': 'Fuel Depot in China Explosion caught on video',
},
'playlist_count': 3,
}, {
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
'only_matching': True,
}, {
# No original video
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage, **kwargs):
return re.findall(
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
webpage)
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
video_description = self._og_search_description(webpage)
video_uploader = self._html_search_regex(
r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
age_limit = int_or_none(self._search_regex(
r'you confirm that you are ([0-9]+) years and over.',
webpage, 'age limit', default=None))
video_thumbnail = self._og_search_thumbnail(webpage)
entries = self._parse_html5_media_entries(url, webpage, video_id)
if not entries:
# Maybe an embed?
embed_url = self._search_regex(
r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
webpage, 'embed URL')
return {
'_type': 'url_transparent',
'url': embed_url,
'id': video_id,
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
}
for idx, info_dict in enumerate(entries):
formats = []
for a_format in info_dict['formats']:
if not a_format.get('height'):
a_format['height'] = int_or_none(self._search_regex(
r'([0-9]+)p\.mp4', a_format['url'], 'height label',
default=None))
formats.append(a_format)
# Removing '.*.mp4' gives the raw video, which is essentially
# the same video without the LiveLeak logo at the top (see
# https://github.com/ytdl-org/youtube-dl/pull/4768)
orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
if a_format['url'] != orig_url:
format_id = a_format.get('format_id')
format_id = 'original' + ('-' + format_id if format_id else '')
if self._is_valid_url(orig_url, video_id, format_id):
formats.append({
'format_id': format_id,
'url': orig_url,
'preference': 1,
})
self._sort_formats(formats)
info_dict['formats'] = formats
# Don't append entry ID for one-video pages to keep backward compatibility
if len(entries) > 1:
info_dict['id'] = '%s_%s' % (video_id, idx + 1)
else:
info_dict['id'] = video_id
info_dict.update({
'title': video_title,
'description': video_description,
'uploader': video_uploader,
'age_limit': age_limit,
'thumbnail': video_thumbnail,
})
return self.playlist_result(entries, video_id, video_title)
class LiveLeakEmbedIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
# See generic.py for actual test cases
_TESTS = [{
'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
'only_matching': True,
}, {
'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
'only_matching': True,
}]
def _real_extract(self, url):
kind, video_id = re.match(self._VALID_URL, url).groups()
if kind == 'f':
webpage = self._download_webpage(url, video_id)
liveleak_url = self._search_regex(
r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
webpage, 'LiveLeak URL', group='url')
else:
liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())

View file

@ -0,0 +1,61 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MagentaMusik360IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
_TESTS = [{
'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
'md5': '65b6f060b40d90276ec6fb9b992c1216',
'info_dict': {
'id': '9208205928595185932',
'ext': 'm3u8',
'title': 'WITHIN TEMPTATION',
'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
}
}, {
'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
'md5': '81010d27d7cab3f7da0b0f681b983b7e',
'info_dict': {
'id': '9208205928595231363',
'ext': 'm3u8',
'title': 'Body Count feat. Ice-T',
'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
# _match_id casts to string, but since "None" is not a valid video_id for Magenta,
# there is no risk of confusion
if video_id == "None":
webpage = self._download_webpage(url, video_id)
video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')
json = self._download_json("https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id, video_id)
xml_url = json['content']['feature']['representations'][0]['contentPackages'][0]['media']['href']
metadata = json['content']['feature'].get('metadata')
title = None
description = None
duration = None
thumbnails = []
if metadata:
title = metadata.get('title')
description = metadata.get('fullDescription')
duration = metadata.get('runtimeInSeconds')
for img_key in ('teaserImageWide', 'smallCoverImage'):
if img_key in metadata:
thumbnails.append({'url': metadata[img_key].get('href')})
xml = self._download_xml(xml_url, video_id)
final_url = xml[0][0][0].attrib['src']
return {
'id': video_id,
'title': title,
'description': description,
'url': final_url,
'duration': duration,
'thumbnails': thumbnails
}
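
The bare xml[0][0][0] indexing above assumes a fixed manifest shape; a slightly more defensive equivalent (behaviour otherwise identical) would be:

def first_src(manifest_root):
    # walk three levels into the manifest and read the 'src' attribute,
    # returning None instead of crashing if the document shape changes
    try:
        return manifest_root[0][0][0].attrib.get('src')
    except IndexError:
        return None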


@@ -0,0 +1,31 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
class MaoriTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
_TEST = {
'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
'md5': '5ade8ef53851b6a132c051b1cd858899',
'info_dict': {
'id': '4774724855001',
'ext': 'mp4',
'title': 'Kōrero Mai, Series 1 Episode 54',
'upload_date': '20160226',
'timestamp': 1456455018,
'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
'uploader_id': '1614493167001',
},
}
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
brightcove_id = self._search_regex(
r'data-main-video-id=["\'](\d+)', webpage, 'brightcove id')
return self.url_result(
self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
'BrightcoveNew', brightcove_id)
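
MaoriTV pages only carry a Brightcove player reference, so extraction is a hand-off; with the id from the test above, the constructed player URL is:

BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'
print(BRIGHTCOVE_URL_TEMPLATE % '4774724855001')
# http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=4774724855001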


@@ -5,12 +5,25 @@ from .common import SelfhostedInfoExtractor
from ..utils import (
clean_html,
float_or_none,
int_or_none,
str_or_none,
try_get,
unescapeHTML,
url_or_none,
ExtractorError,
)
from urllib.parse import (
parse_qs,
urlencode,
urlparse,
)
import json
import re
from .peertube import PeerTubeSHIE
class MastodonSHIE(SelfhostedInfoExtractor):
"""
@@ -23,6 +36,7 @@ class MastodonSHIE(SelfhostedInfoExtractor):
"""
IE_NAME = 'mastodon'
_VALID_URL = r'mastodon:(?P<host>[^:]+):(?P<id>.+)'
_NETRC_MACHINE = 'mastodon'
_SH_VALID_URL = r'''(?x)
https?://
(?P<host>[^/\s]+)/
@@ -45,6 +59,7 @@ class MastodonSHIE(SelfhostedInfoExtractor):
'<li><a href="https://docs.joinmastodon.org/">Documentation</a></li>',
'<title>Pleroma</title>',
'<noscript>To use Pleroma, please enable JavaScript.</noscript>',
'<noscript>To use Soapbox, please enable JavaScript.</noscript>',
'Alternatively, try one of the <a href="https://apps.gab.com">native apps</a> for Gab Social for your platform.',
)
_SH_VALID_CONTENT_REGEXES = (
@@ -96,39 +111,238 @@ class MastodonSHIE(SelfhostedInfoExtractor):
'title': 're:.+ - He shoots, he scores and the crowd went wild.... #Animal #Sports',
'ext': 'mp4',
},
}, {
# Soapbox, audio file
'url': 'https://gleasonator.com/notice/9zvJY6h7jJzwopKAIi',
'info_dict': {
'id': '9zvJY6h7jJzwopKAIi',
'title': 're:.+ - #FEDIBLOCK',
'ext': 'oga',
},
}, {
# mastodon, card to youtube
'url': 'https://mstdn.social/@polamatysiak/106183574509332910',
'info_dict': {
'id': 'RWDU0BjcYp0',
'ext': 'mp4',
'title': 'polamatysiak - Moje wczorajsze wystąpienie w Sejmie, koniecznie zobaczcie do końca 🙂 \n#pracaposłanki\n\nhttps://youtu.be/RWDU0BjcYp0',
'description': 'md5:0c16fa11a698d5d1b171963fd6833297',
'uploader': 'Paulina Matysiak',
'uploader_id': 'UCLRAd9-Hw6kEI1aPBrSaF9A',
'upload_date': '20210505',
},
}]
def _determine_instance_software(self, host, webpage=None):
if webpage:
for i, string in enumerate(self._SH_VALID_CONTENT_STRINGS):
if string in webpage:
return ['mastodon', 'mastodon', 'pleroma', 'pleroma', 'pleroma', 'gab'][i]
if any(s in webpage for s in PeerTubeSHIE._SH_VALID_CONTENT_STRINGS):
return 'peertube'
nodeinfo_href = self._download_json(
f'https://{host}/.well-known/nodeinfo', host, 'Downloading instance nodeinfo link')
nodeinfo = self._download_json(
nodeinfo_href['links'][-1]['href'], host, 'Downloading instance nodeinfo')
return nodeinfo['software']['name']
def _login(self):
username, password = self._get_login_info()
if not username:
return False
# very basic regex, but the instance domain (the one where the user has an account)
# must be separated from the user login
mobj = re.match(r'^(?P<username>[^@]+(?:@[^@]+)?)@(?P<instance>.+)$', username)
if not mobj:
raise ExtractorError(
'Invalid login format - must be in format [username or email]@[instance]')
username, instance = mobj.group('username', 'instance')
app_info = self._downloader.cache.load('mastodon-apps', instance)
if not app_info:
app_info = self._download_json(
f'https://{instance}/api/v1/apps', None, 'Creating an app', headers={
'Content-Type': 'application/json',
}, data=bytes(json.dumps({
'client_name': 'haruhi-dl',
'redirect_uris': 'urn:ietf:wg:oauth:2.0:oob',
'scopes': 'read',
'website': 'https://haruhi.download',
}).encode('utf-8')))
self._downloader.cache.store('mastodon-apps', instance, app_info)
login_webpage = self._download_webpage(
f'https://{instance}/oauth/authorize', None, 'Downloading login page', query={
'client_id': app_info['client_id'],
'scope': 'read',
'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
'response_type': 'code',
})
oauth_token = None
# this needs to be codebase-specific, as the HTML page differs between codebases
if 'xlink:href="#mastodon-svg-logo-full"' in login_webpage:
# mastodon
if '@' not in username:
self.report_warning(
'Invalid login format - for Mastodon instances e-mail address is required')
login_form = self._hidden_inputs(login_webpage)
login_form['user[email]'] = username
login_form['user[password]'] = password
login_req, urlh = self._download_webpage_handle(
f'https://{instance}/auth/sign_in', None, 'Sending login details',
headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=bytes(urlencode(login_form).encode('utf-8')))
# cached apps may already be authorized
if '/oauth/authorize/native' in urlh.url:
oauth_token = parse_qs(urlparse(urlh.url).query)['code'][0]
else:
auth_form = self._hidden_inputs(
self._search_regex(
r'(?s)(<form\b[^>]+>.+?>Authorize</.+?</form>)',
login_req, 'authorization form'))
_, urlh = self._download_webpage_handle(
f'https://{instance}/oauth/authorize', None, 'Confirming authorization',
headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=bytes(urlencode(auth_form).encode('utf-8')))
oauth_token = parse_qs(urlparse(urlh.url).query)['code'][0]
elif 'content: "\\fe0e";' in login_webpage:
# pleroma
login_form = self._hidden_inputs(login_webpage)
login_form['authorization[scope][]'] = 'read'
login_form['authorization[name]'] = username
login_form['authorization[password]'] = password
login_req = self._download_webpage(
f'https://{instance}/oauth/authorize', None, 'Sending login details',
headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=bytes(urlencode(login_form).encode('utf-8')))
# TODO: 2FA, error handling
oauth_token = self._search_regex(
r'<h2>\s*Token code is\s*<br>\s*([a-zA-Z\d_-]+)\s*</h2>',
login_req, 'oauth token')
else:
raise ExtractorError('Unknown instance type')
actual_token = self._download_json(
f'https://{instance}/oauth/token', None, 'Downloading the actual token',
headers={
'Content-Type': 'application/x-www-form-urlencoded',
}, data=bytes(urlencode({
'client_id': app_info['client_id'],
'client_secret': app_info['client_secret'],
'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
'scope': 'read',
'code': oauth_token,
'grant_type': 'authorization_code',
}).encode('utf-8')))
return {
'instance': instance,
'authorization': f"{actual_token['token_type']} {actual_token['access_token']}",
}
def _selfhosted_extract(self, url, webpage=None):
mobj = re.match(self._VALID_URL, url)
ap_censorship_circuvement = False
if not mobj:
mobj = re.match(self._SH_VALID_URL, url)
if not mobj and self._downloader.params.get('force_use_mastodon'):
mobj = re.match(PeerTubeSHIE._VALID_URL, url)
if mobj:
ap_censorship_circuvement = 'peertube'
if not mobj and self._downloader.params.get('force_use_mastodon'):
mobj = re.match(PeerTubeSHIE._SH_VALID_URL, url)
if mobj:
ap_censorship_circuvement = 'peertube'
if not mobj:
raise ExtractorError('Unrecognized url type')
host, id = mobj.group('host', 'id')
if any(frag in url for frag in ('/objects/', '/activities/')):
if not webpage:
webpage = self._download_webpage(url, '%s:%s' % (host, id), expected_status=302)
real_url = self._og_search_property('url', webpage, default=None)
if real_url:
return self.url_result(real_url, ie='MastodonSH')
login_info = self._login()
metadata = self._download_json('https://%s/api/v1/statuses/%s' % (host, id), '%s:%s' % (host, id))
if not metadata['media_attachments']:
raise ExtractorError('No attached media')
if login_info and host != login_info['instance']:
wf_url = url
if not url.startswith('http'):
software = ap_censorship_circuvement
if not software:
software = self._determine_instance_software(host, webpage)
url_part = None
if software == 'pleroma':
if '-' in id: # UUID
url_part = 'objects'
else:
url_part = 'notice'
elif software == 'peertube':
url_part = 'videos/watch'
elif software in ('mastodon', 'gab'):
# mastodon and gab social require usernames in the url,
# but we can't determine the username without fetching the post,
# and we can't fetch the post without determining the username...
raise ExtractorError(f'Use the full url with --force-use-mastodon to download from {software}', expected=True)
else:
raise ExtractorError(f'Unknown software: {software}')
wf_url = f'https://{host}/{url_part}/{id}'
search = self._download_json(
f"https://{login_info['instance']}/api/v2/search", '%s:%s' % (host, id),
query={
'q': wf_url,
'type': 'statuses',
'resolve': True,
}, headers={
'Authorization': login_info['authorization'],
})
assert len(search['statuses']) == 1
metadata = search['statuses'][0]
else:
if not login_info and any(frag in url for frag in ('/objects/', '/activities/')):
if not webpage:
webpage = self._download_webpage(url, '%s:%s' % (host, id), expected_status=302)
real_url = self._og_search_property('url', webpage, default=None)
if real_url:
return self.url_result(real_url, ie='MastodonSH')
metadata = self._download_json(
'https://%s/api/v1/statuses/%s' % (host, id), '%s:%s' % (host, id),
headers={
'Authorization': login_info['authorization'],
} if login_info else {})
entries = []
for media in metadata['media_attachments']:
if media['type'] == 'video':
for media in metadata['media_attachments'] or ():
if media['type'] in ('video', 'audio'):
entries.append({
'id': media['id'],
'title': str_or_none(media['description']),
'url': str_or_none(media['url']),
'thumbnail': str_or_none(media['preview_url']),
'thumbnail': str_or_none(media['preview_url']) if media['type'] == 'video' else None,
'vcodec': 'none' if media['type'] == 'audio' else None,
'duration': float_or_none(try_get(media, lambda x: x['meta']['original']['duration'])),
'width': int_or_none(try_get(media, lambda x: x['meta']['original']['width'])),
'height': int_or_none(try_get(media, lambda x: x['meta']['original']['height'])),
'tbr': int_or_none(try_get(media, lambda x: x['meta']['original']['bitrate'])),
})
if len(entries) == 0:
raise ExtractorError('No audio/video attachments')
title = '%s - %s' % (str_or_none(metadata['account'].get('display_name') or metadata['account']['acct']), clean_html(str_or_none(metadata['content'])))
if ap_censorship_circuvement == 'peertube':
title = unescapeHTML(
self._search_regex(
r'^<p><a href="[^"]+">(.+?)</a></p>',
metadata['content'], 'video title'))
if len(entries) == 0:
card = metadata.get('card')
if card:
return {
'_type': 'url_transparent',
'url': card['url'],
'title': title,
'thumbnail': url_or_none(card.get('image')),
}
raise ExtractorError('No audio/video attachments')
info_dict = {
"id": id,


@@ -15,33 +15,39 @@ from ..utils import (
class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
'info_dict': {
'id': '34934644',
'id': '2mA60jWAGQCBH',
'ext': 'mp4',
'title': 'Quad Cold',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'MowgliSB',
'timestamp': 1603165266,
'upload_date': '20201020',
'uploader_id': 10619174,
'uploader_id': '10619174',
}
}, {
'url': 'https://medal.tv/clips/36787208',
'url': 'https://medal.tv/clips/2um24TWdty0NA',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '36787208',
'id': '2um24TWdty0NA',
'ext': 'mp4',
'title': 'u tk me i tk u bigger',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'Mimicc',
'timestamp': 1605580939,
'upload_date': '20201117',
'uploader_id': 5156321,
'uploader_id': '5156321',
}
}, {
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
'only_matching': True,
}, {
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
'only_matching': True,
}]
def _real_extract(self, url):


@@ -0,0 +1,74 @@
# coding: utf-8
from .common import SelfhostedInfoExtractor
from ..utils import (
mimetype2ext,
parse_iso8601,
ExtractorError,
)
import json
class MisskeySHIE(SelfhostedInfoExtractor):
IE_NAME = 'misskey'
_VALID_URL = r'misskey:(?P<host>[^:]+):(?P<id>[\da-z]+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/notes/(?P<id>[\da-z]+)'
_SH_VALID_CONTENT_STRINGS = (
'<meta name="application-name" content="Misskey"',
'<meta name="misskey:',
'<!-- If you are reading this message... how about joining the development of Misskey? -->',
)
_TESTS = [{
'url': 'https://catgirl.life/notes/8lh52dlrii',
'info_dict': {
'id': '8lh52dlrii',
'ext': 'mp4',
'timestamp': 1604387877,
'upload_date': '20201103',
'title': '@graf@poa.st @Moon@shitposter.club \n*kickstarts your federation*',
},
}]
def _selfhosted_extract(self, url, webpage=None):
host, video_id = self._match_id_and_host(url)
post = self._download_json(f'https://{host}/api/notes/show', video_id,
data=bytes(json.dumps({
'noteId': video_id,
}).encode('utf-8')),
headers={
'Content-Type': 'application/json',
})
entries = []
for file in post['files']:
if not file['type'].startswith('video/') and not file['type'].startswith('audio/'):
continue
entries.append({
'id': file['id'],
'url': file['url'],
'ext': mimetype2ext(file.get('type')),
'title': file.get('name'),
'thumbnail': file.get('thumbnailUrl'),
'timestamp': parse_iso8601(file.get('createdAt')),
'filesize': file.get('size') or None,
'age_limit': 18 if file.get('isSensitive') else 0,
})
if len(entries) == 0:
raise ExtractorError('No media found in post')
elif len(entries) == 1:
info_dict = entries[0]
else:
info_dict = {
'_type': 'playlist',
'entries': entries,
}
info_dict.update({
'id': video_id,
'title': post.get('text') or '_',
})
return info_dict
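
The Misskey API is JSON-over-POST even for reads, which is why the extractor sends a JSON body rather than query parameters; a minimal standalone request (host and note id are placeholders):

import json
import urllib.request

def fetch_note(host, note_id):
    # Misskey expects a JSON body even on read endpoints
    req = urllib.request.Request(
        f'https://{host}/api/notes/show',
        data=json.dumps({'noteId': note_id}).encode('utf-8'),
        headers={'Content-Type': 'application/json'})
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())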


@@ -255,7 +255,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
@staticmethod
def _extract_child_with_type(parent, t):
return next(c for c in parent['children'] if c.get('type') == t)
for c in parent['children']:
if c.get('type') == t:
return c
def _extract_mgid(self, webpage):
try:
@@ -286,7 +288,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
data = self._parse_json(self._search_regex(
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
main_container = self._extract_child_with_type(data, 'MainContainer')
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
return mgid
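
The switch from next() to an explicit loop is what enables the ABTesting fallback: next() raises StopIteration when no child matches, while the loop simply returns None, so the caller can write ab_testing or main_container. A quick illustration:

def extract_child_with_type(parent, t):
    for c in parent['children']:
        if c.get('type') == t:
            return c  # implicitly returns None when nothing matches

data = {'children': [{'type': 'MainContainer'}]}
assert extract_child_with_type(data, 'ABTesting') is None   # no exception raised
assert extract_child_with_type(data, 'MainContainer')['type'] == 'MainContainer'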


@@ -1,25 +1,28 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
import functools
import re
import json
import math
import datetime
from .common import InfoExtractor
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import (
compat_str,
compat_parse_qs,
compat_urllib_parse_urlparse,
)
from ..utils import (
determine_ext,
dict_get,
ExtractorError,
float_or_none,
InAdvancePagedList,
int_or_none,
float_or_none,
OnDemandPagedList,
parse_duration,
parse_iso8601,
PostProcessingError,
str_or_none,
remove_start,
try_get,
unified_timestamp,
@@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9',
'md5': 'a5bad06f1347452102953f323c69da34',
'info_dict': {
'id': 'sm22312215',
'ext': 'mp4',
@@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico'
_API_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0'
}
def _real_initialize(self):
self._login()
@@ -188,40 +196,92 @@ class NiconicoIE(InfoExtractor):
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
login_ok = False
if not login_ok:
self._downloader.report_warning('unable to log in: bad username or password')
self.report_warning('unable to log in: bad username or password')
return login_ok
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
def yesno(boolean):
return 'yes' if boolean else 'no'
def _get_heartbeat_info(self, info_dict):
session_api_data = api_data['video']['dmcInfo']['session_api']
session_api_endpoint = session_api_data['urls'][0]
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
api_data = (
info_dict.get('_api_data')
or self._parse_json(
self._html_search_regex(
'data-api-data="([^"]+)"',
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
'API data', default='{}'),
video_id))
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
def ping():
status = try_get(
self._download_json(
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
note='Acquiring permission for downloading video',
headers=self._API_HEADERS),
lambda x: x['meta']['status'])
if status != 200:
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
yesno = lambda x: 'yes' if x else 'no'
# m3u8 (encryption)
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
protocol = 'm3u8'
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
session_api_http_parameters = {
'parameters': {
'hls_parameters': {
'encryption': {
encryption: {
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
}
},
'transfer_preset': '',
'use_ssl': yesno(session_api_endpoint['isSsl']),
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
'segment_duration': 6000,
}
}
}
# http
else:
protocol = 'http'
session_api_http_parameters = {
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['isSsl']),
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
}
}
}
session_response = self._download_json(
session_api_endpoint['url'], video_id,
query={'_format': 'json'},
headers={'Content-Type': 'application/json'},
note='Downloading JSON metadata for %s' % format_id,
note='Downloading JSON metadata for %s' % info_dict['format_id'],
data=json.dumps({
'session': {
'client_info': {
'player_id': session_api_data['player_id'],
'player_id': session_api_data.get('playerId'),
},
'content_auth': {
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
'content_key_timeout': session_api_data['content_key_timeout'],
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
'service_id': 'nicovideo',
'service_user_id': session_api_data['service_user_id']
'service_user_id': session_api_data.get('serviceUserId')
},
'content_id': session_api_data['content_id'],
'content_id': session_api_data.get('contentId'),
'content_src_id_sets': [{
'content_src_ids': [{
'src_id_to_mux': {
'audio_src_ids': [audio_quality['id']],
'video_src_ids': [video_quality['id']],
'audio_src_ids': [audio_src_id],
'video_src_ids': [video_src_id],
}
}]
}],
@@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
'content_uri': '',
'keep_method': {
'heartbeat': {
'lifetime': session_api_data['heartbeat_lifetime']
'lifetime': session_api_data.get('heartbeatLifetime')
}
},
'priority': session_api_data['priority'],
'priority': session_api_data.get('priority'),
'protocol': {
'name': 'http',
'parameters': {
'http_parameters': {
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['is_ssl']),
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
}
}
}
'http_parameters': session_api_http_parameters
}
},
'recipe_id': session_api_data['recipe_id'],
'recipe_id': session_api_data.get('recipeId'),
'session_operation_auth': {
'session_operation_auth_by_signature': {
'signature': session_api_data['signature'],
'token': session_api_data['token'],
'signature': session_api_data.get('signature'),
'token': session_api_data.get('token'),
}
},
'timing_constraint': 'unlimited'
}
}).encode())
resolution = video_quality.get('resolution', {})
info_dict['url'] = session_response['data']['session']['content_uri']
info_dict['protocol'] = protocol
# get heartbeat info
heartbeat_info_dict = {
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
'data': json.dumps(session_response['data']),
# interval: heartbeatLifetime is reported in milliseconds; scale=3000 keeps a third of it, in seconds, as a safety buffer.
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
'ping': ping
}
return info_dict, heartbeat_info_dict
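
A worked example of the interval computation above (the lifetime is an illustrative value): scale=3000 divides the millisecond lifetime by 3000, i.e. a third of the lifetime expressed in seconds:

heartbeat_lifetime = 120000            # ms, as reported by the session API
interval = heartbeat_lifetime / 3000   # -> 40.0 s, a third of the 120 s lifetime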
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
def parse_format_id(id_code):
mobj = re.match(r'''(?x)
(?:archive_)?
(?:(?P<codec>[^_]+)_)?
(?:(?P<br>[\d]+)kbps_)?
(?:(?P<res>[\d+]+)p_)?
''', '%s_' % id_code)
return mobj.groupdict() if mobj else {}
protocol = 'niconico_dmc'
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
vdict = parse_format_id(video_quality['id'])
adict = parse_format_id(audio_quality['id'])
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
return {
'url': session_response['data']['session']['content_uri'],
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id,
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
'height': resolution.get('height'),
'width': resolution.get('width'),
'vcodec': vdict.get('codec'),
'acodec': adict.get('codec'),
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
'height': int_or_none(resolution.get('height', vdict.get('res'))),
'width': int_or_none(resolution.get('width')),
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
'protocol': protocol,
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
}
}
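
parse_format_id above splits DMC quality ids of the shape implied by the regex; for example (the id is hypothetical but representative):

parse_format_id('archive_h264_4000kbps_1080p')
# -> {'codec': 'h264', 'br': '4000', 'res': '1080'}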
def _real_extract(self, url):
video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it for normal
# cases, but need the cookies in order to be able to download the
# info webpage
# Get video webpage for API data.
webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'):
@@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id)
def _format_id_from_url(video_url):
return 'economy' if video_real_url.endswith('low') else 'normal'
def get_video_info_web(items):
return dict_get(api_data['video'], items)
try:
video_real_url = api_data['video']['smileInfo']['url']
except KeyError: # Flash videos
# Get flv info
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
# Get video info
video_info_xml = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
video_id, note='Downloading video info page')
flv_info = compat_parse_qs(flv_info_webpage)
if 'url' not in flv_info:
if 'deleted' in flv_info:
raise ExtractorError('The video has been deleted.',
expected=True)
elif 'closed' in flv_info:
raise ExtractorError('Niconico videos now require logging in',
expected=True)
elif 'error' in flv_info:
raise ExtractorError('%s reports error: %s' % (
self.IE_NAME, flv_info['error'][0]), expected=True)
else:
raise ExtractorError('Unable to find video URL')
def get_video_info_xml(items):
if not isinstance(items, list):
items = [items]
for item in items:
ret = xpath_text(video_info_xml, './/' + item)
if ret:
return ret
video_info_xml = self._download_xml(
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
video_id, note='Downloading video info page')
if get_video_info_xml('error'):
error_code = get_video_info_xml('code')
def get_video_info(items):
if not isinstance(items, list):
items = [items]
for item in items:
ret = xpath_text(video_info_xml, './/' + item)
if ret:
return ret
if error_code == 'DELETED':
raise ExtractorError('The video has been deleted.',
expected=True)
elif error_code == 'NOT_FOUND':
raise ExtractorError('The video is not found.',
expected=True)
elif error_code == 'COMMUNITY':
self.to_screen('%s: The video is community members only.' % video_id)
else:
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
video_real_url = flv_info['url'][0]
# Start extracting video formats
formats = []
extension = get_video_info('movie_type')
if not extension:
extension = determine_ext(video_real_url)
# Get HTML5 videos info
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
if not quality_info:
raise ExtractorError('The video can\'t be downloaded', expected=True)
formats = [{
'url': video_real_url,
'ext': extension,
'format_id': _format_id_from_url(video_real_url),
}]
else:
formats = []
for audio_quality in quality_info.get('audios') or {}:
for video_quality in quality_info.get('videos') or {}:
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
continue
formats.append(self._extract_format_for_quality(
api_data, video_id, audio_quality, video_quality))
dmc_info = api_data['video'].get('dmcInfo')
if dmc_info: # "New" HTML5 videos
quality_info = dmc_info['quality']
for audio_quality in quality_info['audios']:
for video_quality in quality_info['videos']:
if not audio_quality['available'] or not video_quality['available']:
continue
formats.append(self._extract_format_for_quality(
api_data, video_id, audio_quality, video_quality))
# Get flv/swf info
timestamp = None
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
if video_real_url:
is_economy = video_real_url.endswith('low')
self._sort_formats(formats)
else: # "Old" HTML5 videos
formats = [{
if is_economy:
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
# Invoking ffprobe to determine resolution
pp = FFmpegPostProcessor(self._downloader)
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
try:
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
except PostProcessingError as err:
raise ExtractorError(err.msg, expected=True)
v_stream = a_stream = {}
# Some complex swf files don't have a video stream (e.g. nm4809023)
for stream in metadata['streams']:
if stream['codec_type'] == 'video':
v_stream = stream
elif stream['codec_type'] == 'audio':
a_stream = stream
# Community restricted videos seem to have issues with the thumb API not returning anything at all
filesize = int(
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
or metadata['format']['size']
)
extension = (
get_video_info_xml('movie_type')
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
)
# The 'creation_time' tag on the video stream of re-encoded SMILEVIDEO mp4 files is '1970-01-01T00:00:00.000000Z'.
timestamp = (
parse_iso8601(get_video_info_web('first_retrieve'))
or unified_timestamp(get_video_info_web('postedDateTime'))
)
metadata_timestamp = (
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
or timestamp if extension != 'mp4' else 0
)
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
# If the file size is unstable, the old-server copy is not the source video.
if filesize > 1:
formats.append({
'url': video_real_url,
'ext': 'mp4',
'format_id': _format_id_from_url(video_real_url),
}]
'format_id': 'smile' if not is_economy else 'smile_low',
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
'ext': extension,
'container': extension,
'vcodec': v_stream.get('codec_name'),
'acodec': a_stream.get('codec_name'),
# Some complex swf files don't have total bit rate metadata (e.g. nm6049209)
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
'height': int_or_none(v_stream.get('height')),
'width': int_or_none(v_stream.get('width')),
'source_preference': 5 if not is_economy else -2,
'quality': 5 if is_source and not is_economy else None,
'filesize': filesize
})
def get_video_info(items):
return dict_get(api_data['video'], items)
self._sort_formats(formats)
# Start extracting information
title = get_video_info('title')
if not title:
title = self._og_search_title(webpage, default=None)
if not title:
title = self._html_search_regex(
title = (
get_video_info_xml('title') # prefer to get the untranslated original title
or get_video_info_web(['originalTitle', 'title'])
or self._og_search_title(webpage, default=None)
or self._html_search_regex(
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
webpage, 'video title')
webpage, 'video title'))
watch_api_data_string = self._html_search_regex(
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
@@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
video_detail = watch_api_data.get('videoDetail', {})
thumbnail = (
get_video_info(['thumbnail_url', 'thumbnailURL'])
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
or dict_get( # choose highest from 720p to 240p
get_video_info_web('thumbnail'),
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
or video_detail.get('thumbnail'))
description = get_video_info('description')
description = get_video_info_web('description')
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match:
@@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
timestamp = parse_iso8601(
video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9))
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
if not view_count:
match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>',
webpage, 'view count', default=None)
if match:
view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount')
view_count = (
view_count
or video_detail.get('viewCount')
or try_get(api_data, lambda x: x['video']['count']['view']))
comment_count = (
int_or_none(get_video_info_web('comment_num'))
or video_detail.get('commentCount')
or try_get(api_data, lambda x: x['video']['count']['comment']))
comment_count = (int_or_none(get_video_info('comment_num'))
or video_detail.get('commentCount')
or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count:
match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration(
get_video_info('length')
get_video_info_web('length')
or self._html_search_meta(
'video:duration', webpage, 'video duration', default=None))
or video_detail.get('length')
or get_video_info('duration'))
or get_video_info_web('duration'))
webpage_url = get_video_info('watch_url') or url
webpage_url = get_video_info_web('watch_url') or url
# for channel movie and community movie
channel_id = try_get(
api_data,
(lambda x: x['channel']['globalId'],
lambda x: x['community']['globalId']))
channel = try_get(
api_data,
(lambda x: x['channel']['name'],
lambda x: x['community']['name']))
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
uploader_id = str_or_none(
get_video_info_web(['ch_id', 'user_id'])
or owner.get('id')
or channel_id
)
uploader = (
get_video_info_web(['ch_name', 'user_nickname'])
or owner.get('nickname')
or channel
)
return {
'id': video_id,
'_api_data': api_data,
'title': title,
'formats': formats,
'thumbnail': thumbnail,
@@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
'uploader': uploader,
'timestamp': timestamp,
'uploader_id': uploader_id,
'channel': channel,
'channel_id': channel_id,
'view_count': view_count,
'comment_count': comment_count,
'duration': duration,
@@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
class NiconicoPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.nicovideo.jp/mylist/27411728',
@@ -456,60 +623,77 @@ class NiconicoPlaylistIE(InfoExtractor):
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
'only_matching': True,
}]
_PAGE_SIZE = 100
def _call_api(self, list_id, resource, query):
return self._download_json(
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
'Downloading %s JSON metadata' % resource, query=query,
headers={'X-Frontend-Id': 6})['data']['mylist']
def _parse_owner(self, item):
owner = item.get('owner') or {}
if owner:
return {
'uploader': owner.get('name'),
'uploader_id': owner.get('id'),
}
return {}
def _fetch_page(self, list_id, page):
page += 1
items = self._call_api(list_id, 'page %d' % page, {
'page': page,
'pageSize': self._PAGE_SIZE,
})['items']
for item in items:
video = item.get('video') or {}
video_id = video.get('id')
if not video_id:
continue
count = video.get('count') or {}
get_count = lambda x: int_or_none(count.get(x))
info = {
'_type': 'url',
'id': video_id,
'title': video.get('title'),
'url': 'https://www.nicovideo.jp/watch/' + video_id,
'description': video.get('shortDescription'),
'duration': int_or_none(video.get('duration')),
'view_count': get_count('view'),
'comment_count': get_count('comment'),
'ie_key': NiconicoIE.ie_key(),
}
info.update(self._parse_owner(video))
yield info
_API_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0'
}
def _real_extract(self, url):
list_id = self._match_id(url)
mylist = self._call_api(list_id, 'list', {
'pageSize': 1,
})
entries = InAdvancePagedList(
functools.partial(self._fetch_page, list_id),
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
self._PAGE_SIZE)
result = self.playlist_result(
entries, list_id, mylist.get('name'), mylist.get('description'))
result.update(self._parse_owner(mylist))
return result
def get_page_data(pagenum, pagesize):
return self._download_json(
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
query={'page': 1 + pagenum, 'pageSize': pagesize},
headers=self._API_HEADERS).get('data').get('mylist')
data = get_page_data(0, 1)
title = data.get('name')
description = data.get('description')
uploader = data.get('owner').get('name')
uploader_id = data.get('owner').get('id')
def pagefunc(pagenum):
data = get_page_data(pagenum, 25)
return ({
'_type': 'url',
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
} for item in data.get('items'))
return {
'_type': 'playlist',
'id': list_id,
'title': title,
'description': description,
'uploader': uploader,
'uploader_id': uploader_id,
'entries': OnDemandPagedList(pagefunc, 25),
}
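
The rewrite also swaps InAdvancePagedList, which needs totalItemCount up front to compute the number of pages, for OnDemandPagedList, which keeps requesting pages lazily. The underlying pattern, sketched:

def paged_entries(get_page_data, page_size=25):
    # lazy paging in the OnDemandPagedList style: no total count required
    pagenum = 0
    while True:
        items = get_page_data(pagenum, page_size).get('items') or []
        if not items:
            break
        for item in items:
            yield item
        pagenum += 1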
class NiconicoUserIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
_TEST = {
'url': 'https://www.nicovideo.jp/user/419948',
'info_dict': {
'id': '419948',
},
'playlist_mincount': 101,
}
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
_PAGE_SIZE = 100
_API_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0'
}
def _entries(self, list_id):
total_count = 1
count = page_num = 0
while count < total_count:
json_parsed = self._download_json(
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
headers=self._API_HEADERS,
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
if not page_num:
total_count = int_or_none(json_parsed['data'].get('totalCount'))
for entry in json_parsed["data"]["items"]:
count += 1
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
page_num += 1
def _real_extract(self, url):
list_id = self._match_id(url)
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
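
One detail that ties the two halves of the Niconico change together: _extract_format_for_quality packs the source ids into a synthetic niconico_dmc URL, and _get_heartbeat_info later unpacks them from info_dict['url']. The round trip (ids are example values):

url = 'niconico_dmc:sm22312215/archive_h264_1080p/archive_aac_192kbps'
video_id, video_src_id, audio_src_id = url.split(':')[1].split('/')
assert (video_id, video_src_id, audio_src_id) == (
    'sm22312215', 'archive_h264_1080p', 'archive_aac_192kbps')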


@@ -1,100 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
determine_ext,
js_to_json,
)
class NinatekaIE(InfoExtractor):
IE_NAME = 'ninateka'
_VALID_URL = r'https?://ninateka\.pl/(?:film|audio)/(?P<id>[^/\?#]+)'
_TESTS = [{
'url': 'https://ninateka.pl/film/dziwne-przygody-kota-filemona-7',
'md5': '8b25c2998b48e1add7d93a5e27030786',
'info_dict': {
'id': 'dziwne-przygody-kota-filemona-7',
'ext': 'mp4',
'title': 'Dziwny świat kota Filemona | Poważne zmartwienie',
'description': 'Filemon ma kłopot z własnym wyglądem, czy uda mu się z nim uporać?',
},
}, {
'url': 'https://ninateka.pl/audio/telefon-drony-fisz-1-12',
'md5': 'fa03fc229d3b4d8eaa18976a7020909e',
'info_dict': {
'id': 'telefon-drony-fisz-1-12',
'ext': 'm4a',
'title': 'Telefon | Drony | Fisz Emade Tworzywo | 1/12',
'description': 'Utwór z długo wyczekiwanego albumu studyjnego Fisz Emade Tworzywo pt. „Drony”',
},
}]
def decode_url(self, encoded):
xor_val = ord('h') ^ ord(encoded[0])
return ''.join(chr(ord(c) ^ xor_val) for c in encoded)
def extract_formats(self, data, video_id, name):
info = self._parse_json(data, video_id, transform_source=js_to_json)
formats = []
for source_info in info['sources']:
url = self.decode_url(source_info['src'])
type_ = source_info.get('type')
if type_ == 'application/vnd.ms-sstr+xml' or url.endswith('/Manifest'):
formats.extend(self._extract_ism_formats(
url, video_id, ism_id='mss-{}'.format(name), fatal=False))
elif type_ == 'application/x-mpegURL' or url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(
url, video_id, ext='mp4', m3u8_id='hls-{}'.format(name), fatal=False))
elif type_ == 'application/dash+xml' or url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(
url, video_id, mpd_id='dash-{}'.format(name), fatal=False))
elif url.endswith('.f4m'):
formats.extend(self._extract_f4m_formats(
url, video_id, f4m_id='hds-{}'.format(name), fatal=False))
else:
formats.append({
'format_id': 'direct-{}'.format(name),
'url': url,
'ext': determine_ext(url, 'mp4'),
})
return formats
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
main = self._search_regex(
r'(?m)(?:var|let|const)\s+playerOptionsWithMainSource\s*=\s*(\{.*?\})\s*;\s*?$',
webpage, 'main source')
formats = self.extract_formats(main, video_id, 'main')
audiodesc = self._search_regex(
r'(?m)(?:var|let|const)\s+playerOptionsWithAudioDescriptionSource\s*=\s*(\{.*?\})\s*;\s*?$',
webpage, 'audio description', default=None)
if audiodesc:
formats.extend(self.extract_formats(audiodesc, video_id, 'audiodescription'))
english_ver = self._search_regex(
r'(?m)(?:var|let|const)\s+playerOptionsWithEnglishVersion\s*=\s*(\{.*?\})\s*;\s*?$',
webpage, 'english version', default=None)
if english_ver:
formats.extend(self.extract_formats(english_ver, video_id, 'english'))
self._sort_formats(formats)
return {
'id': video_id,
'title': self._og_search_title(webpage),
'formats': formats,
'description': self._og_search_description(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}
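
The removed decode_url is a self-keying XOR cipher: every source URL starts with 'h' (as in 'https'), so XORing the first byte with 'h' recovers the key. A round trip demonstrating it:

def decode_url(encoded):
    xor_val = ord('h') ^ ord(encoded[0])
    return ''.join(chr(ord(c) ^ xor_val) for c in encoded)

key = 42
scrambled = ''.join(chr(ord(c) ^ key) for c in 'https://example.com/video.m3u8')
assert decode_url(scrambled) == 'https://example.com/video.m3u8'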


@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json(
urljoin('http://psapi.nrk.no/', path),
urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})


@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False))
elif ext == 'mpd':
formats.extend(self._extract_mpd_formats(
src, video_id, mpd_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
@@ -140,6 +143,25 @@
})
upload_date = unified_strdate(sd.get('created_date'))
thumbnails = []
preview = sd.get('preview_image_url')
if preview:
thumbnails.append({
'id': 'preview',
'url': preview,
'preference': 0,
})
image = sd.get('image_full_url')
if not image and len(data_jsb) == 1:
image = self._og_search_thumbnail(webpage)
if image:
thumbnails.append({
'id': 'full',
'url': image,
'preference': 1,
})
entries.append({
'_type': 'video',
'id': video_id,
@@ -149,7 +171,7 @@
'description': sd.get('description'),
'duration': int_or_none(sd.get('duration_in_seconds')),
'upload_date': upload_date,
'thumbnail': sd.get('image_full_url'),
'thumbnails': thumbnails,
})
return {
@@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
duration = end - start if end and start else None
entries.append({
'id': loop_stream_id.replace('.mp3', ''),
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
'title': title,
'description': clean_html(data.get('subtitle')),
'duration': duration,


@@ -0,0 +1,148 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
int_or_none,
str_or_none,
try_get,
)
class PalcoMP3BaseIE(InfoExtractor):
_GQL_QUERY_TMPL = '''{
artist(slug: "%s") {
%s
}
}'''
_ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
%s
}'''
_MUSIC_FIELDS = '''duration
hls
mp3File
musicID
plays
title'''
def _call_api(self, artist_slug, artist_fields):
return self._download_json(
'https://www.palcomp3.com.br/graphql/', artist_slug, query={
'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
})['data']
def _parse_music(self, music):
music_id = compat_str(music['musicID'])
title = music['title']
formats = []
hls_url = music.get('hls')
if hls_url:
formats.append({
'url': hls_url,
'protocol': 'm3u8_native',
'ext': 'mp4',
})
mp3_file = music.get('mp3File')
if mp3_file:
formats.append({
'url': mp3_file,
})
return {
'id': music_id,
'title': title,
'formats': formats,
'duration': int_or_none(music.get('duration')),
'view_count': int_or_none(music.get('plays')),
}
def _real_initialize(self):
self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
def _real_extract(self, url):
artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
music = self._call_api(artist_slug, artist_fields)['artist']['music']
return self._parse_music(music)
class PalcoMP3IE(PalcoMP3BaseIE):
IE_NAME = 'PalcoMP3:song'
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
'md5': '99fd6405b2d8fd589670f6db1ba3b358',
'info_dict': {
'id': '3162927',
'ext': 'mp3',
'title': 'Nossas Composições - CUIDA BEM DELA',
'duration': 210,
'view_count': int,
}
}]
@classmethod
def suitable(cls, url):
return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
IE_NAME = 'PalcoMP3:artist'
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
_TESTS = [{
'url': 'https://www.palcomp3.com.br/condedoforro/',
'info_dict': {
'id': '358396',
'title': 'Conde do Forró',
},
'playlist_mincount': 188,
}]
_ARTIST_FIELDS_TMPL = '''artistID
musics {
nodes {
%s
}
}
name'''
@classmethod
def suitable(cls, url):
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
def _real_extract(self, url):
artist_slug = self._match_id(url)
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
def entries():
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
yield self._parse_music(music)
return self.playlist_result(
entries(), str_or_none(artist.get('artistID')), artist.get('name'))
class PalcoMP3VideoIE(PalcoMP3BaseIE):
IE_NAME = 'PalcoMP3:video'
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
_TESTS = [{
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
'add_ie': ['Youtube'],
'info_dict': {
'id': '_pD1nR2qqPg',
'ext': 'mp4',
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
'description': 'md5:7043342c09a224598e93546e98e49282',
'upload_date': '20161107',
'uploader_id': 'maiaramaraisaoficial',
'uploader': 'Maiara e Maraisa',
}
}]
_MUSIC_FIELDS = 'youtubeID'
def _parse_music(self, music):
youtube_id = music['youtubeID']
return self.url_result(youtube_id, 'Youtube', youtube_id)
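
After template substitution, the GraphQL document sent by the song extractor looks roughly like this (slugs taken from the first test above, whitespace simplified):

query = '''{
  artist(slug: "maiaraemaraisaoficial") {
    music(slug: "nossas-composicoes-cuida-bem-dela") {
      duration
      hls
      mp3File
      musicID
      plays
      title
    }
  }
}'''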


@@ -1,6 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
import datetime
from urllib.parse import urlencode
import re
from .common import SelfhostedInfoExtractor
@@ -14,11 +16,12 @@ from ..utils import (
unified_timestamp,
url_or_none,
urljoin,
ExtractorError,
)
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
_SH_VALID_CONTENT_STRINGS = (
'<title>PeerTube<',
@@ -27,10 +30,55 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
'<meta property="og:platform" content="PeerTube"',
)
_NETRC_MACHINE = 'peertube'
_LOGIN_INFO = None
def _login(self):
if self._LOGIN_INFO:
ts = datetime.datetime.now().timestamp()
if self._LOGIN_INFO['expires_on'] >= ts + 5:
return True
username, password = self._get_login_info()
if not username:
return None
# the instance domain (the one where the user has an account) must be separated from the user e-mail
mobj = re.match(r'^(?P<username>[^@]+(?:@[^@]+)?)@(?P<instance>.+)$', username)
if not mobj:
raise ExtractorError(
'Invalid login format - must be in format [username or email]@[instance]')
username, instance = mobj.group('username', 'instance')
oauth_keys = self._downloader.cache.load('peertube-oauth', instance)
if not oauth_keys:
oauth_keys = self._download_json(f'https://{instance}/api/v1/oauth-clients/local', instance, 'Downloading OAuth keys')
self._downloader.cache.store('peertube-oauth', instance, oauth_keys)
client_id, client_secret = oauth_keys['client_id'], oauth_keys['client_secret']
auth_res = self._download_json(f'https://{instance}/api/v1/users/token', instance, 'Logging in', data=bytes(urlencode({
'client_id': client_id,
'client_secret': client_secret,
'response_type': 'code',
'grant_type': 'password',
'scope': 'user',
'username': username,
'password': password,
}).encode('utf-8')))
ts = datetime.datetime.now().timestamp()
auth_res['instance'] = instance
auth_res['expires_on'] = ts + auth_res['expires_in']
auth_res['refresh_token_expires_on'] = ts + auth_res['refresh_token_expires_in']
# stored on the base class rather than self, so the login details are shared by all peertube extractors
PeerTubeBaseExtractor._LOGIN_INFO = auth_res
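
The class-level cache above lets every PeerTube extractor reuse a single token until it is within five seconds of expiry; the freshness check reduces to:

import datetime

def token_is_fresh(login_info):
    # reuse the cached token while at least 5 seconds of validity remain
    now = datetime.datetime.now().timestamp()
    return login_info['expires_on'] >= now + 5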
def _call_api(self, host, resource, resource_id, path, note=None, errnote=None, fatal=True):
return self._download_json(
self._API_BASE % (host, resource, resource_id, path), resource_id,
headers={
'Authorization': f'Bearer {self._LOGIN_INFO["access_token"]}',
} if self._LOGIN_INFO and self._LOGIN_INFO['instance'] == host else {},
note=note, errnote=errnote, fatal=fatal)
def _parse_video(self, video, url):
@@ -132,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
class PeerTubeSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_TESTS = [{
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
'md5': '9bed8c0137913e17b86334e5885aacff',
'md5': '8563064d245a4be5705bddb22bb00a28',
'info_dict': {
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4',
'title': 'What is PeerTube?',
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
'timestamp': 1538391166,
'upload_date': '20181001',
@@ -172,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
'upload_date': '20200420',
'uploader': 'Drew DeVault',
}
}, {
# new url scheme since PeerTube 3.3
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
'info_dict': {
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
'ext': 'mp4',
'title': 'E2E tests',
'uploader_id': '37855',
'timestamp': 1589276219,
'upload_date': '20200512',
'uploader': 'chocobozzz',
},
}, {
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
'only_matching': True,
}, {
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
'only_matching': True,
}, {
'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
'only_matching': True,
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
'only_matching': True,
@@ -221,6 +290,17 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
def _selfhosted_extract(self, url, webpage=None):
host, video_id = self._match_id_and_host(url)
self._login()
if self._LOGIN_INFO and self._LOGIN_INFO['instance'] != host:
video_search = self._call_api(
self._LOGIN_INFO['instance'], 'search', 'videos', '?' + urlencode({
'search': f'https://{host}/videos/watch/{video_id}',
}), note='Searching for remote video')
if len(video_search) == 0:
raise ExtractorError('Remote video not found')
host, video_id = self._LOGIN_INFO['instance'], video_search['data'][0]['uuid']
video = self._call_api(
host, 'videos', video_id, '', note='Downloading video JSON')
@@ -230,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
description = None
if webpage:
description = self._og_search_description(webpage)
description = self._og_search_description(webpage, default=None)
if not description:
full_description = self._call_api(
host, 'videos', video_id, 'description', note='Downloading description JSON',
@@ -246,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
@@ -257,11 +337,16 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
'uploader': 'Internet. Czas działać!',
},
'playlist_mincount': 14,
}, {
'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):
host, display_id = self._match_id_and_host(url)
self._login()
playlist_data = self._call_api(host, 'video-playlists', display_id, '', 'Downloading playlist metadata')
entries = []
i = 0
@@ -291,23 +376,28 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
'info_dict': {
'id': '2',
'title': 'internet_czas_dzialac',
'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
'title': 'Internet. Czas działać!',
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
'uploader_id': 3,
'uploader': 'Internet. Czas działać!',
},
'playlist_mincount': 14,
}, {
'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):
host, display_id = self._match_id_and_host(url)
self._login()
channel_data = self._call_api(host, 'video-channels', display_id, '', 'Downloading channel metadata')
entries = []
i = 0
@@ -338,23 +428,28 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
_TESTS = [{
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
'info_dict': {
'id': '3',
'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
'uploader': 'Internet. Czas działać!',
'title': 'Internet. Czas działać!',
'uploader_id': 3,
},
'playlist_mincount': 14,
}, {
'url': 'https://video.internet-czas-dzialac.pl/a/icd',
'only_matching': True,
}]
def _selfhosted_extract(self, url, webpage=None):
host, display_id = self._match_id_and_host(url)
self._login()
account_data = self._call_api(host, 'accounts', display_id, '', 'Downloading account metadata')
entries = []
i = 0


@ -9,8 +9,9 @@ from ..compat import compat_str
from ..utils import (
int_or_none,
merge_dicts,
try_get,
unified_timestamp,
xpath_text,
urljoin,
)
@ -27,10 +28,11 @@ class PhoenixIE(ZDFBaseIE):
'title': 'Wohin führt der Protest in der Pandemie?',
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
'duration': 1691,
'timestamp': 1613906100,
'timestamp': 1613902500,
'upload_date': '20210221',
'uploader': 'Phoenix',
'channel': 'corona nachgehakt',
'series': 'corona nachgehakt',
'episode': 'Wohin führt der Protest in der Pandemie?',
},
}, {
# Youtube embed
@ -79,50 +81,53 @@ class PhoenixIE(ZDFBaseIE):
video_id = compat_str(video.get('basename') or video.get('content'))
details = self._download_xml(
details = self._download_json(
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
video_id, 'Downloading details XML', query={
video_id, 'Downloading details JSON', query={
'ak': 'web',
'ptmd': 'true',
'id': video_id,
'profile': 'player2',
})
title = title or xpath_text(
details, './/information/title', 'title', fatal=True)
content_id = xpath_text(
details, './/video/details/basename', 'content id', fatal=True)
title = title or details['title']
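# the asset id needed for the ptmd call is, apparently, only exposed inside the nielsen tracking blob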
content_id = details['tracking']['nielsen']['content']['assetid']
info = self._extract_ptmd(
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
content_id, None, url)
timestamp = unified_timestamp(xpath_text(details, './/details/airtime'))
duration = int_or_none(try_get(
details, lambda x: x['tracking']['nielsen']['content']['length']))
timestamp = unified_timestamp(details.get('editorialDate'))
series = try_get(
details, lambda x: x['tracking']['nielsen']['content']['program'],
compat_str)
episode = title if details.get('contentType') == 'episode' else None
thumbnails = []
for node in details.findall('.//teaserimages/teaserimage'):
thumbnail_url = node.text
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
for thumbnail_key, thumbnail_url in teaser_images.items():
thumbnail_url = urljoin(url, thumbnail_url)
if not thumbnail_url:
continue
thumbnail = {
'url': thumbnail_url,
}
thumbnail_key = node.get('key')
if thumbnail_key:
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
if m:
thumbnail['width'] = int(m.group(1))
thumbnail['height'] = int(m.group(2))
thumbnails.append(thumbnail)
return merge_dicts(info, {
'id': content_id,
'title': title,
'description': xpath_text(details, './/information/detail'),
'duration': int_or_none(xpath_text(details, './/details/lengthSec')),
'description': details.get('leadParagraph'),
'duration': duration,
'thumbnails': thumbnails,
'timestamp': timestamp,
'uploader': xpath_text(details, './/details/channel'),
'uploader_id': xpath_text(details, './/details/originChannelId'),
'channel': xpath_text(details, './/details/originChannelTitle'),
'uploader': details.get('tvService'),
'series': series,
'episode': episode,
})


@ -0,0 +1,65 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
smuggle_url,
try_get,
)
class PlayStuffIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
'md5': 'c82d3669e5247c64bc382577843e5bd0',
'info_dict': {
'id': '6250584958001',
'ext': 'mp4',
'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
'uploader_id': '6005208634001',
'timestamp': 1619491027,
'upload_date': '20210427',
},
'add_ie': ['BrightcoveNew'],
}, {
# geo restricted, bypassable
'url': 'https://play.stuff.co.nz/details/_6155660351001',
'only_matching': True,
}]
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
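# template placeholders: brightcove account id, player id and the asset id, e.g.
# http://players.brightcove.net/6005208634001/default_default/index.html?videoId=<asset_id>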
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
state = self._parse_json(
self._search_regex(
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
video_id)
account_id = try_get(
state, lambda x: x['configurations']['accountId'],
compat_str) or '6005208634001'
player_id = try_get(
state, lambda x: x['configurations']['playerId'],
compat_str) or 'default'
entries = []
for item_id, video in state['items'].items():
if not isinstance(video, dict):
continue
asset_id = try_get(
video, lambda x: x['content']['attributes']['assetId'],
compat_str)
if not asset_id:
continue
entries.append(self.url_result(
smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
{'geo_countries': ['NZ']}),
'BrightcoveNew', video_id))
return self.playlist_result(entries, video_id)


@ -393,7 +393,7 @@ query viewClip {
# To somewhat reduce the probability of these consequences
# we will sleep a random amount of time before each call to ViewClip.
self._sleep(
random.randint(2, 5), display_id,
random.randint(5, 10), display_id,
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
if not viewclip:


@ -91,6 +91,14 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
'upload_date': '20201116',
},
}]
}, {
# PR4 audition - other frontend
'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
'info_dict': {
'id': '2610977',
'ext': 'mp3',
'title': 'Pogłos 29 października godz. 23:01',
},
}, {
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
'only_matching': True,
@ -113,24 +121,34 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
content = self._search_regex(
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
webpage, 'content')
webpage, 'content', default=None)
timestamp = unified_timestamp(self._html_search_regex(
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
webpage, 'timestamp', fatal=False))
webpage, 'timestamp', default=None))
thumbnail_url = self._og_search_thumbnail(webpage)
thumbnail_url = self._og_search_thumbnail(webpage, default=None)
title = self._og_search_title(webpage).strip()
description = strip_or_none(self._og_search_description(webpage, default=None))
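# PR4 auditions use a different frontend with no article player;
# the recording is exposed as a single static.prsa.pl source instead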
if not content:
return {
'id': playlist_id,
'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
'title': title,
'description': description,
'timestamp': timestamp,
'thumbnail': thumbnail_url,
}
entries = self._extract_webpage_player_entries(content, playlist_id, {
'title': title,
'timestamp': timestamp,
'thumbnail': thumbnail_url,
})
description = strip_or_none(self._og_search_description(webpage))
return self.playlist_result(entries, playlist_id, title, description)

View file

@ -31,6 +31,7 @@ from ..utils import (
class PornHubBaseIE(InfoExtractor):
_REQUIRES_PLAYWRIGHT = True
_NETRC_MACHINE = 'pornhub'
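# matches the clearnet hosts (captured as <host>) as well as the official onion mirror,
# e.g. www.pornhub.com, pornhubpremium.net, pornhubthbh7ap3u.onion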
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
@ -125,11 +126,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:[^/]+\.)?
%s
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
'''
''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'info_dict': {
@ -238,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
}, {
# geo restricted
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
'only_matching': True,
}]
@staticmethod
@ -277,6 +287,11 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
if any(re.search(p, webpage) for p in (
r'class=["\']geoBlocked["\']',
r'>\s*This content is unavailable in your country')):
self.raise_geo_restricted()
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
@ -400,17 +415,24 @@ class PornHubIE(PornHubBaseIE):
formats = []
def add_format(format_url, height=None):
tbr = None
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
if mobj:
if not height:
height = int(mobj.group('height'))
tbr = int(mobj.group('tbr'))
ext = determine_ext(format_url)
if ext == 'mpd':
formats.extend(self._extract_mpd_formats(
format_url, video_id, mpd_id='dash', fatal=False))
return
if ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
return
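# progressive URLs usually encode the quality in the filename, e.g. .../720P_4000K_.../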
if not height:
height = int_or_none(self._search_regex(
r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
default=None))
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
'tbr': tbr,
})
for video_url, height in video_urls:
@ -419,16 +441,6 @@ class PornHubIE(PornHubBaseIE):
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
if upload_date:
upload_date = upload_date.replace('/', '')
ext = determine_ext(video_url)
if ext == 'mpd':
formats.extend(self._extract_mpd_formats(
video_url, video_id, mpd_id='dash', fatal=False))
continue
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
continue
if '/video/get_media' in video_url:
medias = self._download_json(video_url, video_id, fatal=False)
if isinstance(medias, list):
@ -442,7 +454,8 @@ class PornHubIE(PornHubBaseIE):
add_format(video_url, height)
continue
add_format(video_url)
self._sort_formats(formats)
self._sort_formats(
formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@ -511,7 +524,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118,
@ -540,6 +553,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
'only_matching': True,
}]
def _real_extract(self, url):
@ -615,7 +631,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
@ -720,6 +736,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
}, {
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
'only_matching': True,
}]
@classmethod
@ -730,7 +749,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': {
@ -740,4 +759,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
}, {
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
}]


@ -0,0 +1,100 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
unescapeHTML,
)
from urllib.parse import urlencode
class RadioKapitalBaseIE(InfoExtractor):
# offtopic: Kapitał did a great job with their frontend, which just works, right after opening;
# it can't be compared to any commercial radio or news service.
# also, it's the first wordpress page I don't hate.
def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}):
return self._download_json(
f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}',
video_id, note=note)
def _parse_episode(self, ep):
data = ep['data']
release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3])
return {
'_type': 'url_transparent',
'url': data['mixcloud_url'],
'ie_key': 'Mixcloud',
'id': str(data['id']),
'title': unescapeHTML(data['title']),
'description': data.get('content'),
'tags': [tag['name'] for tag in data['tags']],
'release_date': release,
'series': data['show']['title'],
}
class RadioKapitalIE(RadioKapitalBaseIE):
IE_NAME = 'radiokapital'
_VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'
_TESTS = [{
'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
'info_dict': {
'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
'ext': 'm4a',
'title': '#5: Its okay to be immaterial',
'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
'uploader': 'Radio Kapitał',
'uploader_id': 'radiokapital',
'timestamp': 1621640164,
'upload_date': '20210521',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
episode = self._call_api('episodes/%s' % video_id, video_id)
return self._parse_episode(episode)
class RadioKapitalShowIE(RadioKapitalBaseIE):
IE_NAME = 'radiokapital:show'
_VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'
_TESTS = [{
'url': 'https://radiokapital.pl/shows/wesz',
'info_dict': {
'id': '100',
'title': 'WĘSZ',
'description': 'md5:9046105f7eeb03b7f01240fbed245df6',
},
'playlist_mincount': 17,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
page_no = 1
page_count = 1
entries = []
while page_no <= page_count:
episode_list = self._call_api(
'episodes', video_id,
f'Downloading episode list page #{page_no}', qs={
'show': video_id,
'page': page_no,
})
page_no += 1
page_count = episode_list['max']
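# 'max' is the total number of pages, as reported by the API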
for ep in episode_list['items']:
entries.append(self._parse_episode(ep))
show = episode_list['items'][0]['data']['show']
return {
'_type': 'playlist',
'entries': entries,
'id': str(show['id']),
'title': show['title'],
'description': show['content'],
}


@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE):
rrn_id = self._match_id(url)
asset_id = self._download_json(
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'},
query={
rrn_id, headers={
'Accept': 'application/json',
'API-KEY': 'e90a1ff11335423998b100c929ecc866',
}, query={
'query': '''{
resource(id: "%s", enforceGeoBlocking: false) {
%s


@ -2,12 +2,18 @@
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import js_to_json
from ..utils import (
get_element_by_class,
int_or_none,
remove_start,
strip_or_none,
unified_strdate,
)
class ScreencastOMaticIE(InfoExtractor):
_VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
_TEST = {
_VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
_TESTS = [{
'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
'md5': '483583cb80d92588f15ccbedd90f0c18',
'info_dict': {
@ -16,22 +22,30 @@ class ScreencastOMaticIE(InfoExtractor):
'title': 'Welcome to 3-4 Philosophy @ DECV!',
'thumbnail': r're:^https?://.*\.jpg$',
'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
'duration': 369.163,
'duration': 369,
'upload_date': '20141216',
}
}
}, {
'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
'only_matching': True,
}, {
'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
jwplayer_data = self._parse_json(
self._search_regex(
r"(?s)jwplayer\('mp4Player'\).setup\((\{.*?\})\);", webpage, 'setup code'),
video_id, transform_source=js_to_json)
info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
info_dict.update({
'title': self._og_search_title(webpage),
'description': self._og_search_description(webpage),
webpage = self._download_webpage(
'https://screencast-o-matic.com/player/' + video_id, video_id)
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id,
'title': get_element_by_class('overlayTitle', webpage),
'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
'duration': int_or_none(self._search_regex(
r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
webpage, 'duration', default=None)),
'upload_date': unified_strdate(remove_start(
get_element_by_class('overlayPublished', webpage), 'Published: ')),
})
return info_dict
return info


@ -0,0 +1,156 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
clean_html,
int_or_none,
js_to_json,
try_get,
)
import datetime
import re
from urllib.parse import parse_qs
class SejmPlIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?sejm\.gov\.pl/Sejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?(?:[^&\s]+(?:&[^&\s]+)*)?)?(?:#|unid=)(?P<id>[\dA-F]+)'
IE_NAME = 'sejm.gov.pl'
_TESTS = [{
# multiple cameras, PJM translator
'url': 'https://www.sejm.gov.pl/Sejm9.nsf/transmisje_arch.xsp#9587D63364A355A1C1258562004DCF21',
'info_dict': {
'id': '9587D63364A355A1C1258562004DCF21',
'title': '11. posiedzenie Sejmu IX kadencji',
},
'playlist_count': 10,
}, {
# live stream
'url': 'https://www.sejm.gov.pl/Sejm9.nsf/transmisje.xsp?unid=DF7D229E316BBC5AC12586A8003E90AC#',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
term, video_id = mobj.group('term', 'id')
frame = self._download_webpage(
'https://sejm-embed.redcdn.pl/Sejm%s.nsf/VideoFrame.xsp/%s' % (term, video_id),
video_id, headers={
'Referer': 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp' % (term),
})
# despite the "transmisje_arch" name, this works for live streams too!
data = self._download_json(
'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp/json/%s' % (term, video_id),
video_id, headers={
'Referer': 'https://www.sejm.gov.pl/Sejm%s.nsf/transmisje_arch.xsp' % (term),
})
params = data['params']
def iso_date_to_wtf_atende_wants(date):
date = datetime.datetime.fromisoformat(date)
# atende uses a unix-like timestamp, but counted from 2001 instead of 1970
date = date.replace(year=date.year - 31)
# also it's in milliseconds
return int(date.timestamp() * 1000)
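# sanity check of the scheme, with a hypothetical value:
#   '2021-01-01T00:00:00+01:00' -> 1990-01-01T00:00:00+01:00
#   -> 631148400 s since the unix epoch -> 631148400000 returned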
start_time = iso_date_to_wtf_atende_wants(params['start'])
if 'transmisje_arch.xsp' in url:
stop_time = iso_date_to_wtf_atende_wants(params['stop'])
else:
stop_time = None
duration = (stop_time - start_time) if stop_time else None
entries = []
def add_entry(file):
if not file:
return
file = 'https:%s?startTime=%d' % (file, start_time)
# live streams don't use stopTime
if stop_time:
file += '&stopTime=%d' % stop_time
stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
entries.append({
'_type': 'url_transparent',
'url': file,
'ie_key': 'SejmPlVideo',
'id': stream_id,
'title': stream_id,
'duration': duration,
})
cameras = self._parse_json(
self._search_regex(r'(?s)var cameras = (\[.+?\]);', frame, 'camera list'),
video_id, js_to_json)
for camera in cameras:
add_entry(camera['file']['flv'])
if params.get('mig'):
add_entry(self._search_regex(r"var sliUrl = '(.+?)';", frame, 'migacz url', fatal=False))
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': data['title'],
'description': clean_html(data['desc']),
'duration': duration,
'is_live': 'transmisje.xsp' in url,
}
# actually, this is common between Sejm and Senat, the 2 houses of PL parliament
class SejmPlVideoIE(InfoExtractor):
_VALID_URL = r'https?://[^.]+\.dcs\.redcdn\.pl/[^/]+/o2/(?P<house>sejm|senat)/(?P<id>[^/]+)/(?P<filename>[^./]+)\.livx\?(?P<qs>.+)'
IE_NAME = 'parlament-pl:video'
_TESTS = [{
'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
'info_dict': {
'id': 'ENC02-638272860000-638292544000',
'ext': 'mp4',
'title': 'ENC02',
},
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
house, camera, filename, qs = mobj.group('house', 'id', 'filename', 'qs')
qs = parse_qs(qs)
start_time = int(qs["startTime"][0])
stop_time = int_or_none(try_get(qs, lambda x: x["stopTime"][0]))
file = f'https://r.dcs.redcdn.pl/%s/o2/{house}/{camera}/{filename}.livx?startTime={start_time}'
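# the literal %s is filled in below with the CDN service name:
# nvr (raw flv), livedash, livehls or livess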
if stop_time:
file += f'&stopTime={stop_time}'
file_index = file + '&indexMode=true'
# sejm videos don't have an id, just a camera (pov) id and time range
video_id = ('%s-%d-%d' % (camera, start_time, stop_time)) if stop_time else ('%s-%d' % (camera, start_time))
formats = [{
'url': file % 'nvr',
'ext': 'flv',
'format_id': 'direct-0',
'preference': -1, # VERY slow to download (~200 KiB/s, compared to ~10-15 MiB/s by DASH/HLS)
}]
formats.extend(self._extract_mpd_formats(file_index % 'livedash', video_id, mpd_id='dash'))
formats.extend(self._extract_m3u8_formats(
file_index.replace('?', '/playlist.m3u8?') % 'livehls', video_id, m3u8_id='hls', ext='mp4'))
formats.extend(self._extract_ism_formats(
file_index.replace('?', '/manifest?') % 'livess', video_id, ism_id='ss'))
self._sort_formats(formats)
duration = ((stop_time - start_time) // 1000) if stop_time else None
return {
'id': video_id,
'title': camera,
'formats': formats,
'duration': duration,
# if there's no stop, it's live
'is_live': stop_time is None,
}


@ -0,0 +1,83 @@
# coding: utf-8
from .common import InfoExtractor
import datetime
class SenatPlIE(InfoExtractor):
_VALID_URL = r'https://av8\.senat\.pl/(?P<id>\d+[a-zA-Z\d]+)'
IE_NAME = 'senat.gov.pl'
_TESTS = [{
'url': 'https://av8.senat.pl/10Sen221',
'info_dict': {
'id': '10Sen221',
'title': '22. posiedzenie Senatu RP X kadencji - 24.03.2021 r. - cz. 1',
},
'playlist_count': 2,
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
res_type = self._search_regex(
r'<script [^>]+ src="/senat-console/static/js/generated/(vod|live)\.item\.js">',
webpage, 'resource type')
vod = self._download_json(
'https://av8.senat.pl/senat-console/side-menu/transmissions/%s/%s' % (video_id, res_type),
video_id, 'Downloading transmission metadata')
conf = self._download_json(
'https://av8.senat.pl/senat-console/side-menu/transmissions/%s/%s/player-configuration' % (video_id, res_type),
video_id, 'Downloading player configuration')
def unix_milliseconds_to_wtf_atende_wants(date):
date = datetime.datetime.fromtimestamp(date / 1000)
# atende uses a unix-like timestamp, but counted from 2001 instead of 1970
date = date.replace(year=date.year - 31)
# also it's in milliseconds
return int(date.timestamp() * 1000)
start_time = unix_milliseconds_to_wtf_atende_wants(vod['since'])
if res_type == 'vod':
stop_time = unix_milliseconds_to_wtf_atende_wants(vod['till'])
else:
stop_time = None
if stop_time:
duration = (stop_time - start_time) // 1000
else:
duration = None
entries = []
def add_entry(player):
trans_url = f"https:{player['playlist']['flv']}?startTime={start_time}"
if stop_time:
trans_url += f"&stopTime={stop_time}"
stream_id = self._search_regex(
r'/o2/senat/([^/]+)/[^./]+\.livx', trans_url, 'stream id')
entries.append({
'_type': 'url_transparent',
'url': trans_url,
'ie_key': 'SejmPlVideo',
'id': stream_id,
'title': stream_id,
'duration': duration,
'is_live': res_type == 'live',
})
add_entry(conf['player'])
# PJM translator
if conf.get('sliPlayer'):
add_entry(conf['sliPlayer'])
return {
'_type': 'multi_video',
'entries': entries,
'id': video_id,
'title': vod['title'],
'duration': duration,
'is_live': res_type == 'live',
}


@ -21,6 +21,7 @@ from ..utils import (
class ShahidBaseIE(AWSIE):
_AWS_PROXY_HOST = 'api2.shahid.net'
_AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
_VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
def _handle_error(self, e):
fail_data = self._parse_json(
@ -49,7 +50,7 @@ class ShahidBaseIE(AWSIE):
class ShahidIE(ShahidBaseIE):
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
'info_dict': {
@ -73,6 +74,9 @@ class ShahidIE(ShahidBaseIE):
# shahid plus subscriber only
'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
'only_matching': True
}, {
'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
'only_matching': True
}]
def _real_initialize(self):
@ -168,7 +172,7 @@ class ShahidIE(ShahidBaseIE):
class ShahidShowIE(ShahidBaseIE):
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
_VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
_TESTS = [{
'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
'info_dict': {


@ -86,10 +86,10 @@ class SharedIE(SharedBaseIE):
class VivoIE(SharedBaseIE):
IE_DESC = 'vivo.sx'
_VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
_VALID_URL = r'https?://vivo\.s[xt]/(?P<id>[\da-z]{10})'
_FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
_TEST = {
_TESTS = [{
'url': 'http://vivo.sx/d7ddda0e78',
'md5': '15b3af41be0b4fe01f4df075c2678b2c',
'info_dict': {
@ -98,7 +98,10 @@ class VivoIE(SharedBaseIE):
'title': 'Chicken',
'filesize': 515659,
},
}
}, {
'url': 'http://vivo.st/d7ddda0e78',
'only_matching': True,
}]
def _extract_title(self, webpage):
title = self._html_search_regex(


@ -30,6 +30,10 @@ from ..utils import (
url_or_none,
urlhandle_detect_ext,
)
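# the extractor_artifacts module is generated by the release scripts;
# a plain source checkout won't have it, so fall back gracefully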
try:
from ..extractor_artifacts.soundcloud import prerelease_client_id
except ImportError:
prerelease_client_id = None
class SoundcloudEmbedIE(InfoExtractor):
@ -289,6 +293,10 @@ class SoundcloudIE(InfoExtractor):
return
raise ExtractorError('Unable to extract client id')
def _generate_prerelease_file(self):
self._update_client_id()
return 'prerelease_client_id = {!r}\n'.format(self._CLIENT_ID)
def _download_json(self, *args, **kwargs):
non_fatal = kwargs.get('fatal') is False
if non_fatal:
@ -310,7 +318,7 @@ class SoundcloudIE(InfoExtractor):
raise
def _real_initialize(self):
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or prerelease_client_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
@classmethod
def _resolv_url(cls, url):


@ -2,6 +2,7 @@
from __future__ import unicode_literals
import itertools
import re
from .common import InfoExtractor
from ..compat import compat_str
@ -64,11 +65,16 @@ def _extract_episode(data, episode_id=None):
class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
api\.spreaker\.com/
(?:(?:api|widget|www)\.)?spreaker\.com/
(?:
(?:download/)?episode|
v2/episodes
)/
(?:
(?:download/)?episode|
v2/episodes
)/
|(?:
player\?(?:.+?&)?episode_id=
)
)
(?P<id>\d+)
'''
_TESTS = [{
@ -95,8 +101,20 @@ class SpreakerIE(InfoExtractor):
}, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True,
}, {
'url': 'https://www.spreaker.com/episode/44098221',
'only_matching': True,
}, {
'url': 'https://widget.spreaker.com/player?episode_id=44098221',
'only_matching': True,
}]
@staticmethod
def _extract_urls(webpage, **kw):
return ['https://api.spreaker.com/episode/%s' % mobj.group('id') for mobj in re.finditer(
r'<iframe\b[^>]+src=(["\'])(?:(?:https?)?:)?//widget\.spreaker\.com/player\?(?:.+?&)?episode_id=(?P<id>\d+)',
webpage)]
def _real_extract(self, url):
episode_id = self._match_id(url)
data = self._download_json(


@ -0,0 +1,78 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
js_to_json,
mimetype2ext,
)
class SpryciarzePageIE(InfoExtractor):
_VALID_URL = r'https?://[^/]+\.spryciarze\.pl/zobacz/(?P<id>[^/?#]+)'
IE_NAME = 'spryciarze:page'
_TESTS = [{
'url': 'https://komputery.spryciarze.pl/zobacz/jak-jezdzic-pojazdami-pod-woda-w-gta-sa-mp',
'info_dict': {
'id': 'jak-jezdzic-pojazdami-pod-woda-w-gta-sa-mp',
'ext': 'mp4',
'title': 'Jak jeździć pojazdami pod wodą w GTA SA: MP',
'description': 'Jest sposób na jazdę pojazdami pod wodą w GTA San Andreas w trybie multiplayer. Po wgraniu pojazdu musimy się od razu w nim znaleźć inaczej pomysł może nie zadziałać.',
'uploader': 'Webster90804',
'upload_date': '20091228',
'timestamp': 1261983600,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
info_dict = self._search_json_ld(webpage, video_id, 'VideoObject')
info_dict.update({
'_type': 'url_transparent',
'url': self._search_regex(r'<iframe src="(https://player\.spryciarze\.pl/embed/[^"]+)"', webpage, 'embed url'),
'ie_key': 'Spryciarze',
})
return info_dict
class SpryciarzeIE(InfoExtractor):
_VALID_URL = r'https?://player\.spryciarze\.pl/embed/(?P<id>[^/?#]+)'
IE_NAME = 'spryciarze'
_TESTS = [{
'url': 'https://player.spryciarze.pl/embed/jak-sciagac-z-30-hostingow-za-darmo-i-bez-rejestracji',
'info_dict': {
'id': 'jak-sciagac-z-30-hostingow-za-darmo-i-bez-rejestracji',
'ext': 'mp4',
'title': 'Jak ściągać z 30 hostingów za darmo i bez rejestracji',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
data = self._parse_json(
self._search_regex(
r'(?s)const data = ({.+?});',
webpage, 'video data'), video_id, js_to_json)
formats = []
for fmt in data['mediaFiles']:
formats.append({
'url': fmt['src'],
'ext': mimetype2ext(fmt['type']),
})
self._sort_formats(formats)
return {
'id': video_id,
'title': data['title'],
'formats': formats,
}


@ -145,7 +145,7 @@ class SVTPlayIE(SVTPlayBaseIE):
)
(?P<svt_id>[^/?#&]+)|
https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
(?:.*?modalId=(?P<modal_id>[\da-zA-Z-]+))?
(?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
)
'''
_TESTS = [{
@ -176,6 +176,9 @@ class SVTPlayIE(SVTPlayBaseIE):
}, {
'url': 'https://www.svtplay.se/video/30479064/husdrommar/husdrommar-sasong-8-designdrommar-i-stenungsund?modalId=8zVbDPA',
'only_matching': True,
}, {
'url': 'https://www.svtplay.se/video/30684086/rapport/rapport-24-apr-18-00-7?id=e72gVpa',
'only_matching': True,
}, {
# geo restricted to Sweden
'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
@ -258,7 +261,7 @@ class SVTPlayIE(SVTPlayBaseIE):
if not svt_id:
svt_id = self._search_regex(
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\bmodalId=([\da-zA-Z-]+)' % re.escape(video_id),
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/%s/[^"\']*\b(?:modalId|id)=([\da-zA-Z-]+)' % re.escape(video_id),
r'["\']videoSvtId["\']\s*:\s*["\']([\da-zA-Z-]+)',
r'["\']videoSvtId\\?["\']\s*:\s*\\?["\']([\da-zA-Z-]+)',
r'"content"\s*:\s*{.*?"id"\s*:\s*"([\da-zA-Z-]+)"',


@ -123,6 +123,10 @@ class TEDIE(InfoExtractor):
'params': {
'skip_download': True,
},
}, {
# with own formats and private Youtube external
'url': 'https://www.ted.com/talks/spencer_wells_a_family_tree_for_humanity',
'only_matching': True,
}]
_NATIVE_FORMATS = {
@ -210,16 +214,6 @@ class TEDIE(InfoExtractor):
player_talk = talk_info['player_talks'][0]
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return self.url_result(ext_url or external['uri'])
resources_ = player_talk.get('resources') or talk_info.get('resources')
http_url = None
@ -294,6 +288,16 @@ class TEDIE(InfoExtractor):
'vcodec': 'none',
})
if not formats:
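# only fall back to the external (e.g. youtube) embed when TED serves no formats of its own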
external = player_talk.get('external')
if isinstance(external, dict):
service = external.get('service')
if isinstance(service, compat_str):
ext_url = None
if service.lower() == 'youtube':
ext_url = external.get('code')
return self.url_result(ext_url or external['uri'])
self._sort_formats(formats)
video_id = compat_str(talk_info['id'])


@ -155,6 +155,16 @@ class TikTokIE(TikTokBaseIE):
class TikTokPlaywrightBaseIE(TikTokBaseIE):
def _dedup_by_ids(self, items):
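# items scrolled in from the page may overlap with the ones from __NEXT_DATA__;
# keep only the first occurrence of each id, preserving order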
deduped = []
dedids = []
for item in items:
if item['id'] in dedids:
continue
dedids.append(item['id'])
deduped.append(item)
return deduped
def _scroll_the_page(self, page, item_list_re, display_id):
if page.title() == 'tiktok-verify-page':
raise ExtractorError('TikTok requires captcha, use --cookies')
@ -223,6 +233,7 @@ class TikTokUserIE(TikTokPlaywrightBaseIE):
next_data_items = try_get(page_props, lambda x: x['items'], expected_type=list)
if next_data_items:
items = next_data_items + items
items = self._dedup_by_ids(items)
info_dict = {
'_type': 'playlist',
@ -265,6 +276,7 @@ class TikTokHashtagIE(TikTokPlaywrightBaseIE):
next_data_items = try_get(page_props, lambda x: x['items'], expected_type=list)
if next_data_items:
items = next_data_items + items
items = self._dedup_by_ids(items)
return {
'_type': 'playlist',
@ -333,6 +345,7 @@ class TikTokMusicIE(TikTokPlaywrightBaseIE):
next_data_items = try_get(page_props, lambda x: x['items'], expected_type=list)
if next_data_items:
items = next_data_items + items
items = self._dedup_by_ids(items)
info_dict = {
'_type': 'playlist',


@ -74,6 +74,12 @@ class TV2DKIE(InfoExtractor):
webpage = self._download_webpage(url, video_id)
entries = []
def add_entry(partner_id, kaltura_id):
entries.append(self.url_result(
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id))
for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
video = extract_attributes(video_el)
kaltura_id = video.get('data-entryid')
@ -82,9 +88,14 @@ class TV2DKIE(InfoExtractor):
partner_id = video.get('data-partnerid')
if not partner_id:
continue
entries.append(self.url_result(
'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
video_id=kaltura_id))
add_entry(partner_id, kaltura_id)
if not entries:
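# single-video pages don't carry data-entryid elements;
# dig the kaltura entry and partner ids out of the page scripts instead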
kaltura_id = self._search_regex(
r'entry_id\s*:\s*["\']([0-9a-z_]+)', webpage, 'kaltura id')
partner_id = self._search_regex(
(r'\\u002Fp\\u002F(\d+)\\u002F', r'/p/(\d+)/'), webpage,
'partner id')
add_entry(partner_id, kaltura_id)
return self.playlist_result(entries)


@ -9,7 +9,6 @@ from ..utils import (
int_or_none,
remove_start,
smuggle_url,
strip_or_none,
try_get,
)
@ -45,32 +44,18 @@ class TVerIE(InfoExtractor):
query={'token': self._TOKEN})['main']
p_id = main['publisher_id']
service = remove_start(main['service'], 'ts_')
info = {
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
return {
'_type': 'url_transparent',
'description': try_get(main, lambda x: x['note'][0]['text'], compat_str),
'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])),
'url': bc_url,
'ie_key': 'BrightcoveNew',
}
if service == 'cx':
title = main['title']
subtitle = strip_or_none(main.get('subtitle'))
if subtitle:
title += ' - ' + subtitle
info.update({
'title': title,
'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
'ie_key': 'FujiTVFODPlus7',
})
else:
r_id = main['reference_id']
if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'):
r_id = 'ref:' + r_id
bc_url = smuggle_url(
self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id),
{'geo_countries': ['JP']})
info.update({
'url': bc_url,
'ie_key': 'BrightcoveNew',
})
return info


@ -2,6 +2,10 @@
from __future__ import unicode_literals
import re
from urllib.parse import (
parse_qs,
urlparse,
)
from .common import InfoExtractor
from ..utils import (
@ -14,8 +18,46 @@ from ..utils import (
)
class TVN24IE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:(?:[^/]+)\.)?tvn24\.pl)/(?:[^/]+/)*[^/?#\s]+[,-](?P<id>\d+)(?:\.html)?'
class TVNBaseIE(InfoExtractor):
def _parse_nuvi_data(self, data, display_id):
video = data['movie']['video']
info = data['movie']['info']
if video.get('protections'):
raise ExtractorError(
'This video is protected by %s DRM protection' % '/'.join(video['protections'].keys()),
expected=True)
formats = []
for fmt_id, fmt_data in video['sources'].items():
if fmt_id == 'hls':
formats.extend(self._extract_m3u8_formats(fmt_data['url'], display_id, ext='mp4'))
elif fmt_id == 'dash':
formats.extend(self._extract_mpd_formats(fmt_data['url'], display_id))
elif fmt_id == 'mp4':
for quality, mp4_url in fmt_data.items():
formats.append({
'url': mp4_url,
'ext': 'mp4',
'height': int_or_none(quality),
})
self._sort_formats(formats)
return {
'id': display_id,
'formats': formats,
'title': unescapeHTML(info.get('episode_title')),
'description': unescapeHTML(info.get('description')),
'duration': int_or_none(info.get('total_time')),
'age_limit': int_or_none(data['movie']['options'].get('parental_rating', {}).get('rating')),
'is_live': video.get('is_live'),
}
class TVN24IE(TVNBaseIE):
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:(?:[^/]+)\.)?tvn(?:24)?\.pl)/(?:[^/]+/)*[^/?#\s]+[,-](?P<id>\d+)(?:\.html)?'
_TESTS = [{
'url': 'https://tvn24.pl/polska/edyta-gorniak-napisala-o-statystach-w-szpitalach-udajacych-chorych-na-covid-19-jerzy-polaczek-i-marek-posobkiewicz-odpowiadaja-zapraszamy-4747899',
'info_dict': {
@ -25,7 +67,7 @@ class TVN24IE(InfoExtractor):
'playlist_count': 5,
}, {
# different layout
'url': 'https://tvnmeteo.tvn24.pl/magazyny/maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
'url': 'https://tvn24.pl/tvnmeteo/magazyny/nowa-maja-w-ogrodzie,13/odcinki-online,1,4,1,0/pnacza-ptaki-i-iglaki-odc-691-hgtv-odc-29,1771763.html',
'info_dict': {
'id': '1771763',
'ext': 'mp4',
@ -51,6 +93,15 @@ class TVN24IE(InfoExtractor):
'title': '11.02.2021 | Misja telewizji publicznej i reakcja na protest "Media bez wyboru"',
'description': 'md5:684d2e09f57c7ed03a277bc5ce295d63',
},
}, {
# no data-qualities, just data-src
'url': 'https://uwaga.tvn.pl/reportaze,2671,n/po-wyroku-trybunalu-kobiety-nie-moga-poddac-sie-aborcji,337993.html',
'info_dict': {
'id': '337993',
'ext': 'mp4',
'title': 'Wady letalne, czyli śmiertelne. "Boję się następnej ciąży"',
'description': 'md5:4f5efe579b7f801d5a8d7a75c0809260',
},
}, {
'url': 'http://fakty.tvn24.pl/ogladaj-online,60/53-konferencja-bezpieczenstwa-w-monachium,716431.html',
'only_matching': True,
@ -66,18 +117,18 @@ class TVN24IE(InfoExtractor):
mobj = re.match(self._VALID_URL, url)
domain, display_id = mobj.group('domain', 'id')
if '/magazyn-tvn24/' in url:
return self._handle_magazine_frontend(url, display_id)
elif domain in ('tvn24.pl', ):
return self._handle_nextjs_frontend(url, display_id)
elif domain in ('fakty.tvn24.pl', ):
return self._handle_fakty_frontend(url, display_id)
else:
return self._handle_old_frontend(url, display_id)
def _handle_old_frontend(self, url, display_id):
webpage = self._download_webpage(url, display_id)
if domain == 'tvn24.pl':
if '<script id="__NEXT_DATA__"' in webpage:
return self._handle_nextjs_frontend(url, display_id, webpage)
if '/magazyn-tvn24/' in url:
return self._handle_magazine_frontend(url, display_id, webpage)
if 'window.VideoManager.initVideo(' in webpage:
return self._handle_fakty_frontend(url, display_id, webpage)
return self._handle_old_frontend(url, display_id, webpage)
def _handle_old_frontend(self, url, display_id, webpage):
title = self._og_search_title(
webpage, default=None) or self._search_regex(
r'<h\d+[^>]+class=["\']magazineItemHeader[^>]+>(.+?)</h',
@ -90,16 +141,23 @@ class TVN24IE(InfoExtractor):
name, group='json', default=default, fatal=fatal) or '{}',
display_id, transform_source=unescapeHTML, fatal=fatal)
quality_data = extract_json('data-quality', 'formats')
quality_data = extract_json('data-quality', 'formats', default=None, fatal=False)
formats = []
for format_id, url in quality_data.items():
if quality_data:
for format_id, url in quality_data.items():
formats.append({
'url': url,
'format_id': format_id,
'height': int_or_none(format_id.rstrip('p')),
})
self._sort_formats(formats)
else:
formats.append({
'url': url,
'format_id': format_id,
'height': int_or_none(format_id.rstrip('p')),
'url': self._search_regex(
r'\bdata-src=(["\'])(?P<url>(?!\1).+?)\1',
webpage, 'video url', group='url'),
})
self._sort_formats(formats)
description = self._og_search_description(webpage, default=None)
thumbnail = self._og_search_thumbnail(
@ -128,20 +186,24 @@ class TVN24IE(InfoExtractor):
'formats': formats,
}
def _handle_magazine_frontend(self, url, display_id):
webpage = self._download_webpage(url, display_id)
def _handle_magazine_frontend(self, url, display_id, webpage):
entries = []
for vid_el in re.finditer(r'(?P<video><div\b[^>]+\bdata-src=[^>]+>)\s*(?:</[^>]+>\s*)*<figcaption>(?P<title>(?:.|\s)+?)</figcaption>', webpage):
for vid_el in re.finditer(r'(?P<video><div\b[^>]+\bdata-src=[^>]+>)\s*(?:</[^>]+>\s*)*?(?:<figcaption>(?P<title>(?:.|\s)+?)</figcaption>)?', webpage):
vid = extract_attributes(vid_el.group('video'))
formats = []
for fmt_name, fmt_url in self._parse_json(unescapeHTML(vid['data-quality']), display_id).items():
formats.append({
'format_id': fmt_name,
'height': int_or_none(fmt_name[:-1]),
'url': fmt_url,
})
qualities = vid.get('data-quality')
if qualities:
for fmt_name, fmt_url in self._parse_json(unescapeHTML(qualities), display_id).items():
formats.append({
'format_id': fmt_name,
'height': int_or_none(fmt_name[:-1]),
'url': fmt_url,
})
else:
formats = [{
'url': vid['data-src'],
}]
self._sort_formats(formats)
entries.append({
@ -151,6 +213,9 @@ class TVN24IE(InfoExtractor):
'thumbnail': vid.get('data-poster'),
})
if not entries:
raise ExtractorError('No videos found')
return {
'_type': 'playlist',
'id': display_id,
@ -159,9 +224,7 @@ class TVN24IE(InfoExtractor):
'description': self._og_search_description(webpage),
}
def _handle_nextjs_frontend(self, url, display_id):
webpage = self._download_webpage(url, display_id)
def _handle_nextjs_frontend(self, url, display_id, webpage):
next_data = self._search_nextjs_data(webpage, display_id)
context = next_data['props']['initialProps']['pageProps']['context']
@ -183,14 +246,13 @@ class TVN24IE(InfoExtractor):
plst_url = re.sub(r'[?#].+', '', url)
plst_url += '/nuviArticle?playlist&id=%s&r=%s' % (video['id'], route_name)
plst = self._download_json(plst_url, display_id)
data = self._parse_nuvi_data(plst, display_id)
data.update({
entries.append({
'_type': 'url_transparent',
'url': plst_url,
'ie_key': 'TVN24Nuvi',
'title': fields['title'],
'description': fields['description'],
})
entries.append(data)
return {
'_type': 'playlist',
@ -200,9 +262,7 @@ class TVN24IE(InfoExtractor):
'alt_title': context['fields']['title'],
}
def _handle_fakty_frontend(self, url, display_id):
webpage = self._download_webpage(url, display_id)
def _handle_fakty_frontend(self, url, display_id, webpage):
data = self._parse_json(
self._search_regex(
r"window\.VideoManager\.initVideo\('[^']+',\s*({.+?})\s*,\s*{.+?}\s*\);",
@ -210,38 +270,15 @@ class TVN24IE(InfoExtractor):
return self._parse_nuvi_data(data, display_id)
def _parse_nuvi_data(self, data, display_id):
video = data['movie']['video']
info = data['movie']['info']
if video.get('protections'):
raise ExtractorError(
'This video is protected by %s DRM protection' % '/'.join(video['protections'].keys()),
expected=True)
class TVN24NuviIE(TVNBaseIE):
# handles getting specific videos from the list of nuvi urls
_VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:(?:[^/]+)\.)?tvn24\.pl)/(?:[^/]+/)*[^/?#\s]+[,-](?P<id>\d+)(?:\.html)?/nuviArticle'
IE_NAME = 'tvn24:nuvi'
IE_DESC = False # do not list
formats = []
for fmt_id, fmt_data in video['sources'].items():
if fmt_id == 'hls':
formats.extend(self._extract_m3u8_formats(fmt_data['url'], display_id))
elif fmt_id == 'dash':
formats.extend(self._extract_mpd_formats(fmt_data['url'], display_id))
elif fmt_id == 'mp4':
for quality, mp4_url in fmt_data.items():
formats.append({
'url': mp4_url,
'ext': 'mp4',
'height': int_or_none(quality),
})
self._sort_formats(formats)
return {
'id': display_id,
'formats': formats,
'title': unescapeHTML(info.get('episode_title')),
'description': unescapeHTML(info.get('description')),
'duration': int_or_none(info.get('total_time')),
'age_limit': int_or_none(data['movie']['options'].get('parental_rating', {}).get('rating')),
'is_live': video.get('is_live'),
}
def _real_extract(self, url):
qs = parse_qs(urlparse(url).query)
video_id = qs['id'][0]
plst = self._download_json(url, video_id)
return self._parse_nuvi_data(plst, video_id)


@ -10,9 +10,11 @@ from ..utils import (
determine_ext,
ExtractorError,
int_or_none,
js_to_json,
str_or_none,
try_get,
unescapeHTML,
urljoin,
)
@ -51,7 +53,7 @@ class TVPIE(InfoExtractor):
'age_limit': 12,
},
}, {
# TVPlayer 2 in client-side rendered website (regional)
# TVPlayer 2 in client-side rendered website (regional; window.__newsData)
'url': 'https://warszawa.tvp.pl/25804446/studio-yayo',
'md5': '883c409691c6610bdc8f464fab45a9a9',
'info_dict': {
@ -61,6 +63,14 @@ class TVPIE(InfoExtractor):
'upload_date': '20160616',
'timestamp': 1466075700,
}
}, {
# TVPlayer 2 in client-side rendered website (tvp.info; window.__videoData)
'url': 'https://www.tvp.info/52880236/09042021-0800',
'info_dict': {
'id': '52880236',
'ext': 'mp4',
'title': '09.04.2021, 08:00',
},
}, {
# client-side rendered (regional) program (playlist) page
'url': 'https://opole.tvp.pl/9660819/rozmowa-dnia',
@ -73,6 +83,15 @@ class TVPIE(InfoExtractor):
'params': {
'skip_download': True,
}
}, {
# yet another vue page
# more pope, youth will handle that
'url': 'https://jp2.tvp.pl/46925618/filmy',
'info_dict': {
'id': '46925618',
'title': 'Filmy',
},
'playlist_mincount': 19,
}, {
# ABC-specific video embedding
'url': 'https://abc.tvp.pl/48636269/zubry-odc-124',
@ -113,19 +132,27 @@ class TVPIE(InfoExtractor):
def _parse_vue_website_data(self, webpage, page_id):
website_data = self._search_regex([
r'window\.__websiteData\s*=\s*({(?:.|\s)+?});',
# website - regiony, tvp.info
# directory - jp2.tvp.pl
r'window\.__(?:website|directory)Data\s*=\s*({(?:.|\s)+?});',
], webpage, 'website data')
if not website_data:
return None
# "sanitize" "JSON" trailing comma before parsing
website_data = re.sub(r',\s+}$', '}', website_data)
# replace JSON string with parsed dict
website_data = self._parse_json(website_data, page_id)
return website_data
return self._parse_json(website_data, page_id, transform_source=js_to_json)
def _extract_vue_video(self, video_data, page_id=None):
if isinstance(video_data, str):
video_data = self._parse_json(video_data, page_id, transform_source=js_to_json)
thumbnails = []
image = video_data.get('image')
is_website = video_data.get('type') == 'website'
if is_website:
url = video_data['url']
fucked_up_url_parts = re.match(r'https?://vod\.tvp\.pl/(\d+)/([^/?#]+)', url)
if fucked_up_url_parts:
url = f'https://vod.tvp.pl/website/{fucked_up_url_parts.group(2)},{fucked_up_url_parts.group(1)}'
else:
url = 'tvp:' + str_or_none(video_data.get('_id') or page_id)
if image:
for thumb in (image if isinstance(image, list) else [image]):
thmb_url = str_or_none(thumb.get('url'))
@ -136,8 +163,8 @@ class TVPIE(InfoExtractor):
return {
'_type': 'url_transparent',
'id': str_or_none(video_data.get('_id') or page_id),
'url': 'tvp:' + str_or_none(video_data.get('_id') or page_id),
'ie_key': 'TVPEmbed',
'url': url,
'ie_key': 'TVPEmbed' if not is_website else 'TVPWebsite',
'title': str_or_none(video_data.get('title')),
'description': str_or_none(video_data.get('lead')),
'timestamp': int_or_none(video_data.get('release_date_long')),
@ -145,73 +172,91 @@ class TVPIE(InfoExtractor):
'thumbnails': thumbnails,
}
def _handle_vuejs_page(self, url, webpage, page_id):
# vue client-side rendered sites (all regional pages + tvp.info)
video_data = self._search_regex([
r'window\.__(?:news|video)Data\s*=\s*({(?:.|\s)+?})\s*;',
], webpage, 'video data', default=None)
if video_data:
return self._extract_vue_video(video_data, page_id=page_id)
# paged playlists
website_data = self._parse_vue_website_data(webpage, page_id)
if website_data:
entries = []
def extract_videos(wd):
if wd.get('latestVideo'):
entries.append(self._extract_vue_video(wd['latestVideo']))
for video in wd.get('videos') or []:
entries.append(self._extract_vue_video(video))
for video in wd.get('items') or []:
entries.append(self._extract_vue_video(video))
extract_videos(website_data)
items_total_count = int_or_none(website_data.get('items_total_count'))
items_per_page = int_or_none(website_data.get('items_per_page'))
if items_total_count > len(entries) - 1:
pages = (items_total_count / items_per_page) + 1
pages = int(pages) + (0 if pages == int(pages) else 1)
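# `pages` is the exclusive upper bound for the range() below, forced to an int
# since range() won't accept the float that division produces
# (page 1 has already been parsed above)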
for page in range(2, pages):
page_website_data = self._parse_vue_website_data(
# seriously, this thing is rendered on the client and requires reloading the page
# when flipping pages, instead of just loading new ones with xhr or sth
# (they already even import axios!)
self._download_webpage(url, page_id, note='Downloading page #%d' % page,
query={'page': page}),
page_id)
extract_videos(page_website_data)
return {
'_type': 'playlist',
'id': page_id,
'title': str_or_none(website_data.get('title')),
'description': str_or_none(website_data.get('lead')),
'entries': entries,
}
raise ExtractorError('Could not extract video/website data')
def _real_extract(self, url):
page_id = self._match_id(url)
webpage = self._download_webpage(url, page_id)
if '//s.tvp.pl/files/portale-v4/regiony-tvp-pl' in webpage:
# vue client-side rendered sites (all regional pages)
video_data = self._search_regex([
r'window\.__newsData\s*=\s*({(?:.|\s)+?});',
], webpage, 'video data', default=None)
if video_data:
return self._extract_vue_video(
self._parse_json(video_data, page_id),
page_id=page_id)
# paged playlists
website_data = self._parse_vue_website_data(webpage, page_id)
if website_data:
entries = []
if website_data.get('latestVideo'):
entries.append(self._extract_vue_video(website_data['latestVideo']))
for video in website_data.get('videos') or []:
entries.append(self._extract_vue_video(video))
items_total_count = int_or_none(website_data.get('items_total_count'))
items_per_page = int_or_none(website_data.get('items_per_page'))
if items_total_count > len(entries) - 1:
pages = items_total_count / items_per_page
if pages != int(pages):
pages = int(pages) + 1
for page in range(2, pages):
page_website_data = self._parse_vue_website_data(
# seriously, this thing is rendered on the client and requires reloading the page
# when flipping pages, instead of just loading new ones with xhr or sth
# (they already even import axios!)
self._download_webpage(url, page_id, note='Downloading page #%d' % page,
query={'page': page}),
page_id)
for video in page_website_data.get('videos') or []:
entries.append(self._extract_vue_video(video))
webpage, urlh = self._download_webpage_handle(url, page_id)
return {
'_type': 'playlist',
'id': page_id,
'title': str_or_none(website_data.get('title')),
'description': str_or_none(website_data.get('lead')),
'entries': entries,
}
raise ExtractorError('Could not extract video/website data')
else:
# classic server-site rendered sites
video_id = self._search_regex([
r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
r"object_id\s*:\s*'(\d+)'",
r'data-video-id="(\d+)"',
# sometimes vod.tvp.pl urls look like... pretty much any other TVP url, which then redirects
# example: https://vod.tvp.pl/48463890/wadowickie-spotkania-z-janem-pawlem-ii
# VOD videos will work with this extractor,
# but VOD website needs to be handled with its extractor
if re.match(TVPWebsiteIE._VALID_URL, urlh.url):
return self.url_result(urlh.url, ie=TVPWebsiteIE.ie_key(), video_id=page_id)
# abc.tvp.pl - somehow there is more than one video ID that seems to refer to the same video?
# the first one is referred to as "copyid", and seems to be unused by the website
r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
], webpage, 'video id', default=page_id)
return {
'_type': 'url_transparent',
'url': 'tvp:' + video_id,
'description': self._og_search_description(
webpage, default=None) or (self._html_search_meta(
'description', webpage, default=None)
if '//s.tvp.pl/files/portal/v' in webpage else None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'ie_key': 'TVPEmbed',
}
# some tvp.info pages are vue.js, some are not
if re.search(
r'window\.__(?:video|news|website|directory)Data\s*=',
webpage):
return self._handle_vuejs_page(url, webpage, page_id)
# classic server-side rendered sites
video_id = self._search_regex([
r'<iframe[^>]+src="[^"]*?embed\.php\?(?:[^&]+&)*ID=(\d+)',
r'<iframe[^>]+src="[^"]*?object_id=(\d+)',
r"object_id\s*:\s*'(\d+)'",
r'data-video-id="(\d+)"',
# abc.tvp.pl - somehow there is more than one video ID that seems to refer to the same video?
# the first one is referred to as "copyid", and seems to be unused by the website
r'<script>\s*tvpabc\.video\.init\(\s*\d+,\s*(\d+)\s*\)\s*</script>',
], webpage, 'video id', default=page_id)
return {
'_type': 'url_transparent',
'url': 'tvp:' + video_id,
'description': self._og_search_description(
webpage, default=None) or (self._html_search_meta(
'description', webpage, default=None)
if '//s.tvp.pl/files/portal/v' in webpage else None),
'thumbnail': self._og_search_thumbnail(webpage, default=None),
'ie_key': 'TVPEmbed',
}
class TVPStreamIE(InfoExtractor):
@ -351,7 +396,7 @@ class TVPEmbedIE(InfoExtractor):
for file in content['files']:
video_url = file['url']
if video_url.endswith('.m3u8'):
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls'))
formats.extend(self._extract_m3u8_formats(video_url, video_id, m3u8_id='hls', ext='mp4'))
elif video_url.endswith('.mpd'):
formats.extend(self._extract_mpd_formats(video_url, video_id, mpd_id='dash'))
elif video_url.endswith('.f4m'):
@ -359,12 +404,12 @@ class TVPEmbedIE(InfoExtractor):
elif video_url.endswith('.ism/manifest'):
formats.extend(self._extract_ism_formats(video_url, video_id, ism_id='mss'))
else:
# probably just mp4 versions
# mp4, wmv or something
quality = file.get('quality', {})
formats.append({
'format_id': 'direct',
'url': video_url,
'ext': determine_ext(video_url, 'mp4'),
'ext': determine_ext(video_url, file['type']),
'fps': int_or_none(quality.get('fps')),
'tbr': int_or_none(quality.get('bitrate')),
'width': int_or_none(quality.get('width')),
@ -457,6 +502,15 @@ class TVPWebsiteIE(InfoExtractor):
'skip_download': True,
},
'add_ie': ['TVPEmbed'],
}, {
# series unavailable via API
'url': 'https://vod.tvp.pl/website/panorama-opinii,39001978/video',
'info_dict': {
'id': '39001978',
'title': 'Panorama opinii',
},
'playlist_mincount': 300,
'expected_warnings': ['not available through API'],
}, {
'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312',
'only_matching': True,
@ -475,6 +529,14 @@ class TVPWebsiteIE(InfoExtractor):
data = self._download_json(self._API_BASE + '/tv/v2/website/%s' % playlist_id,
playlist_id, 'Downloading series metadata', headers=headers)
if data.get('error'):
if data['error'] == 4005:
self.report_warning('Series not available through API, falling back to website')
return self._workaround_android_lock(display_id, playlist_id)
raise ExtractorError(
'TVP said: %s (error %d)' % (data.get('message'), data['error']),
expected=True, video_id=playlist_id)
info_dict = {}
entries = []
pages_count = None
@ -537,3 +599,55 @@ class TVPWebsiteIE(InfoExtractor):
'is_live': ep.get('is_live', False),
'age_limit': age_limit,
}
def _workaround_android_lock(self, display_id, playlist_id):
url = 'https://vod.tvp.pl/website/%s,%s/video' % (display_id, playlist_id)
webpage = self._download_webpage(url, playlist_id, 'Downloading page #1', query={
'order': 'oldest',
'page': 1,
})
page_count = int_or_none(
self._search_regex(
r'<li class="lastItem">\s*<a href="[^"]+&page=(\d+)',
webpage, 'page count')) or 1
current_page = 1
series_title = self._html_search_regex(
r'<div class="strefa-abo__listing-header-title">\s*<a[^>]+>\s*(.+?)\s*<i',
webpage, 'series title')
entries = []
def _web_ep_to_entry(ep):
return {
'_type': 'url',
'url': urljoin('https://vod.tvp.pl', ep['episodeLink']),
'ie_key': 'TVP',
'title': '%s, %s' % (series_title, ep['episodeCount']),
'description': ep.get('description'),
'thumbnail': ep.get('image'),
'series': series_title,
'episode_id': ep['episodeCount'],
}
while current_page <= page_count:
if current_page != 1:
webpage = self._download_webpage(url, playlist_id,
'Downloading page #%d' % current_page,
query={
'order': 'oldest',
'page': current_page,
})
episodes = re.finditer(r'data-hover="([^"]+)"', webpage)
for ep in episodes:
entries.append(
_web_ep_to_entry(
self._parse_json(
ep.group(1), playlist_id, transform_source=unescapeHTML)))
current_page += 1
return {
'_type': 'playlist',
'entries': entries,
'id': playlist_id,
'title': series_title,
}
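For reference, each data-hover attribute parsed in the loop above holds HTML-escaped JSON; unescaped, a payload looks roughly like this (all field values hypothetical, only the key names come from the code):
{"episodeLink": "/video/panorama-opinii,07122020,51191480", "episodeCount": "07.12.2020", "description": "...", "image": "https://s.tvp.pl/images/example.jpg"}
_web_ep_to_entry() maps it to a 'url' entry pointing at https://vod.tvp.pl/video/panorama-opinii,07122020,51191480, titled 'Panorama opinii, 07.12.2020'.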

View file

@ -49,6 +49,7 @@ class TwitchBaseIE(InfoExtractor):
'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
}
@ -893,7 +894,25 @@ class TwitchClipsIE(TwitchBaseIE):
def _real_extract(self, url):
video_id = self._match_id(url)
clip = self._download_base_gql(
clip = self._download_gql(
video_id, [{
'operationName': 'VideoAccessToken_Clip',
'variables': {
'slug': video_id,
},
}],
'Downloading clip access token GraphQL')[0]['data']['clip']
if not clip:
raise ExtractorError(
'This clip is no longer available', expected=True)
access_query = {
'sig': clip['playbackAccessToken']['signature'],
'token': clip['playbackAccessToken']['value'],
}
data = self._download_base_gql(
video_id, {
'query': '''{
clip(slug: "%s") {
@ -918,11 +937,10 @@ class TwitchClipsIE(TwitchBaseIE):
}
viewCount
}
}''' % video_id}, 'Downloading clip GraphQL')['data']['clip']
}''' % video_id}, 'Downloading clip GraphQL', fatal=False)
if not clip:
raise ExtractorError(
'This clip is no longer available', expected=True)
if data:
clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
formats = []
for option in clip.get('videoQualities', []):
@ -932,7 +950,7 @@ class TwitchClipsIE(TwitchBaseIE):
if not source:
continue
formats.append({
'url': source,
'url': update_url_query(source, access_query),
'format_id': option.get('quality'),
'height': int_or_none(option.get('quality')),
'fps': int_or_none(option.get('frameRate')),
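A quick sketch of what the new access-token step changes (all values hypothetical): with access_query = {'sig': 'abc123', 'token': '{"clip_uri":""}'}, update_url_query('https://clips.example/clip.mp4', access_query) yields 'https://clips.example/clip.mp4?sig=abc123&token=%7B%22clip_uri%22%3A%22%22%7D', so every quality option's source URL carries the same signed playback token.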

View file

@ -19,6 +19,7 @@ from ..utils import (
strip_or_none,
unified_timestamp,
update_url_query,
url_or_none,
xpath_text,
)
@ -52,6 +53,9 @@ class TwitterBaseIE(InfoExtractor):
return [f]
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
vmap_url = url_or_none(vmap_url)
if not vmap_url:
return []
vmap_data = self._download_xml(vmap_url, video_id)
formats = []
urls = []

View file

@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
'https://api.universal-music.de/graphql',
'https://graphql.universal-music.de/',
video_id, query={
'query': '''{
universalMusic(channel:16) {
@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
formats = []
def add_m3u8_format(format_id):
m3u8_formats = self._extract_m3u8_formats(
formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4',
'm3u8_native', m3u8_id='hls', fatal='False')
if m3u8_formats and m3u8_formats[0].get('height'):
formats.extend(m3u8_formats)
'm3u8_native', m3u8_id='hls', fatal=False))
for f in video_data.get('formats', []):
f_url = f.get('url')

View file

@ -75,7 +75,7 @@ class UstreamIE(InfoExtractor):
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?(?:ustream\.tv|video\.ibm\.com)/embed/.+?)\1', webpage)
if mobj is not None:
return mobj.group('url')

View file

@ -0,0 +1,37 @@
from .common import InfoExtractor
class ViderIE(InfoExtractor):
_VALID_URL = r'https?://vider\.(?:pl|info)/(?:vid/\+f|embed/video/)(?P<id>[a-z\d]+)'
_TESTS = [{
'url': 'https://vider.info/vid/+fsx51se',
'info_dict': {
'id': 'sx51se',
'ext': 'mp4',
'title': 'Big Buck Bunny',
'upload_date': '20210906',
'timestamp': 1630927351,
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(f'https://vider.info/vid/+f{video_id}', video_id)
json_ld = self._parse_json(
self._search_regex(
r'(?s)<script type="application/ld\+json">(.+?)</script>',
webpage, 'JSON-LD'), video_id)
info_dict = self._json_ld(json_ld, video_id)
# generated SEO junk
info_dict['description'] = None
info_dict['id'] = video_id
info_dict['formats'] = [{
'url': self._search_regex(r'\?file=(.+)', json_ld['embedUrl'], 'video url'),
'http_headers': {
'Referer': 'https://vider.info/',
},
}]
return info_dict
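To illustrate the format extraction (stream host hypothetical): if the JSON-LD embedUrl is 'https://vider.info/embed/video/sx51se?file=https://stream.example/sx51se.mp4', the '\?file=(.+)' regex yields 'https://stream.example/sx51se.mp4', which is then fetched with the 'Referer: https://vider.info/' header attached.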

View file

@ -3,7 +3,6 @@ from __future__ import unicode_literals
import base64
import functools
import json
import re
import itertools
@ -17,15 +16,14 @@ from ..compat import (
from ..utils import (
clean_html,
determine_ext,
dict_get,
ExtractorError,
get_element_by_class,
js_to_json,
int_or_none,
merge_dicts,
OnDemandPagedList,
parse_filesize,
parse_iso8601,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
std_headers,
@ -124,10 +122,11 @@ class VimeoBaseInfoExtractor(InfoExtractor):
video_title = video_data['title']
live_event = video_data.get('live_event') or {}
is_live = live_event.get('status') == 'started'
request = config.get('request') or {}
formats = []
config_files = video_data.get('files') or config['request'].get('files', {})
for f in config_files.get('progressive', []):
config_files = video_data.get('files') or request.get('files') or {}
for f in (config_files.get('progressive') or []):
video_url = f.get('url')
if not video_url:
continue
@ -143,7 +142,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
# TODO: fix handling of 308 status code returned for live archive manifest requests
sep_pattern = r'/sep/video/'
for files_type in ('hls', 'dash'):
for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
for cdn_name, cdn_data in (try_get(config_files, lambda x: x[files_type]['cdns']) or {}).items():
manifest_url = cdn_data.get('url')
if not manifest_url:
continue
@ -189,17 +188,15 @@ class VimeoBaseInfoExtractor(InfoExtractor):
f['preference'] = -40
subtitles = {}
text_tracks = config['request'].get('text_tracks')
if text_tracks:
for tt in text_tracks:
subtitles[tt['lang']] = [{
'ext': 'vtt',
'url': urljoin('https://vimeo.com', tt['url']),
}]
for tt in (request.get('text_tracks') or []):
subtitles[tt['lang']] = [{
'ext': 'vtt',
'url': urljoin('https://vimeo.com', tt['url']),
}]
thumbnails = []
if not is_live:
for key, thumb in video_data.get('thumbs', {}).items():
for key, thumb in (video_data.get('thumbs') or {}).items():
thumbnails.append({
'id': key,
'width': int_or_none(key),
@ -397,6 +394,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'staff',
'uploader': 'Vimeo Staff',
'duration': 62,
'subtitles': {
'de': [{'ext': 'vtt'}],
'en': [{'ext': 'vtt'}],
'es': [{'ext': 'vtt'}],
'fr': [{'ext': 'vtt'}],
},
}
},
{
@ -570,6 +573,37 @@ class VimeoIE(VimeoBaseInfoExtractor):
def _real_initialize(self):
self._login()
def _extract_from_api(self, video_id, unlisted_hash=None):
token = self._download_json(
'https://vimeo.com/_rv/jwt', video_id, headers={
'X-Requested-With': 'XMLHttpRequest'
})['token']
api_url = 'https://api.vimeo.com/videos/' + video_id
if unlisted_hash:
api_url += ':' + unlisted_hash
video = self._download_json(
api_url, video_id, headers={
'Authorization': 'jwt ' + token,
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
self._vimeo_sort_formats(info['formats'])
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
'license': video.get('license'),
'release_timestamp': get_timestamp('release'),
'timestamp': get_timestamp('created'),
'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
})
connections = try_get(
video, lambda x: x['metadata']['connections'], dict) or {}
for k in ('comment', 'like'):
info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
return info
def _real_extract(self, url):
url, data = unsmuggle_url(url, {})
headers = std_headers.copy()
@ -578,48 +612,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
if 'Referer' not in headers:
headers['Referer'] = url
# Extract ID from URL
video_id, unlisted_hash = re.match(self._VALID_URL, url).groups()
mobj = re.match(self._VALID_URL, url).groupdict()
video_id, unlisted_hash = mobj['id'], mobj.get('unlisted_hash')
if unlisted_hash:
token = self._download_json(
'https://vimeo.com/_rv/jwt', video_id, headers={
'X-Requested-With': 'XMLHttpRequest'
})['token']
video = self._download_json(
'https://api.vimeo.com/videos/%s:%s' % (video_id, unlisted_hash),
video_id, headers={
'Authorization': 'jwt ' + token,
}, query={
'fields': 'config_url,created_time,description,license,metadata.connections.comments.total,metadata.connections.likes.total,release_time,stats.plays',
})
info = self._parse_config(self._download_json(
video['config_url'], video_id), video_id)
self._vimeo_sort_formats(info['formats'])
get_timestamp = lambda x: parse_iso8601(video.get(x + '_time'))
info.update({
'description': video.get('description'),
'license': video.get('license'),
'release_timestamp': get_timestamp('release'),
'timestamp': get_timestamp('created'),
'view_count': int_or_none(try_get(video, lambda x: x['stats']['plays'])),
})
connections = try_get(
video, lambda x: x['metadata']['connections'], dict) or {}
for k in ('comment', 'like'):
info[k + '_count'] = int_or_none(try_get(connections, lambda x: x[k + 's']['total']))
return info
return self._extract_from_api(video_id, unlisted_hash)
orig_url = url
is_pro = 'vimeopro.com/' in url
is_player = '://player.vimeo.com/video/' in url
if is_pro:
# some videos require portfolio_id to be present in player url
# https://github.com/ytdl-org/youtube-dl/issues/20070
url = self._extract_url(url, self._download_webpage(url, video_id))
if not url:
url = 'https://vimeo.com/' + video_id
elif is_player:
url = 'https://player.vimeo.com/video/' + video_id
elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
@ -639,14 +644,25 @@ class VimeoIE(VimeoBaseInfoExtractor):
expected=True)
raise
# Now we begin extracting as much information as we can from what we
# retrieved. First we extract the information common to all extractors,
# and later we extract those that are Vimeo-specific.
self.report_extraction(video_id)
if '//player.vimeo.com/video/' in url:
config = self._parse_json(self._search_regex(
r'\bconfig\s*=\s*({.+?})\s*;', webpage, 'info section'), video_id)
if config.get('view') == 4:
config = self._verify_player_video_password(
redirect_url, video_id, headers)
info = self._parse_config(config, video_id)
self._vimeo_sort_formats(info['formats'])
return info
if re.search(r'<form[^>]+?id="pw_form"', webpage):
video_password = self._get_video_password()
token, vuid = self._extract_xsrft_and_vuid(webpage)
webpage = self._verify_video_password(
redirect_url, video_id, video_password, token, vuid)
vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
if vimeo_config:
seed_status = vimeo_config.get('seed_status', {})
seed_status = vimeo_config.get('seed_status') or {}
if seed_status.get('state') == 'failed':
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, seed_status['title']),
@ -655,57 +671,31 @@ class VimeoIE(VimeoBaseInfoExtractor):
cc_license = None
timestamp = None
video_description = None
info_dict = {}
# Extract the config JSON
try:
try:
config_url = self._html_search_regex(
r' data-config-url="(.+?)"', webpage,
'config URL', default=None)
if not config_url:
# Sometimes new react-based page is served instead of old one that require
# different config URL extraction approach (see
# https://github.com/ytdl-org/youtube-dl/pull/7209)
page_config = self._parse_json(self._search_regex(
r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
webpage, 'page config'), video_id)
config_url = page_config['player']['config_url']
cc_license = page_config.get('cc_license')
timestamp = try_get(
page_config, lambda x: x['clip']['uploaded_on'],
compat_str)
video_description = clean_html(dict_get(
page_config, ('description', 'description_html_escaped')))
config = self._download_json(config_url, video_id)
except RegexNotFoundError:
# For pro videos or player.vimeo.com urls
# We try to find out which variable the config dict is assigned to
m_variable_name = re.search(r'(\w)\.video\.id', webpage)
if m_variable_name is not None:
config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
else:
config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
config = self._search_regex(config_re, webpage, 'info section',
flags=re.DOTALL)
config = json.loads(config)
except Exception as e:
if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
if '_video_password_verified' in data:
raise ExtractorError('video password verification failed!')
self._verify_video_password(redirect_url, video_id, webpage)
return self._real_extract(
smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
else:
raise ExtractorError('Unable to extract info section',
cause=e)
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
if channel_id:
config_url = self._html_search_regex(
r'\bdata-config-url="([^"]+)"', webpage, 'config URL')
video_description = clean_html(get_element_by_class('description', webpage))
info_dict.update({
'channel_id': channel_id,
'channel_url': 'https://vimeo.com/channels/' + channel_id,
})
else:
if config.get('view') == 4:
config = self._verify_player_video_password(redirect_url, video_id, headers)
page_config = self._parse_json(self._search_regex(
r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
webpage, 'page config', default='{}'), video_id, fatal=False)
if not page_config:
return self._extract_from_api(video_id)
config_url = page_config['player']['config_url']
cc_license = page_config.get('cc_license')
clip = page_config.get('clip') or {}
timestamp = clip.get('uploaded_on')
video_description = clean_html(
clip.get('description') or page_config.get('description_html_escaped'))
config = self._download_json(config_url, video_id)
video = config.get('video') or {}
vod = video.get('vod') or {}
@ -713,9 +703,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
def is_rented():
if '>You rented this title.<' in webpage:
return True
if config.get('user', {}).get('purchased'):
if try_get(config, lambda x: x['user']['purchased']):
return True
for purchase_option in vod.get('purchase_options', []):
for purchase_option in (vod.get('purchase_options') or []):
if purchase_option.get('purchased'):
return True
label = purchase_option.get('label_string')
@ -730,14 +720,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'https://player.vimeo.com/player/%s' % feature_id,
{'force_feature_id': True}), 'Vimeo')
# Extract video description
if not video_description:
video_description = self._html_search_regex(
r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
webpage, 'description', default=None)
if not video_description:
video_description = self._html_search_meta(
'description', webpage, default=None)
['description', 'og:description', 'twitter:description'],
webpage, default=None)
if not video_description and is_pro:
orig_webpage = self._download_webpage(
orig_url, video_id,
@ -746,7 +732,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
if orig_webpage:
video_description = self._html_search_meta(
'description', orig_webpage, default=None)
if not video_description and not is_player:
if not video_description:
self._downloader.report_warning('Cannot find video description')
# Extract upload date
@ -755,16 +741,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<time[^>]+datetime="([^"]+)"', webpage,
'timestamp', default=None)
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
except RegexNotFoundError:
# This info is only available in vimeo.com/{id} urls
view_count = None
like_count = None
comment_count = None
formats = []
source_format = self._extract_original_format(
@ -783,31 +759,20 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
webpage, 'license', default=None, group='license')
channel_id = self._search_regex(
r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
info_dict = {
info_dict.update({
'formats': formats,
'timestamp': unified_timestamp(timestamp),
'description': video_description,
'webpage_url': url,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
'license': cc_license,
'channel_id': channel_id,
'channel_url': channel_url,
}
})
info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
return info_dict
return merge_dicts(info_dict, info_dict_config, json_ld)
class VimeoOndemandIE(VimeoIE):
IE_NAME = 'vimeo:ondemand'
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
_VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?:[^/]+/)?(?P<id>[^/?#&]+)'
_TESTS = [{
# ondemand video not available via https://vimeo.com/id
'url': 'https://vimeo.com/ondemand/20704',

View file

@ -300,6 +300,13 @@ class VKIE(VKBaseIE):
'only_matching': True,
}]
@staticmethod
def _extract_sibnet_urls(webpage):
# https://help.sibnet.ru/?sibnet_video_embed
return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//video\.sibnet\.ru/shell\.php\?.*?\bvideoid=\d+.*?)\1',
webpage)]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('videoid')
@ -408,6 +415,10 @@ class VKIE(VKBaseIE):
if odnoklassniki_url:
return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
sibnet_urls = self._extract_sibnet_urls(info_page)
if sibnet_urls:
return self.url_result(sibnet_urls[0])
m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
if m_opts:
m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))

View file

@ -0,0 +1,232 @@
# coding: utf-8
from .common import InfoExtractor
from ..utils import (
std_headers,
try_get,
ExtractorError,
)
import json
import random
import re
class WPPilotBaseIE(InfoExtractor):
_NETRC_MACHINE = 'wppilot'
_LOGGED_IN = False
_VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s'
_VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s'
_VIDEO_LIST_URL = 'https://pilot.wp.pl/api/v1/channels/list'
_VIDEO_CLOSE_URL = 'https://pilot.wp.pl/api/v1/channels/close'
_LOGIN_URL = 'https://pilot.wp.pl/api/v1/user_auth/login'
_HEADERS_ATV = {
'User-Agent': 'ExoMedia 4.3.0 (43000) / Android 8.0.0 / foster_e',
'Accept': 'application/json',
'X-Version': 'pl.videostar|3.25.0|Android|26|foster_e',
'Content-Type': 'application/json; charset=UTF-8',
}
_HEADERS_WEB = {
'Content-Type': 'application/json; charset=UTF-8',
'Referer': 'https://pilot.wp.pl/tv/',
}
_STREAM_HEADERS_WEB = {
'Referer': 'https://pilot.wp.pl/',
'Origin': 'https://pilot.wp.pl',
}
def _real_initialize(self):
self._login()
def _login(self):
if any(cookie.domain == '.wp.pl' and cookie.name == 'netviapisessid'
for cookie in self._downloader.cookiejar):
# session exists, already logged in
self._LOGGED_IN = True
return None
username, password = self._get_login_info()
if not username:
return None
login = self._download_json(
self._LOGIN_URL, None, 'Logging in', 'Unable to log in',
headers=self._HEADERS_ATV,
data=bytes(json.dumps({
'device': 'android_tv',
'login': username,
'password': password,
}).encode('utf-8')))
error = try_get(login, lambda x: x['_meta']['error']['name'])
if error:
raise ExtractorError(f'WP login error: "{error}"')
self._LOGGED_IN = True
def _get_channel_list(self, cache=True):
if cache is True:
cache_res = self._downloader.cache.load('wppilot', 'channel-list')
if cache_res:
cache_res['_hdl_cached'] = True
return cache_res
res = self._download_json(
self._VIDEO_LIST_URL, None, 'Downloading channel list')
self._downloader.cache.store('wppilot', 'channel-list', res)
return res
def _parse_channel(self, chan, categories):
thumbnails = []
for key in ('thumbnail', 'thumbnail_mobile', 'thumbnail_mobile_bg', 'icon'):
if chan.get(key):
thumbnails.append({
'id': key,
'url': chan[key],
})
return {
'id': str(chan['id']),
'title': chan['name'],
'categories': [categories[str(i)] for i in chan['categories']],
}
class WPPilotIE(WPPilotBaseIE):
_VALID_URL = r'(?:https?://pilot\.wp\.pl/tv/?#|wppilot:)(?P<id>[a-z\d-]+)'
IE_NAME = 'wppilot'
_TESTS = [{
'url': 'https://pilot.wp.pl/tv/#telewizja-wp-hd',
'info_dict': {
'id': '158',
'ext': 'm3u8',
'title': 'Telewizja WP HD',
},
'params': {
'format': 'bestvideo',
},
}, {
# audio only
'url': 'https://pilot.wp.pl/tv/#radio-nowy-swiat',
'info_dict': {
'id': '238',
'ext': 'm3u8',
'title': 'Radio Nowy Świat',
},
'params': {
'format': 'bestaudio',
},
}, {
'url': 'wppilot:9',
'only_matching': True,
}]
def _get_channel(self, id_or_slug):
video_list = self._get_channel_list(cache=True)
key = 'id' if re.match(r'^\d+$', id_or_slug) else 'slug'
for video in video_list['data']:
if video.get(key) == id_or_slug:
return self._parse_channel(video, video_list['_meta']['categories'])
# if cached channel not found, download and retry
if video_list.get('_hdl_cached') is True:
video_list = self._get_channel_list(cache=False)
for video in video_list['data']:
if video.get(key) == id_or_slug:
return self._parse_channel(video, video_list['_meta']['categories'])
raise ExtractorError('Channel not found')
def _real_extract(self, url):
video_id = self._match_id(url)
channel = self._get_channel(video_id)
video_id = str(channel['id'])
if self._LOGGED_IN:
video = self._download_json(
self._VIDEO_URL % video_id, video_id, query={
'format_id': '2',
'device_type': 'android',
}, headers=self._HEADERS_ATV, expected_status=(200, 422))
else:
video = self._download_json(
self._VIDEO_GUEST_URL % video_id, video_id, query={
'device_type': 'web',
}, headers=self._HEADERS_WEB, expected_status=(200))
stream_token = try_get(video, lambda x: x['_meta']['error']['info']['stream_token'])
if stream_token:
close = self._download_json(
self._VIDEO_CLOSE_URL, video_id, 'Invalidating previous stream session',
headers=self._HEADERS_ATV,
data=bytes(json.dumps({
'channelId': video_id,
't': stream_token,
}).encode('utf-8')))
if try_get(close, lambda x: x['data']['status']) == 'ok':
return self.url_result('wppilot:%s' % video_id, ie=WPPilotIE.ie_key())
error = try_get(video, lambda x: x['_meta']['error'])
if error:
raise ExtractorError(f"WP said: \"{error['name']}\" ({error['code']})")
formats = []
stream_headers = {}
if self._LOGGED_IN:
ua = self._HEADERS_ATV['User-Agent']
else:
ua = std_headers['User-Agent']
stream_headers['User-Agent'] = ua
for fmt in video['data']['stream_channel']['streams']:
# MPD does not work for some reason
# if fmt['type'] == 'dash@live:abr':
# formats.extend(
# self._extract_mpd_formats(
# random.choice(fmt['url']), video_id))
if fmt['type'] == 'hls@live:abr':
formats.extend(
self._extract_m3u8_formats(
random.choice(fmt['url']),
video_id, headers=stream_headers))
for i in range(len(formats)):
formats[i]['http_headers'] = stream_headers
self._sort_formats(formats)
channel['formats'] = formats
return channel
class WPPilotChannelsIE(WPPilotBaseIE):
_VALID_URL = r'(?:https?://pilot\.wp\.pl/(?:tv/?)?(?:\?[^#]*)?#?|wppilot:)$'
IE_NAME = 'wppilot:channels'
_TESTS = [{
'url': 'wppilot:',
'info_dict': {
'id': 'wppilot',
'title': 'WP Pilot',
},
'playlist_mincount': 100,
}, {
'url': 'https://pilot.wp.pl/',
'only_matching': True,
}]
def _real_extract(self, url):
channel_list = self._get_channel_list()
categories = channel_list['_meta']['categories']
entries = []
for chan in channel_list['data']:
entry = self._parse_channel(chan, categories)
entry.update({
'_type': 'url_transparent',
'url': f'wppilot:{chan["id"]}',
'ie_key': WPPilotIE.ie_key(),
})
entries.append(entry)
return {
'_type': 'playlist',
'id': 'wppilot',
'entries': entries,
'title': 'WP Pilot',
}
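Since every entry is emitted as a 'wppilot:{id}' URL, the bare scheme also works directly on the command line; e.g. (channel id taken from the test above), 'haruhi-dl wppilot:158' and 'haruhi-dl https://pilot.wp.pl/tv/#telewizja-wp-hd' resolve to the same WPPilotIE extraction.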

View file

@ -58,6 +58,7 @@ class XFileShareIE(InfoExtractor):
(r'vidlocker\.xyz', 'VidLocker'),
(r'vidshare\.tv', 'VidShare'),
(r'vup\.to', 'VUp'),
(r'wolfstream\.tv', 'WolfStream'),
(r'xvideosharing\.com', 'XVideoSharing'),
)
@ -82,6 +83,9 @@ class XFileShareIE(InfoExtractor):
}, {
'url': 'https://aparat.cam/n4d6dh0wvlpr',
'only_matching': True,
}, {
'url': 'https://wolfstream.tv/nthme29v9u2x',
'only_matching': True,
}]
@staticmethod

View file

@ -11,6 +11,7 @@ from ..utils import (
parse_duration,
sanitized_Request,
str_to_int,
url_or_none,
)
@ -87,10 +88,10 @@ class XTubeIE(InfoExtractor):
'Cookie': 'age_verified=1; cookiesAccepted=1',
})
title, thumbnail, duration = [None] * 3
title, thumbnail, duration, sources, media_definition = [None] * 5
config = self._parse_json(self._search_regex(
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
default='{}'), video_id, transform_source=js_to_json, fatal=False)
if config:
config = config.get('mainRoll')
@ -99,20 +100,52 @@ class XTubeIE(InfoExtractor):
thumbnail = config.get('poster')
duration = int_or_none(config.get('duration'))
sources = config.get('sources') or config.get('format')
media_definition = config.get('mediaDefinition')
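# a mediaDefinition list looks roughly like this (hypothetical values,
# key names as used below):
# [{'format': 'hls', 'videoUrl': 'https://cdn.example/master.m3u8'},
#  {'format': 'mp4', 'quality': '720', 'videoUrl': 'https://cdn.example/720.mp4'}]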
if not isinstance(sources, dict):
if not isinstance(sources, dict) and not media_definition:
sources = self._parse_json(self._search_regex(
r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
webpage, 'sources', group='sources'), video_id,
transform_source=js_to_json)
formats = []
for format_id, format_url in sources.items():
formats.append({
'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
format_urls = set()
if isinstance(sources, dict):
for format_id, format_url in sources.items():
format_url = url_or_none(format_url)
if not format_url:
continue
if format_url in format_urls:
continue
format_urls.add(format_url)
formats.append({
'url': format_url,
'format_id': format_id,
'height': int_or_none(format_id),
})
if isinstance(media_definition, list):
for media in media_definition:
video_url = url_or_none(media.get('videoUrl'))
if not video_url:
continue
if video_url in format_urls:
continue
format_urls.add(video_url)
format_id = media.get('format')
if format_id == 'hls':
formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
elif format_id == 'mp4':
height = int_or_none(media.get('quality'))
formats.append({
'url': video_url,
'format_id': '%s-%d' % (format_id, height) if height else format_id,
'height': height,
})
self._remove_duplicate_formats(formats)
self._sort_formats(formats)

View file

@ -154,7 +154,7 @@ class YoukuIE(InfoExtractor):
# request basic data
basic_data_params = {
'vid': video_id,
'ccode': '0590',
'ccode': '0532',
'client_ip': '192.168.1.1',
'utid': cna,
'client_ts': time.time() / 1000,

View file

@ -4,13 +4,12 @@ import re
from .common import InfoExtractor
from ..utils import (
extract_attributes,
int_or_none,
str_to_int,
unescapeHTML,
unified_strdate,
url_or_none,
)
from ..aes import aes_decrypt_text
class YouPornIE(InfoExtractor):
@ -34,6 +33,7 @@ class YouPornIE(InfoExtractor):
'tags': list,
'age_limit': 18,
},
'skip': 'This video has been disabled',
}, {
# Unknown uploader
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@ -78,6 +78,40 @@ class YouPornIE(InfoExtractor):
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
definitions = self._download_json(
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
display_id)
formats = []
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = url_or_none(definition.get('videoUrl'))
if not video_url:
continue
f = {
'url': video_url,
'filesize': int_or_none(definition.get('videoSize')),
}
height = int_or_none(definition.get('quality'))
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata
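# e.g. the third sample path above gives (worked example):
# height=1080, bitrate=4000 -> format_id='1080p-4000k', tbr=4000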
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
if not height:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
f.update({
'format_id': '%dp-%dk' % (height, bitrate),
'tbr': bitrate,
})
f['height'] = height
formats.append(f)
self._sort_formats(formats)
webpage = self._download_webpage(
'http://www.youporn.com/watch/%s' % video_id, display_id,
headers={'Cookie': 'age_verified=1'})
@ -88,65 +122,6 @@ class YouPornIE(InfoExtractor):
webpage, default=None) or self._html_search_meta(
'title', webpage, fatal=True)
links = []
# Main source
definitions = self._parse_json(
self._search_regex(
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
'media definitions', default='[]'),
video_id, fatal=False)
if definitions:
for definition in definitions:
if not isinstance(definition, dict):
continue
video_url = url_or_none(definition.get('videoUrl'))
if video_url:
links.append(video_url)
# Fallback #1, this also contains extra low quality 180p format
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
links.append(link)
# Fallback #2 (unavailable as at 22.06.2017)
sources = self._search_regex(
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
if sources:
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
links.append(link)
# Fallback #3 (unavailable as at 22.06.2017)
for _, link in re.findall(
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
links.append(link)
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
for _, encrypted_link in re.findall(
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
formats = []
for video_url in set(unescapeHTML(link) for link in links):
f = {
'url': video_url,
}
# Video URL's path looks like this:
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
# We will benefit from it by extracting some metadata
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
if mobj:
height = int(mobj.group('height'))
bitrate = int(mobj.group('bitrate'))
f.update({
'format_id': '%dp-%dk' % (height, bitrate),
'height': height,
'tbr': bitrate,
})
formats.append(f)
self._sort_formats(formats)
description = self._html_search_regex(
r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
webpage, 'description',
@ -169,13 +144,12 @@ class YouPornIE(InfoExtractor):
age_limit = self._rta_search(webpage)
average_rating = int_or_none(self._search_regex(
r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
webpage, 'average rating', fatal=False))
view_count = str_to_int(self._search_regex(
r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
webpage, 'view count', fatal=False, group='count'))
view_count = None
views = self._search_regex(
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
'views', default=None)
if views:
view_count = str_to_int(extract_attributes(views).get('data-value'))
comment_count = str_to_int(self._search_regex(
r'>All [Cc]omments? \(([\d,.]+)\)',
webpage, 'comment count', default=None))
@ -201,7 +175,6 @@ class YouPornIE(InfoExtractor):
'duration': duration,
'uploader': uploader,
'upload_date': upload_date,
'average_rating': average_rating,
'view_count': view_count,
'comment_count': comment_count,
'categories': categories,

View file

@ -4,6 +4,7 @@ from __future__ import unicode_literals
from datetime import datetime
import json
import hashlib
from inspect import getsource
import random
import re
import time
@ -28,7 +29,6 @@ from ..utils import (
float_or_none,
get_element_by_id,
int_or_none,
list_geoblocked_countres,
mimetype2ext,
parse_codecs,
parse_duration,
@ -44,7 +44,12 @@ from ..utils import (
uppercase_escape,
url_or_none,
urlencode_postdata,
GeoRestrictedError,
)
try:
from ..extractor_artifacts.youtube import _decrypt_signature_protected
except ImportError:
_decrypt_signature_protected = None
class YoutubeBaseInfoExtractor(InfoExtractor):
@ -567,24 +572,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'format': '141/bestaudio[ext=m4a]',
},
},
# JS player signature function name containing $
{
'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
'info_dict': {
'id': 'nfWlot6h_JM',
'ext': 'm4a',
'title': 'Taylor Swift - Shake It Off',
'description': 'md5:9dc0bd58efe700594b54f7d82bed0bac',
'duration': 242,
'uploader': 'TaylorSwiftVEVO',
'uploader_id': 'TaylorSwiftVEVO',
'upload_date': '20140818',
},
'params': {
'youtube_include_dash_manifest': True,
'format': '141/bestaudio[ext=m4a]',
},
},
# Normal age-gate video (No vevo, embed allowed)
{
'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
@ -636,24 +623,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'DASH manifest missing',
]
},
# Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
{
'url': 'lqQg6PlCWgI',
'info_dict': {
'id': 'lqQg6PlCWgI',
'ext': 'mp4',
'duration': 6085,
'upload_date': '20150827',
'uploader_id': 'olympic',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
'uploader': 'Olympic',
'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
},
'params': {
'skip_download': 'requires avconv',
}
},
# Non-square pixels
{
'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
@ -879,26 +848,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://www.youtubekids.com/watch?v=BnC-cpUCdns',
'only_matching': True,
},
{
# invalid -> valid video id redirection
'url': 'DJztXj2GPfl',
'info_dict': {
'id': 'DJztXj2GPfk',
'ext': 'mp4',
'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
'description': 'md5:bf577a41da97918e94fa9798d9228825',
'upload_date': '20090125',
'uploader': 'Prochorowka',
'uploader_id': 'Prochorowka',
'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
'artist': 'Panjabi MC',
'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
'album': 'Beware of the Boys (Mundian To Bach Ke)',
},
'params': {
'skip_download': True,
},
},
{
# empty description results in an empty string
'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
@ -919,6 +868,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'url': 'https://youtube.com/shorts/7awd-y_DTQY',
'only_matching': True,
},
{
'url': 'https://www.youtube.com/video/2NDLF-k2PwA',
'only_matching': True,
}
]
_VALID_SIG_VALUE_RE = r'^AO[a-zA-Z0-9_-]+=*$'
@ -953,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
def _extract_signature_function(self, video_id, player_url, example_sig):
def _extract_signature_function(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
# Read from filesystem cache
@ -1064,33 +1017,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' return %s\n') % (signature_id_tuple, expr_code)
self.to_screen('Extracted signature function:\n' + code)
def mess(self, a, b):
@staticmethod
def mess(a, b):
c = a[0]
a[0] = a[b % len(a)]
a[b % len(a)] = c
return a
def _decrypt_signature_protected(self, s):
a = list(s)
a = a[3:]
a = self.mess(a, 63)
a = self.mess(a, 20)
a = a[1:]
a.reverse()
a = self.mess(a, 44)
a = a[1:]
a.reverse()
return "".join(a)
def _full_signature_handling(self, sig, player_url, video_id):
signature = self._decrypt_signature_protected(sig)
if re.match(self._VALID_SIG_VALUE_RE, signature):
return signature
if _decrypt_signature_protected:
signature = _decrypt_signature_protected(sig)
if re.match(self._VALID_SIG_VALUE_RE, signature):
return signature
if self._downloader.params.get('verbose'):
self.to_screen("Built-in signature decryption failed, trying dynamic")
sig_decrypt_stack = self._extract_signature_function(video_id, player_url, sig)
sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
return self._do_decrypt_signature(sig, sig_decrypt_stack)
def _generate_prerelease_file(self):
# It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
video_id = 'ieQ1rAIjzXc'
self._set_consent()
webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
player_url = self._search_regex(r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)', webpage, 'player url')
sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
func = re.sub(r'(?m)^ ', '', getsource(self.mess).replace('@staticmethod', ''))
func += '\n\ndef _decrypt_signature_protected(sig):\n'
stack = ['a = list(sig)']
for fun in sig_decrypt_stack:
if fun[0] == 'splice':
stack.append(f'a = a[{fun[1]}:]')
elif fun[0] == 'reverse':
stack.append('a.reverse()')
elif fun[0] == 'mess':
stack.append(f'a = mess(a, {fun[1]})')
else:
raise ExtractorError('Unknown stack action: %s' % (fun[0]))
stack.append("return ''.join(a)")
return func + '\n'.join(map(lambda x: ' ' * 4 + x, stack)) + '\n'
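For illustration, with a hypothetical three-step stack [('splice', 3), ('mess', 63), ('reverse', None)], the generated extractor_artifacts/youtube.py would come out roughly as follows (real player versions use longer stacks and different constants):

def mess(a, b):
    c = a[0]
    a[0] = a[b % len(a)]
    a[b % len(a)] = c
    return a

def _decrypt_signature_protected(sig):
    a = list(sig)
    a = a[3:]
    a = mess(a, 63)
    a.reverse()
    return ''.join(a)

This is the module the try/import at the top of the file picks up; when it is missing or stale, _full_signature_handling() falls back to the dynamic extraction path.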
def _get_subtitles(self, video_id, webpage):
try:
subs_doc = self._download_xml(
@ -1132,7 +1097,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# to be implemented in future that will replace this workaround (see
# https://github.com/ytdl-org/youtube-dl/issues/7468,
# https://github.com/ytdl-org/youtube-dl/pull/7599)
r';ytplayer\.config\s*=\s*({.+?});ytplayer',
r';ytplayer\.config\s*=\s*({.+?});\s*ytplayer',
r';ytplayer\.config\s*=\s*({.+?});',
)
config = self._search_regex(
@ -1473,34 +1438,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Get video info
video_info = {}
embed_webpage = None
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
or re.search(r'player-age-gate-content">', video_webpage) is not None):
age_gate = True
# We simulate the access to the video from www.youtube.com/v/{video_id}
# this can be viewed without login into Youtube
url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
data = compat_urllib_parse_urlencode({
'video_id': video_id,
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
# 'sts': self._search_regex(
# r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
})
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
try:
video_info_webpage = self._download_webpage(
video_info_url, video_id,
note='Refetching age-gated info webpage',
errnote='unable to download video info webpage')
yti1_player = self._download_webpage(
proto + '://www.youtube.com/youtubei/v1/player', video_id,
headers={
'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.0 Safari/537.36',
'Content-Type': 'application/json',
'X-Goog-Api-Key': self._YOUTUBE_API_KEY,
},
data=bytes(json.dumps({
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20210721.00.00',
'clientScreen': 'EMBED',
},
},
'videoId': video_id,
}).encode('utf-8')),
note='Downloading age-gated player info',
errnote='unable to download video info')
except ExtractorError:
video_info_webpage = None
if video_info_webpage:
video_info = compat_parse_qs(video_info_webpage)
pl_response = video_info.get('player_response', [None])[0]
player_response = extract_player_response(pl_response, video_id)
yti1_player = None
if yti1_player:
player_response = extract_player_response(yti1_player, video_id)
add_dash_mpd(video_info)
view_count = extract_view_count(video_info)
view_count = extract_view_count(video_id)
else:
age_gate = False
# Try looking directly into the video webpage
@ -1523,9 +1489,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_response = extract_player_response(args.get('player_response'), video_id)
if not player_response:
player_response = extract_player_response(
self._search_regex(
self._search_regex((
# js-like syntax
r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});(?:if \(ytcsi|var [a-zA-Z\_])',
r'(?:window(?:\["|\.)|var )ytInitialPlayerResponse(?:"])?\s*=\s*({.+?(?!\\)});',
video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
), video_webpage, 'ytInitialPlayerResponse', fatal=False), video_id)
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
add_dash_mpd_pr(player_response)
@ -1723,24 +1691,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
ASSETS_RE = r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)'
player_url = self._search_regex(
ASSETS_RE,
embed_webpage if age_gate else video_webpage, '', default=player_url)
ASSETS_RE, video_webpage, '', default=player_url)
if not player_url and not age_gate:
# We need the embed website after all
if embed_webpage is None:
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(
embed_url, video_id, 'Downloading embed webpage')
embed_url = proto + '://www.youtube.com/embed/%s' % video_id
embed_webpage = self._download_webpage(
embed_url, video_id, 'Downloading embed webpage')
player_url = self._search_regex(
ASSETS_RE, embed_webpage, 'JS player URL')
# if player_url is None:
# player_url_json = self._search_regex(
# r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
# video_webpage, 'age gate player URL')
# player_url = json.loads(player_url_json)
if 'sig' in url_data:
url += '&signature=' + url_data['sig'][0]
elif 's' in url_data:
@ -1872,12 +1832,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
or ', who has blocked it on copyright grounds' in error_desc
or 'It is not available in your country.' in error_desc
or ', who has blocked it in your country on copyright grounds.' in error_desc):
raise ExtractorError(
list_geoblocked_countres(
self._search_regex(
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
video_webpage, 'allowed region list').split(',')),
expected=True)
raise GeoRestrictedError(
error_desc,
countries=self._search_regex(
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
video_webpage, 'allowed region list').split(','))
if error_desc and 'Playback on other websites has been disabled' in error_desc:
raise ExtractorError(
'Embeds disabled for this video, account (with passed credit card or photo ID check, if in EU/EEA/CH/UK) is required',
@ -2286,8 +2245,9 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
webpage = self._download_webpage(url, list_id,
note='Downloading %s page #1 (webpage)' % (self._LIST_NAME))
return self._parse_json(
self._search_regex(
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});',
self._search_regex((
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});</script>',
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});'),
webpage, 'initial data JSON'), 'initial data JSON'), webpage
def _real_extract(self, url, results=None, query=None):
@ -2356,15 +2316,6 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
return info_dict
class YoutubeAjaxListInfoExtractor(YoutubeBaseListInfoExtractor):
def _download_continuation(self, continuation, list_id, page_no, session_id=None):
return self._download_json('https://www.youtube.com/browse_ajax', list_id,
note='Downloading %s page #%d (ajax)' % (self._LIST_NAME, page_no),
headers=self._YOUTUBE_CLIENT_HEADERS, query={
'continuation': continuation,
})
class YoutubeYti1ListInfoExtractor(YoutubeBaseListInfoExtractor):
# /youtubei/v1/[action]
_ACTION_URL = 'https://www.youtube.com/youtubei/v1/%s?key=%s' % ('%s', YoutubeBaseInfoExtractor._YOUTUBE_API_KEY)
@ -2403,7 +2354,7 @@ class YoutubeYti1ListInfoExtractor(YoutubeBaseListInfoExtractor):
data=bytes(json.dumps(data), encoding='utf-8'))
class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
class YoutubeChannelIE(YoutubeYti1ListInfoExtractor):
IE_NAME = 'youtube:channel'
_VALID_URL = r'https?://(?:www\.|music\.)?youtube\.com/(?P<type>user|channel|c)/(?P<id>[\w-]+)(?!/live)'
_LIST_NAME = 'channel'
@ -2442,18 +2393,18 @@ class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
def _parse_init_video_list(self, data):
grid_renderer = try_get(data, [
# initial
lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer'],
# continuation ajax
lambda x: x[1]['response']['continuationContents']['gridContinuation'],
lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items'],
# continuation yti1
lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'],
])
if not grid_renderer:
raise ExtractorError('Could not extract gridRenderer')
return {
'entries': [self._parse_video(item, entry_key='gridVideoRenderer', full_data=data)
for item in grid_renderer['items']],
'continuation': try_get(grid_renderer,
lambda x: x['continuations'][0]['nextContinuationData']['continuation'],
expected_type=compat_str),
for item in grid_renderer],
'continuation': try_get(grid_renderer, [
lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
], expected_type=str),
'info_dict': {
'title': try_get(data, lambda x: x['header']['c4TabbedHeaderRenderer']['title'], expected_type=compat_str),
},
@ -2509,7 +2460,7 @@ class YoutubePlaylistIE(YoutubeYti1ListInfoExtractor):
'id': 'PLCjDnXEsxzUTkHuSM5KCTgaUCR4yUySq8',
'title': 'coolstuff',
},
'playlist_mincount': 58,
'playlist_mincount': 57,
}, {
# a lot of pages, good for checking continuity
'url': 'https://www.youtube.com/playlist?list=PLv3TTBr1W_9tppikBxAE_G6qjWdBljBHJ',

View file

@ -401,6 +401,13 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='ap_list_mso', default=False,
help='List all supported multiple-system operators')
selfhosted_ie = optparse.OptionGroup(parser, 'Selfhosted IE Options')
selfhosted_ie.add_option(
'--force-use-mastodon',
action='store_true', dest='force_use_mastodon', default=False,
help='Force use the Mastodon extractor (to get follower-only/direct posts, '
'or circumvent PeerTube censorship via Pleroma; both require logging in)')
video_format = optparse.OptionGroup(parser, 'Video Format Options')
video_format.add_option(
'-f', '--format',
@ -597,6 +604,10 @@ def parseOpts(overrideArguments=None):
'Upper bound of a range for randomized sleep before each download '
'(maximum possible number of seconds to sleep). Must only be used '
'along with --min-sleep-interval.'))
workarounds.add_option(
'--force-playwright-browser', dest='force_playwright_browser',
help=('Force use a selected browser with extractors using Playwright. '
'Must be one of: firefox, chromium, webkit.'))
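For example (URLs hypothetical), the two new switches are used like:
haruhi-dl --force-use-mastodon https://pleroma.example/notice/A1b2C3d4
haruhi-dl --force-playwright-browser chromium https://example.com/some-video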
verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
verbosity.add_option(
@ -702,7 +713,7 @@ def parseOpts(overrideArguments=None):
help='Do NOT contact the haruhi-dl server for debugging')
verbosity.add_option(
'--no-headless-playwright',
dest='headless_playwright', action='store_false', default=False,
dest='headless_playwright', action='store_false', default=True,
help='Show browsers automated with Playwright on the screen')
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
@ -797,7 +808,7 @@ def parseOpts(overrideArguments=None):
action='store_true', dest='rm_cachedir',
help='Delete all filesystem cache files')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
thumbnail.add_option(
'--write-thumbnail',
action='store_true', dest='writethumbnail', default=False,
@ -904,6 +915,7 @@ def parseOpts(overrideArguments=None):
parser.add_option_group(video_format)
parser.add_option_group(subtitles)
parser.add_option_group(authentication)
parser.add_option_group(selfhosted_ie)
parser.add_option_group(adobe_pass)
parser.add_option_group(postproc)

View file

@ -1,7 +1,5 @@
# coding: utf-8
from __future__ import unicode_literals
from .compat import compat_cookiejar_Cookie
from http.cookiejar import Cookie
from .utils import (
ExtractorError,
is_outdated_version,
@ -87,15 +85,17 @@ class PlaywrightHelper():
def _set_cookies_from_browser(self, cookies):
for cookie in cookies:
self._extractor._downloader.cookiejar.set_cookie(
compat_cookiejar_Cookie(0, cookie['name'], cookie['value'], cookie.get('port'), False,
cookie['domain'], False, cookie['domain'].startswith('.'),
cookie['path'], cookie['path'] != '/',
cookie['secure'], cookie['expires'],
False, None, None, None))
Cookie(0, cookie['name'], cookie['value'], cookie.get('port'), False,
cookie['domain'], False, cookie['domain'].startswith('.'),
cookie['path'], cookie['path'] != '/',
cookie['secure'], cookie['expires'],
False, None, None, None))
def open_page(self, url, display_id, browser_used='firefox', note='Opening page in %(browser)s', html=None):
pw = self.pw()
self.pw_instance = pw
if self._extractor._downloader.params.get('force_playwright_browser') is not None:
browser_used = self._extractor._downloader.params.get('force_playwright_browser')
browser = {
'firefox': pw.firefox,
'chromium': pw.chromium,

View file

@ -231,7 +231,10 @@ class FFmpegPostProcessor(PostProcessor):
stdout, stderr = p.communicate()
if p.returncode != 0:
stderr = stderr.decode('utf-8', 'replace')
msg = stderr.strip().split('\n')[-1]
msgs = stderr.strip().split('\n')
msg = msgs[-1]
if self._downloader.params.get('verbose', False):
self._downloader.to_screen('[debug] ' + '\n'.join(msgs[:-1]))
raise FFmpegPostProcessorError(msg)
self.try_utime(out_path, oldest_mtime, oldest_mtime)

View file

@ -39,6 +39,7 @@ import zlib
from .compat import (
compat_HTMLParseError,
compat_HTMLParser,
compat_HTTPError,
compat_basestring,
compat_chr,
compat_cookiejar,
@ -2881,12 +2882,61 @@ class HaruhiDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
class HaruhiDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
if sys.version_info[0] < 3:
def redirect_request(self, req, fp, code, msg, headers, newurl):
# On python 2 urlh.geturl() may sometimes return redirect URL
# as byte string instead of unicode. This workaround allows
# to force it always return unicode.
return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
"""HaruhiDL redirect handler
The code is based on HTTPRedirectHandler implementation from CPython [1].
This redirect handler solves two issues:
- ensures redirect URL is always unicode under python 2
- introduces support for experimental HTTP response status code
308 Permanent Redirect [2] used by some sites [3]
1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3. https://github.com/ytdl-org/youtube-dl/issues/28768
"""
http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
def redirect_request(self, req, fp, code, msg, headers, newurl):
"""Return a Request or None in response to a redirect.
This is called by the http_error_30x methods when a
redirection response is received. If a redirection should
take place, return a new Request to allow http_error_30x to
perform the redirect. Otherwise, raise HTTPError if no-one
else should try to handle this url. Return None if you can't
but another Handler might.
"""
m = req.get_method()
if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
or code in (301, 302, 303) and m == "POST")):
raise compat_HTTPError(req.full_url, code, msg, headers, fp)
# Strictly (according to RFC 2616), 301 or 302 in response to
# a POST MUST NOT cause a redirection without confirmation
# from the user (of urllib.request, in this case). In practice,
# essentially all clients do redirect in this case, so we do
# the same.
# On python 2 urlh.geturl() may sometimes return redirect URL
# as byte string instead of unicode. This workaround allows
# to force it always return unicode.
if sys.version_info[0] < 3:
newurl = compat_str(newurl)
# Be conciliant with URIs containing a space. This is mainly
# redundant with the more complete encoding done in http_error_302(),
# but it is kept for compatibility with other callers.
newurl = newurl.replace(' ', '%20')
CONTENT_HEADERS = ("content-length", "content-type")
# NB: don't use dict comprehension for python 2.6 compatibility
newheaders = dict((k, v) for k, v in req.headers.items()
if k.lower() not in CONTENT_HEADERS)
return compat_urllib_request.Request(newurl,
headers=newheaders,
origin_req_host=req.origin_req_host,
unverifiable=True)
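A minimal sketch of what the handler buys (standalone urllib shown for illustration; haruhi-dl is assumed to wire the handler into its own opener):

import urllib.request

opener = urllib.request.build_opener(HaruhiDLRedirectHandler())
# a 308 response is now followed like 301/307 instead of raising HTTPError
res = opener.open('https://example.com/resource-behind-a-308')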
def extract_timezone(date_str):
@ -5725,284 +5775,3 @@ def clean_podcast_url(url):
st\.fm # https://podsights.com/docs/
)/e
)/''', '', url)
# http://country.io/names.json
country_list = {
"BD": "Bangladesh",
"BE": "Belgium",
"BF": "Burkina Faso",
"BG": "Bulgaria",
"BA": "Bosnia and Herzegovina",
"BB": "Barbados",
"WF": "Wallis and Futuna",
"BL": "Saint Barthelemy",
"BM": "Bermuda",
"BN": "Brunei",
"BO": "Bolivia",
"BH": "Bahrain",
"BI": "Burundi",
"BJ": "Benin",
"BT": "Bhutan",
"JM": "Jamaica",
"BV": "Bouvet Island",
"BW": "Botswana",
"WS": "Samoa",
"BQ": "Bonaire, Saint Eustatius and Saba ",
"BR": "Brazil",
"BS": "Bahamas",
"JE": "Jersey",
"BY": "Belarus",
"BZ": "Belize",
"RU": "Russia",
"RW": "Rwanda",
"RS": "Serbia",
"TL": "East Timor",
"RE": "Reunion",
"TM": "Turkmenistan",
"TJ": "Tajikistan",
"RO": "Romania",
"TK": "Tokelau",
"GW": "Guinea-Bissau",
"GU": "Guam",
"GT": "Guatemala",
"GS": "South Georgia and the South Sandwich Islands",
"GR": "Greece",
"GQ": "Equatorial Guinea",
"GP": "Guadeloupe",
"JP": "Japan",
"GY": "Guyana",
"GG": "Guernsey",
"GF": "French Guiana",
"GE": "Georgia",
"GD": "Grenada",
"GB": "United Kingdom",
"GA": "Gabon",
"SV": "El Salvador",
"GN": "Guinea",
"GM": "Gambia",
"GL": "Greenland",
"GI": "Gibraltar",
"GH": "Ghana",
"OM": "Oman",
"TN": "Tunisia",
"JO": "Jordan",
"HR": "Croatia",
"HT": "Haiti",
"HU": "Hungary",
"HK": "Hong Kong",
"HN": "Honduras",
"HM": "Heard Island and McDonald Islands",
"VE": "Venezuela",
"PR": "Puerto Rico",
"PS": "Palestinian Territory",
"PW": "Palau",
"PT": "Portugal",
"SJ": "Svalbard and Jan Mayen",
"PY": "Paraguay",
"IQ": "Iraq",
"PA": "Panama",
"PF": "French Polynesia",
"PG": "Papua New Guinea",
"PE": "Peru",
"PK": "Pakistan",
"PH": "Philippines",
"PN": "Pitcairn",
"PL": "Poland",
"PM": "Saint Pierre and Miquelon",
"ZM": "Zambia",
"EH": "Western Sahara",
"EE": "Estonia",
"EG": "Egypt",
"ZA": "South Africa",
"EC": "Ecuador",
"IT": "Italy",
"VN": "Vietnam",
"SB": "Solomon Islands",
"ET": "Ethiopia",
"SO": "Somalia",
"ZW": "Zimbabwe",
"SA": "Saudi Arabia",
"ES": "Spain",
"ER": "Eritrea",
"ME": "Montenegro",
"MD": "Moldova",
"MG": "Madagascar",
"MF": "Saint Martin",
"MA": "Morocco",
"MC": "Monaco",
"UZ": "Uzbekistan",
"MM": "Myanmar",
"ML": "Mali",
"MO": "Macao",
"MN": "Mongolia",
"MH": "Marshall Islands",
"MK": "Macedonia",
"MU": "Mauritius",
"MT": "Malta",
"MW": "Malawi",
"MV": "Maldives",
"MQ": "Martinique",
"MP": "Northern Mariana Islands",
"MS": "Montserrat",
"MR": "Mauritania",
"IM": "Isle of Man",
"UG": "Uganda",
"TZ": "Tanzania",
"MY": "Malaysia",
"MX": "Mexico",
"IL": "Israel",
"FR": "France",
"IO": "British Indian Ocean Territory",
"SH": "Saint Helena",
"FI": "Finland",
"FJ": "Fiji",
"FK": "Falkland Islands",
"FM": "Micronesia",
"FO": "Faroe Islands",
"NI": "Nicaragua",
"NL": "Netherlands",
"NO": "Norway",
"NA": "Namibia",
"VU": "Vanuatu",
"NC": "New Caledonia",
"NE": "Niger",
"NF": "Norfolk Island",
"NG": "Nigeria",
"NZ": "New Zealand",
"NP": "Nepal",
"NR": "Nauru",
"NU": "Niue",
"CK": "Cook Islands",
"XK": "Kosovo",
"CI": "Ivory Coast",
"CH": "Switzerland",
"CO": "Colombia",
"CN": "China",
"CM": "Cameroon",
"CL": "Chile",
"CC": "Cocos Islands",
"CA": "Canada",
"CG": "Republic of the Congo",
"CF": "Central African Republic",
"CD": "Democratic Republic of the Congo",
"CZ": "Czech Republic",
"CY": "Cyprus",
"CX": "Christmas Island",
"CR": "Costa Rica",
"CW": "Curacao",
"CV": "Cape Verde",
"CU": "Cuba",
"SZ": "Swaziland",
"SY": "Syria",
"SX": "Sint Maarten",
"KG": "Kyrgyzstan",
"KE": "Kenya",
"SS": "South Sudan",
"SR": "Suriname",
"KI": "Kiribati",
"KH": "Cambodia",
"KN": "Saint Kitts and Nevis",
"KM": "Comoros",
"ST": "Sao Tome and Principe",
"SK": "Slovakia",
"KR": "South Korea",
"SI": "Slovenia",
"KP": "North Korea",
"KW": "Kuwait",
"SN": "Senegal",
"SM": "San Marino",
"SL": "Sierra Leone",
"SC": "Seychelles",
"KZ": "Kazakhstan",
"KY": "Cayman Islands",
"SG": "Singapore",
"SE": "Sweden",
"SD": "Sudan",
"DO": "Dominican Republic",
"DM": "Dominica",
"DJ": "Djibouti",
"DK": "Denmark",
"VG": "British Virgin Islands",
"DE": "Germany",
"YE": "Yemen",
"DZ": "Algeria",
"US": "United States",
"UY": "Uruguay",
"YT": "Mayotte",
"UM": "United States Minor Outlying Islands",
"LB": "Lebanon",
"LC": "Saint Lucia",
"LA": "Laos",
"TV": "Tuvalu",
"TW": "Taiwan",
"TT": "Trinidad and Tobago",
"TR": "Turkey",
"LK": "Sri Lanka",
"LI": "Liechtenstein",
"LV": "Latvia",
"TO": "Tonga",
"LT": "Lithuania",
"LU": "Luxembourg",
"LR": "Liberia",
"LS": "Lesotho",
"TH": "Thailand",
"TF": "French Southern Territories",
"TG": "Togo",
"TD": "Chad",
"TC": "Turks and Caicos Islands",
"LY": "Libya",
"VA": "Vatican",
"VC": "Saint Vincent and the Grenadines",
"AE": "United Arab Emirates",
"AD": "Andorra",
"AG": "Antigua and Barbuda",
"AF": "Afghanistan",
"AI": "Anguilla",
"VI": "U.S. Virgin Islands",
"IS": "Iceland",
"IR": "Iran",
"AM": "Armenia",
"AL": "Albania",
"AO": "Angola",
"AQ": "Antarctica",
"AS": "American Samoa",
"AR": "Argentina",
"AU": "Australia",
"AT": "Austria",
"AW": "Aruba",
"IN": "India",
"AX": "Aland Islands",
"AZ": "Azerbaijan",
"IE": "Ireland",
"ID": "Indonesia",
"UA": "Ukraine",
"QA": "Qatar",
"MZ": "Mozambique"
}
def list_countries():
    return country_list.keys()
def list_geoblocked_countries(allowed_countries, reverse=False):
    # With reverse=False, allowed_countries is a whitelist of countries
    # where the video plays; with reverse=True it is a blocklist.
    geoblocked = []
    geounlocked = []
    for country in list_countries():
        if (country in allowed_countries) != reverse:
            geounlocked.append(country)
        else:
            geoblocked.append(country)
    if len(geounlocked) == 0:
        return 'This video is blocked in all countries'
    if len(geoblocked) <= 10:
        return 'This video is blocked in these countries: %s' % ', '.join(sorted(country_list[cnt] for cnt in geoblocked))
    if len(geounlocked) <= 10:
        geounlocked.sort()
        return 'This video is only available in these countries: %s' % ', '.join(country_list[cnt] for cnt in geounlocked)
    if len(geoblocked) >= len(geounlocked):
        geounlocked.sort()
        return 'This video is only available in these countries: %s' % ', '.join(geounlocked)
    geoblocked.sort()
    return 'This video is blocked in these countries: %s' % ', '.join(geoblocked)
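For illustration, the kinds of messages the helper produces (country codes picked arbitrarily for the example; output assumes the sorting fix above):

print(list_geoblocked_countries(['CZ', 'DE', 'PL']))
# This video is only available in these countries: Czech Republic, Germany, Poland

print(list_geoblocked_countries(['US'], reverse=True))
# This video is blocked in these countries: United States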


@@ -1,6 +1,6 @@
from __future__ import unicode_literals
-__version__ = '2021.04.01'
+__version__ = '2021.08.01'
if __name__ == '__main__':
print(__version__)


@@ -115,7 +115,7 @@ setup(
packages=[
'haruhi_dl',
'haruhi_dl.extractor', 'haruhi_dl.downloader',
-'haruhi_dl.postprocessor'],
+'haruhi_dl.postprocessor', 'haruhi_dl.extractor_artifacts'],
# Provokes warning on most systems (why?!)
# test_suite = 'nose.collector',


@@ -39,6 +39,16 @@ class TestExecution(unittest.TestCase):
        _, stderr = p.communicate()
        self.assertFalse(stderr)

    def test_lazy_extractors(self):
        try:
            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'haruhi_dl/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
        finally:
            try:
                os.remove('haruhi_dl/extractor/lazy_extractors.py')
            except (IOError, OSError):
                pass


if __name__ == '__main__':
    unittest.main()
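For reference, the steps this test automates can be reproduced by hand from the repository root; a minimal sketch (paths taken from the test above, cwd/stdout handling omitted):

import os
import subprocess
import sys

try:
    # Generate the lazy extractor module, then make sure the URL test
    # suite still passes against it.
    subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py',
                           'haruhi_dl/extractor/lazy_extractors.py'])
    subprocess.check_call([sys.executable, 'test/test_all_urls.py'])
finally:
    try:
        os.remove('haruhi_dl/extractor/lazy_extractors.py')
    except OSError:
        pass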