Compare commits

...

35 commits

Author SHA1 Message Date
Lauren Liberda 2f375d447c fix/speedup ci 2021-09-09 12:38:11 +02:00
Lauren Liberda d464b29113 vider support 2021-09-06 22:34:06 +02:00
Lauren Liberda 19602fb3f5 [polskieradio] fix PR4 audition shit 2021-08-31 20:25:12 +02:00
Lauren Liberda a550e21b8c [ipla] state the DRM requirement clearly 2021-08-07 02:23:28 +02:00
Lauren Liberda 1ae67712e8 [ipla] error handling 2021-08-07 01:08:07 +02:00
Dominika Liberda a96bf110da * version 2021.08.01 2021-08-01 17:44:07 +02:00
Lauren Liberda 973652cf4d [youtube] fix age gate for *some* videos 2021-08-01 17:39:30 +02:00
Lauren Liberda d81137a604 [peertube] pt 3.3+ url scheme support, fix tests, minor fixes 2021-07-30 20:40:19 +02:00
Lauren Liberda a0d52ce5be [niconico] dmc downloader and other stuff from yt-dlp (as of 40078a5) 2021-06-26 14:40:02 +02:00
Dominika Liberda 81b5018d99 * version 2021.06.24.1 2021-06-24 14:01:25 +02:00
Dominika Liberda 31b7bf5bdb * fixes crash if signature decryption code isn't packed with artifacts 2021-06-24 13:58:36 +02:00
Dominika Liberda a0cb1b40a2 * fix in release script 2021-06-24 13:18:36 +02:00
Dominika Liberda c3e48f4934 * version 2021.06.24 2021-06-24 13:07:07 +02:00
Dominika Liberda ca6cbb6234 * fixes youtube list extractor 2021-06-24 12:27:39 +02:00
Lauren Liberda 7858dc7b9f fix app crash/tests 2021-06-22 03:17:30 +02:00
Lauren Liberda 2234b1100c [liveleak] remove for real 2021-06-22 03:02:52 +02:00
Lauren Liberda 75442522b2 [soundcloud] prerelease client id fetching 2021-06-22 02:43:50 +02:00
Lauren Liberda f4070e6fe4 prerelease artifact generator, for youtube sig 2021-06-21 23:01:02 +02:00
Lauren Liberda b30cd7afbb [liveleak] remove extractor 2021-06-21 20:43:52 +02:00
Lauren Liberda 29389b4935 [pornhub] Add support for pornhubthbh7ap3u.onion
Original author: dstftw <dstftw@gmail.com>
2021-06-21 20:26:48 +02:00
Sergey M․ 3fc2d04e08 [pornhub] Detect geo restriction 2021-06-21 20:22:14 +02:00
Sergey M․ 30a3fb457e [pornhub] Dismiss tbr extracted from download URLs (closes #28927)
No longer reliable
2021-06-21 20:22:07 +02:00
Sergey M․ 69813b6be8 [curiositystream:collection] Extend _VALID_URL (closes #26326, closes #29117)
2021-06-21 20:22:00 +02:00
Tianyi Shi f1a365faf8 [bilibili] Strip uploader name (#29202) 2021-06-21 20:21:17 +02:00
Logan B 86c90f7d47 [umg:de] Update GraphQL API URL (#29304)
Previous one no longer resolves

Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-06-21 20:20:56 +02:00
Sergey M․ a33a92ba4b [nrk] Switch psapi URL to https (closes #29344)
Catalog calls no longer work via http
2021-06-21 20:20:49 +02:00
kikuyan 6057163d97 [postprocessor/ffmpeg] Show ffmpeg output on error (refs #22680) (#29336) 2021-06-21 20:20:43 +02:00
kikuyan aad8936157 [egghead] Add support for app.egghead.io (closes #28404) (#29303)
Co-authored-by: Sergey M. <dstftw@gmail.com>
2021-06-21 20:20:36 +02:00
kikuyan 18dd355e39 [appleconnect] Fix extraction (#29208) 2021-06-21 20:20:29 +02:00
kikuyan e628fc3794 [orf:tvthek] Add support for MPD formats (closes #28672) (#29236) 2021-06-21 20:20:18 +02:00
Sergey M․ ac99e96a1e [facebook] Improve login required detection 2021-06-21 20:19:41 +02:00
Sergey M․ 93131809f2 [youporn] Fix formats and view count extraction (closes #29216) 2021-06-21 20:19:35 +02:00
Sergey M․ 9cced7b3d2 [orf:tvthek] Fix thumbnails extraction (closes #29217) 2021-06-21 20:19:28 +02:00
Remita Amine b526b67bc1 [formula1] fix extraction(closes #29206) 2021-06-21 20:19:20 +02:00
Lauren Liberda e676b759d1 [youtube] fix the fancy georestricted error 2021-06-20 23:00:58 +02:00
31 changed files with 819 additions and 721 deletions

.gitignore (vendored)

@@ -15,6 +15,7 @@ haruhi-dl.1
 haruhi-dl.bash-completion
 haruhi-dl.fish
 haruhi_dl/extractor/lazy_extractors.py
+haruhi_dl/extractor_artifacts/
 haruhi-dl
 haruhi-dl.exe
 haruhi-dl.tar.gz

.gitlab-ci.yml

@@ -1,5 +1,6 @@
 default:
   before_script:
+    - sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
     - apk add bash
     - pip install nose

ChangeLog

@@ -1,3 +1,9 @@
+version 2021.08.01
+Extractor
+* [youtube] fixed agegate
+* [niconico] dmc downloader from youtube-dlp
+* [peertube] new URL schemas
+
 version 2021.06.20
 Core
 * [playwright] fixed headlessness

devscripts/prerelease_codegen.py

@@ -0,0 +1,32 @@
+# this is intended to speed-up some extractors,
+# which sometimes need to extract some data that doesn't change very much often,
+# but it does on random times, like youtube's signature "crypto" or soundcloud's client id
+
+import os
+from os.path import dirname as dirn
+import sys
+
+sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+from haruhi_dl import HaruhiDL
+from haruhi_dl.utils import (
+    ExtractorError,
+)
+
+hdl = HaruhiDL(params={
+    'quiet': True,
+})
+
+artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
+
+if not os.path.exists(artifact_dir):
+    os.mkdir(artifact_dir)
+
+for ie_name in (
+    'Youtube',
+    'Soundcloud',
+):
+    ie = hdl.get_info_extractor(ie_name)
+    try:
+        file_contents = ie._generate_prerelease_file()
+        with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w') as file:
+            file.write(file_contents)
+    except ExtractorError as err:
+        print(err)

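The script above writes one module per extractor into haruhi_dl/extractor_artifacts/. A minimal sketch of how such an artifact might be consumed at runtime (the helper name and the fallback behaviour are assumptions, not taken from this diff):

import importlib

def load_prerelease_artifact(ie_name):
    # e.g. haruhi_dl/extractor_artifacts/youtube.py, written by the script above
    try:
        return importlib.import_module(
            'haruhi_dl.extractor_artifacts.%s' % ie_name.lower())
    except ImportError:
        # no artifact packaged: the extractor would fall back to the slow path,
        # i.e. extracting the signature code / client id at request time
        return None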
devscripts/release.sh

@@ -1,141 +1,24 @@
 #!/bin/bash
-# IMPORTANT: the following assumptions are made
-# * the GH repo is on the origin remote
-# * the gh-pages branch is named so locally
-# * the git config user.signingkey is properly set
-# You will need
-# pip install coverage nose rsa wheel
-# TODO
-# release notes
-# make hash on local files
-
-set -e
-
-skip_tests=true
-gpg_sign_commits=""
-buildserver='localhost:8142'
-
-while true
-do
-case "$1" in
-    --run-tests)
-        skip_tests=false
-        shift
-    ;;
-    --gpg-sign-commits|-S)
-        gpg_sign_commits="-S"
-        shift
-    ;;
-    --buildserver)
-        buildserver="$2"
-        shift 2
-    ;;
-    --*)
-        echo "ERROR: unknown option $1"
-        exit 1
-    ;;
-    *)
-        break
-    ;;
-esac
-done
-
-if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
-version="$1"
-major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
-if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
-    echo "$version does not start with today's date!"
-    exit 1
-fi
-
-if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
-if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
-useless_files=$(find haruhi_dl -type f -not -name '*.py')
-if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in haruhi_dl: $useless_files"; exit 1; fi
-if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
-if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
-if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
-if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
-read -p "Is ChangeLog up to date? (y/n) " -n 1
-if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
-
-/bin/echo -e "\n### First of all, testing..."
-make clean
-if $skip_tests ; then
-    echo 'SKIPPING TESTS'
-else
-    nosetests --verbose --with-coverage --cover-package=haruhi_dl --cover-html test --stop || exit 1
-fi
-
-/bin/echo -e "\n### Changing version in version.py..."
-sed -i "s/__version__ = '.*'/__version__ = '$version'/" haruhi_dl/version.py
-
-/bin/echo -e "\n### Changing version in ChangeLog..."
-sed -i "s/<unreleased>/$version/" ChangeLog
-
-/bin/echo -e "\n### Committing documentation, templates and haruhi_dl/version.py..."
-make README.md CONTRIBUTING.md issuetemplates supportedsites
-git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md haruhi_dl/version.py ChangeLog devscripts/wine-py2exe.sh setup.py
-git commit $gpg_sign_commits -m "release $version"
-
-/bin/echo -e "\n### Now tagging, signing and pushing..."
-git tag -s -m "Release $version" "$version"
-git show "$version"
-read -p "Is it good, can I push? (y/n) " -n 1
-if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
-echo
-MASTER=$(git rev-parse --abbrev-ref HEAD)
-git push origin $MASTER:master
-git push origin "$version"
-
-/bin/echo -e "\n### OK, now it is time to build the binaries..."
-REV=$(git rev-parse HEAD)
-make haruhi-dl haruhi-dl.tar.gz
-read -p "VM running? (y/n) " -n 1
-wget "http://$buildserver/build/ytdl-org/haruhi-dl/haruhi-dl.exe?rev=$REV" -O haruhi-dl.exe
-mkdir -p "build/$version"
-mv haruhi-dl haruhi-dl.exe "build/$version"
-mv haruhi-dl.tar.gz "build/$version/haruhi-dl-$version.tar.gz"
-RELEASE_FILES="haruhi-dl haruhi-dl.exe haruhi-dl-$version.tar.gz"
-(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
-(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
-(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
-(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
-
-/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
-for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
-ROOT=$(pwd)
-python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
-
-#ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
-
-/bin/echo -e "\n### Now switching to gh-pages..."
-git clone --branch gh-pages --single-branch . build/gh-pages
-(
-    set -e
-    ORIGIN_URL=$(git config --get remote.origin.url)
-    cd build/gh-pages
-    "$ROOT/devscripts/gh-pages/add-version.py" $version
-    "$ROOT/devscripts/gh-pages/update-feed.py"
-    "$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
-    "$ROOT/devscripts/gh-pages/generate-download.py"
-    "$ROOT/devscripts/gh-pages/update-copyright.py"
-    "$ROOT/devscripts/gh-pages/update-sites.py"
-    git add *.html *.html.in update
-    git commit $gpg_sign_commits -m "release $version"
-    git push "$ROOT" gh-pages
-    git push "$ORIGIN_URL" gh-pages
-)
-rm -rf build
-
-make pypi-files
-echo "Uploading to PyPi ..."
-python setup.py sdist bdist_wheel upload
-make clean
-
-/bin/echo -e "\n### DONE!"
+if [[ "$(basename $(pwd))" == 'devscripts' ]]; then
+    cd ..
+fi
+
+v="$(date "+%Y.%m.%d")"
+
+if [[ "$(grep "'$v" haruhi_dl/version.py)" != '' ]]; then #' is this the first release of the day?
+    if [[ "$(grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' haruhi_dl/version.py)" != '' ]]; then # so, 2nd or nth?
+        v="$v.$(($(cat haruhi_dl/version.py | grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' | grep -Poh '[0-9]+$')+1))"
+    else
+        v="$v.1"
+    fi
+fi
+
+sed "s/__version__ = '.*'/__version__ = '$v'/g" -i haruhi_dl/version.py
+
+python3 setup.py build_lazy_extractors
+python3 devscripts/prerelease_codegen.py
+rm -R build dist
+python3 setup.py sdist bdist_wheel
+python3 -m twine upload dist/*

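The rewritten script derives the version from the date and appends a counter for same-day re-releases. A minimal Python rendering of that logic, for illustration only (the function name is ours):

import datetime
import re

def next_version(version_py):
    """First release of the day is YYYY.MM.DD; later ones get .1, .2, ..."""
    today = datetime.date.today().strftime('%Y.%m.%d')
    if "'" + today not in version_py:   # nothing released today yet
        return today
    m = re.search(r'\d{4}\.\d{2}\.\d{2}\.(\d+)', version_py)
    if m:                               # already a 2nd-or-later release today
        return '%s.%d' % (today, int(m.group(1)) + 1)
    return today + '.1'                 # 2nd release of the day

# e.g. on 2021-06-24, next_version("__version__ = '2021.06.24'") == '2021.06.24.1'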
haruhi_dl/downloader/__init__.py

@@ -1,5 +1,18 @@
 from __future__ import unicode_literals
 
+from ..utils import (
+    determine_protocol,
+)
+
+
+def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
+    info_copy = info_dict.copy()
+    if protocol:
+        info_copy['protocol'] = protocol
+    return get_suitable_downloader(info_copy, *args, **kwargs)
+
+
+# Some of these require _get_real_downloader
 from .common import FileDownloader
 from .f4m import F4mFD
 from .hls import HlsFD
@@ -8,16 +21,13 @@ from .rtmp import RtmpFD
 from .dash import DashSegmentsFD
 from .rtsp import RtspFD
 from .ism import IsmFD
+from .niconico import NiconicoDmcFD
 from .external import (
     get_external_downloader,
     Aria2cFD,
     FFmpegFD,
 )
-from ..utils import (
-    determine_protocol,
-)
 
 PROTOCOL_MAP = {
     'rtmp': RtmpFD,
     'm3u8_native': HlsFD,
@@ -28,6 +38,7 @@ PROTOCOL_MAP = {
     'http_dash_segments': DashSegmentsFD,
     'ism': IsmFD,
     'bittorrent': Aria2cFD,
+    'niconico_dmc': NiconicoDmcFD,
 }

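PROTOCOL_MAP now routes the synthetic 'niconico_dmc' protocol to the new downloader, and _get_real_downloader lets a wrapper downloader look up whichever FD would handle the inner protocol. A toy, self-contained rendering of that dispatch pattern (classes and map contents reduced to the bare minimum):

class HttpFD: ...
class NiconicoDmcFD: ...

PROTOCOL_MAP = {'niconico_dmc': NiconicoDmcFD}

def get_suitable_downloader(info_dict):
    # fall back to plain HTTP when no protocol-specific FD is registered
    return PROTOCOL_MAP.get(info_dict.get('protocol'), HttpFD)

def _get_real_downloader(info_dict, protocol=None):
    # same idea as the helper above: rewrite the protocol, then dispatch
    info_copy = dict(info_dict)
    if protocol:
        info_copy['protocol'] = protocol
    return get_suitable_downloader(info_copy)

# the wrapper asks for the downloader of the *inner* protocol:
assert _get_real_downloader({'protocol': 'niconico_dmc'}, protocol='http') is HttpFD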
haruhi_dl/downloader/niconico.py

@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import threading
+
+from .common import FileDownloader
+from ..downloader import _get_real_downloader
+from ..extractor.niconico import NiconicoIE
+from ..compat import compat_urllib_request
+
+
+class NiconicoDmcFD(FileDownloader):
+    """ Downloading niconico douga from DMC with heartbeat """
+
+    FD_NAME = 'niconico_dmc'
+
+    def real_download(self, filename, info_dict):
+        self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+
+        ie = NiconicoIE(self.hdl)
+        info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
+
+        fd = _get_real_downloader(info_dict, params=self.params)(self.hdl, self.params)
+
+        success = download_complete = False
+        timer = [None]
+        heartbeat_lock = threading.Lock()
+        heartbeat_url = heartbeat_info_dict['url']
+        heartbeat_data = heartbeat_info_dict['data'].encode()
+        heartbeat_interval = heartbeat_info_dict.get('interval', 30)
+
+        def heartbeat():
+            try:
+                compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
+            except Exception:
+                self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
+
+            with heartbeat_lock:
+                if not download_complete:
+                    timer[0] = threading.Timer(heartbeat_interval, heartbeat)
+                    timer[0].start()
+
+        heartbeat_info_dict['ping']()
+        self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
+
+        try:
+            heartbeat()
+            if type(fd).__name__ == 'HlsFD':
+                info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
+            success = fd.real_download(filename, info_dict)
+        finally:
+            if heartbeat_lock:
+                with heartbeat_lock:
+                    timer[0].cancel()
+                    download_complete = True
+
+        return success

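The heartbeat above is a self-rearming threading.Timer guarded by a lock, so a late timer cannot fire after the download finishes. Stripped of the niconico specifics, the pattern reduces to this sketch (all names here are ours):

import threading
import time

def start_heartbeat(ping, interval):
    """Call ping(), then keep re-arming a timer until stop() is invoked."""
    lock = threading.Lock()
    state = {'stopped': False, 'timer': None}

    def beat():
        ping()
        with lock:
            if not state['stopped']:
                state['timer'] = threading.Timer(interval, beat)
                state['timer'].start()

    def stop():
        with lock:
            state['stopped'] = True
            if state['timer'] is not None:
                state['timer'].cancel()

    beat()
    return stop

stop = start_heartbeat(lambda: print('ping'), 0.1)
time.sleep(0.35)  # roughly three or four pings
stop()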
haruhi_dl/extractor/appleconnect.py

@@ -9,10 +9,10 @@ from ..utils import (
 
 class AppleConnectIE(InfoExtractor):
-    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
-    _TEST = {
+    _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
+    _TESTS = [{
         'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
-        'md5': 'e7c38568a01ea45402570e6029206723',
+        'md5': 'c1d41f72c8bcaf222e089434619316e4',
         'info_dict': {
             'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
             'ext': 'm4v',
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
             'upload_date': '20150710',
             'timestamp': 1436545535,
         },
-    }
+    }, {
+        'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
         video_data = self._parse_json(video_json, video_id)
 
         timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
-        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+        like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
 
         return {
             'id': video_id,

haruhi_dl/extractor/bilibili.py

@@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
             webpage)
         if uploader_mobj:
             info.update({
-                'uploader': uploader_mobj.group('name'),
+                'uploader': uploader_mobj.group('name').strip(),
                 'uploader_id': uploader_mobj.group('id'),
             })
         if not info.get('uploader'):

haruhi_dl/extractor/curiositystream.py

@@ -145,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
 
 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
     IE_NAME = 'curiositystream:collection'
-    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://app.curiositystream.com/collection/2',
         'info_dict': {
@@ -157,6 +157,9 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
     }, {
         'url': 'https://curiositystream.com/series/2',
         'only_matching': True,
+    }, {
+        'url': 'https://curiositystream.com/collections/36',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

haruhi_dl/extractor/egghead.py

@@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
 
 class EggheadCourseIE(EggheadBaseIE):
     IE_DESC = 'egghead.io course'
     IE_NAME = 'egghead:course'
-    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
-    _TEST = {
+    _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
+    _TESTS = [{
         'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
         'playlist_count': 29,
         'info_dict': {
-            'id': '72',
+            'id': '432655',
             'title': 'Professor Frisby Introduces Composable Functional JavaScript',
             'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
         },
-    }
+    }, {
+        'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
@@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
 class EggheadLessonIE(EggheadBaseIE):
     IE_DESC = 'egghead.io lesson'
     IE_NAME = 'egghead:lesson'
-    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
         'info_dict': {
@@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
     }, {
         'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
         'only_matching': True,
+    }, {
+        'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):

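The loosened patterns above can be sanity-checked in isolation; a quick standalone script (not part of the change) exercising the new course/playlist regex:

import re

_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'

for url in (
    'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
    'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
):
    m = re.match(_VALID_URL, url)
    assert m is not None
    print(m.group('id'))  # same slug for both URL shapes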
haruhi_dl/extractor/extractors.py

@@ -643,10 +643,6 @@ from .linkedin import (
 from .linuxacademy import LinuxAcademyIE
 from .litv import LiTVIE
 from .livejournal import LiveJournalIE
-from .liveleak import (
-    LiveLeakIE,
-    LiveLeakEmbedIE,
-)
 from .livestream import (
     LivestreamIE,
     LivestreamOriginalIE,
@@ -1517,6 +1513,7 @@ from .videomore import (
 )
 from .videopress import VideoPressIE
 from .videotarget import VideoTargetIE
+from .vider import ViderIE
 from .vidio import VidioIE
 from .vidlii import VidLiiIE
 from .vidme import (

haruhi_dl/extractor/facebook.py

@@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
             raise ExtractorError(
                 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
                 expected=True)
-        elif '>You must log in to continue' in webpage:
+        elif any(p in webpage for p in (
+                '>You must log in to continue',
+                'id="login_form"',
+                'id="loginbutton"')):
             self.raise_login_required()
 
         if not video_data and '/watchparty/' in url:

haruhi_dl/extractor/formula1.py

@@ -5,29 +5,23 @@ from .common import InfoExtractor
 
 class Formula1IE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
-    _TESTS = [{
-        'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
-        'md5': '8c79e54be72078b26b89e0e111c0502b',
+    _VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
+    _TEST = {
+        'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
+        'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
         'info_dict': {
-            'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
+            'id': '6060988138001',
             'ext': 'mp4',
             'title': 'Race highlights - Spain 2016',
+            'timestamp': 1463332814,
+            'upload_date': '20160515',
+            'uploader_id': '6057949432001',
         },
-        'params': {
-            # m3u8 download
-            'skip_download': True,
-        },
-        'add_ie': ['Ooyala'],
-    }, {
-        'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
-        'only_matching': True,
-    }]
+        'add_ie': ['BrightcoveNew'],
+    }
+    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        ooyala_embed_code = self._search_regex(
-            r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
+        bc_id = self._match_id(url)
         return self.url_result(
-            'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
+            self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)

haruhi_dl/extractor/generic.py

@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
 from .digiteka import DigitekaIE
 from .arkena import ArkenaIE
 from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
 from .threeqsdn import ThreeQSDNIE
 from .theplatform import ThePlatformIE
 from .kaltura import KalturaIE
@@ -1640,34 +1639,6 @@ class GenericIE(InfoExtractor):
                 'upload_date': '20160409',
             },
         },
-        # LiveLeak embed
-        {
-            'url': 'http://www.wykop.pl/link/3088787/',
-            'md5': '7619da8c820e835bef21a1efa2a0fc71',
-            'info_dict': {
-                'id': '874_1459135191',
-                'ext': 'mp4',
-                'title': 'Man shows poor quality of new apartment building',
-                'description': 'The wall is like a sand pile.',
-                'uploader': 'Lake8737',
-            },
-            'add_ie': [LiveLeakIE.ie_key()],
-            'params': {
-                'force_generic_extractor': True,
-            },
-        },
-        # Another LiveLeak embed pattern (#13336)
-        {
-            'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
-            'info_dict': {
-                'id': '2eb_1496309988',
-                'ext': 'mp4',
-                'title': 'Thief robs place where everyone was armed',
-                'description': 'md5:694d73ee79e535953cf2488562288eee',
-                'uploader': 'brazilwtf',
-            },
-            'add_ie': [LiveLeakIE.ie_key()],
-        },
         # Duplicated embedded video URLs
         {
             'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@@ -2744,7 +2715,6 @@ class GenericIE(InfoExtractor):
             SoundcloudEmbedIE,
             TuneInBaseIE,
             JWPlatformIE,
-            LiveLeakIE,
             DBTVIE,
             VideaIE,
             TwentyMinutenIE,

haruhi_dl/extractor/ipla.py

@@ -8,6 +8,7 @@ from .common import InfoExtractor
 from ..utils import (
     int_or_none,
     url_or_none,
+    ExtractorError,
 )
 
@@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
             'Content-type': 'application/json'
         }
-        res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
+        res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
+        if not res.get('result'):
+            if res['error']['code'] == 13404:
+                raise ExtractorError('Video requires DRM protection', expected=True)
+            raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
         return res['result']['mediaItem']
 
     def get_url(self, media_id, source_id):
@@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
         }
         res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
+        if not res.get('result'):
+            raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
         return res['result']['url']

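Both RPC calls now fail fast on the same response contract: a missing 'result' key means the 'error' object explains why. A condensed standalone version of the check (the sample payload is illustrative; only the keys the diff actually reads are assumed):

def check_rpc(res):
    if not res.get('result'):
        if res['error']['code'] == 13404:
            raise ValueError('Video requires DRM protection')
        raise ValueError('Ipla said: %s - %s' % (
            res['error']['message'], res['error']['data']['userMessage']))
    return res['result']

sample = {'error': {'code': 13404, 'message': 'drm', 'data': {'userMessage': '...'}}}
try:
    check_rpc(sample)
except ValueError as err:
    print(err)  # Video requires DRM protection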
haruhi_dl/extractor/liveleak.py (deleted)

@@ -1,191 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class LiveLeakIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
-    _TESTS = [{
-        'url': 'http://www.liveleak.com/view?i=757_1364311680',
-        'md5': '0813c2430bea7a46bf13acf3406992f4',
-        'info_dict': {
-            'id': '757_1364311680',
-            'ext': 'mp4',
-            'description': 'extremely bad day for this guy..!',
-            'uploader': 'ljfriel2',
-            'title': 'Most unlucky car accident',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
-        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
-        'info_dict': {
-            'id': 'f93_1390833151',
-            'ext': 'mp4',
-            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
-            'uploader': 'ARD_Stinkt',
-            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        # Prochan embed
-        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
-        'md5': '42c6d97d54f1db107958760788c5f48f',
-        'info_dict': {
-            'id': '4f7_1392687779',
-            'ext': 'mp4',
-            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
-            'uploader': 'CapObveus',
-            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
-            'age_limit': 18,
-        },
-        'skip': 'Video is dead',
-    }, {
-        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
-        # Multiple resolutions
-        'url': 'http://www.liveleak.com/view?i=801_1409392012',
-        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
-        'info_dict': {
-            'id': '801_1409392012',
-            'ext': 'mp4',
-            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
-            'uploader': 'bony333',
-            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
-            'thumbnail': r're:^https?://.*\.jpg$'
-        }
-    }, {
-        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
-        'url': 'http://m.liveleak.com/view?i=763_1473349649',
-        'add_ie': ['Youtube'],
-        'info_dict': {
-            'id': '763_1473349649',
-            'ext': 'mp4',
-            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
-            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
-            'uploader': 'Ziz',
-            'upload_date': '20160908',
-            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.liveleak.com/view?i=677_1439397581',
-        'info_dict': {
-            'id': '677_1439397581',
-            'title': 'Fuel Depot in China Explosion caught on video',
-        },
-        'playlist_count': 3,
-    }, {
-        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
-        'only_matching': True,
-    }, {
-        # No original video
-        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
-        'only_matching': True,
-    }]
-
-    @staticmethod
-    def _extract_urls(webpage, **kwargs):
-        return re.findall(
-            r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
-            webpage)
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-
-        video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
-        video_description = self._og_search_description(webpage)
-        video_uploader = self._html_search_regex(
-            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
-        age_limit = int_or_none(self._search_regex(
-            r'you confirm that you are ([0-9]+) years and over.',
-            webpage, 'age limit', default=None))
-        video_thumbnail = self._og_search_thumbnail(webpage)
-
-        entries = self._parse_html5_media_entries(url, webpage, video_id)
-        if not entries:
-            # Maybe an embed?
-            embed_url = self._search_regex(
-                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
-                webpage, 'embed URL')
-            return {
-                '_type': 'url_transparent',
-                'url': embed_url,
-                'id': video_id,
-                'title': video_title,
-                'description': video_description,
-                'uploader': video_uploader,
-                'age_limit': age_limit,
-            }
-
-        for idx, info_dict in enumerate(entries):
-            formats = []
-            for a_format in info_dict['formats']:
-                if not a_format.get('height'):
-                    a_format['height'] = int_or_none(self._search_regex(
-                        r'([0-9]+)p\.mp4', a_format['url'], 'height label',
-                        default=None))
-                formats.append(a_format)
-
-                # Removing '.*.mp4' gives the raw video, which is essentially
-                # the same video without the LiveLeak logo at the top (see
-                # https://github.com/ytdl-org/youtube-dl/pull/4768)
-                orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
-                if a_format['url'] != orig_url:
-                    format_id = a_format.get('format_id')
-                    format_id = 'original' + ('-' + format_id if format_id else '')
-                    if self._is_valid_url(orig_url, video_id, format_id):
-                        formats.append({
-                            'format_id': format_id,
-                            'url': orig_url,
-                            'preference': 1,
-                        })
-            self._sort_formats(formats)
-            info_dict['formats'] = formats
-
-            # Don't append entry ID for one-video pages to keep backward compatibility
-            if len(entries) > 1:
-                info_dict['id'] = '%s_%s' % (video_id, idx + 1)
-            else:
-                info_dict['id'] = video_id
-
-            info_dict.update({
-                'title': video_title,
-                'description': video_description,
-                'uploader': video_uploader,
-                'age_limit': age_limit,
-                'thumbnail': video_thumbnail,
-            })
-
-        return self.playlist_result(entries, video_id, video_title)
-
-
-class LiveLeakEmbedIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
-
-    # See generic.py for actual test cases
-    _TESTS = [{
-        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        kind, video_id = re.match(self._VALID_URL, url).groups()
-
-        if kind == 'f':
-            webpage = self._download_webpage(url, video_id)
-            liveleak_url = self._search_regex(
-                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
-                webpage, 'LiveLeak URL', group='url')
-        else:
-            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
-
-        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())

View file

@ -1,25 +1,28 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import datetime import re
import functools
import json import json
import math import datetime
from .common import InfoExtractor from .common import InfoExtractor
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import ( from ..compat import (
compat_str,
compat_parse_qs, compat_parse_qs,
compat_urllib_parse_urlparse, compat_urllib_parse_urlparse,
) )
from ..utils import ( from ..utils import (
determine_ext,
dict_get, dict_get,
ExtractorError, ExtractorError,
float_or_none,
InAdvancePagedList,
int_or_none, int_or_none,
float_or_none,
OnDemandPagedList,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
PostProcessingError,
str_or_none,
remove_start, remove_start,
try_get, try_get,
unified_timestamp, unified_timestamp,
@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/watch/sm22312215', 'url': 'http://www.nicovideo.jp/watch/sm22312215',
'md5': 'd1a75c0823e2f629128c43e1212760f9', 'md5': 'a5bad06f1347452102953f323c69da34s',
'info_dict': { 'info_dict': {
'id': 'sm22312215', 'id': 'sm22312215',
'ext': 'mp4', 'ext': 'mp4',
@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)' _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
_NETRC_MACHINE = 'niconico' _NETRC_MACHINE = 'niconico'
_API_HEADERS = {
'X-Frontend-ID': '6',
'X-Frontend-Version': '0'
}
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -188,40 +196,92 @@ class NiconicoIE(InfoExtractor):
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
login_ok = False login_ok = False
if not login_ok: if not login_ok:
self._downloader.report_warning('unable to log in: bad username or password') self.report_warning('unable to log in: bad username or password')
return login_ok return login_ok
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality): def _get_heartbeat_info(self, info_dict):
def yesno(boolean):
return 'yes' if boolean else 'no'
session_api_data = api_data['video']['dmcInfo']['session_api'] video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
session_api_endpoint = session_api_data['urls'][0]
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality])) api_data = (
info_dict.get('_api_data')
or self._parse_json(
self._html_search_regex(
'data-api-data="([^"]+)"',
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
'API data', default='{}'),
video_id))
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
def ping():
status = try_get(
self._download_json(
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
note='Acquiring permission for downloading video',
headers=self._API_HEADERS),
lambda x: x['meta']['status'])
if status != 200:
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
yesno = lambda x: 'yes' if x else 'no'
# m3u8 (encryption)
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
protocol = 'm3u8'
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
session_api_http_parameters = {
'parameters': {
'hls_parameters': {
'encryption': {
encryption: {
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
}
},
'transfer_preset': '',
'use_ssl': yesno(session_api_endpoint['isSsl']),
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
'segment_duration': 6000,
}
}
}
# http
else:
protocol = 'http'
session_api_http_parameters = {
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['isSsl']),
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
}
}
}
session_response = self._download_json( session_response = self._download_json(
session_api_endpoint['url'], video_id, session_api_endpoint['url'], video_id,
query={'_format': 'json'}, query={'_format': 'json'},
headers={'Content-Type': 'application/json'}, headers={'Content-Type': 'application/json'},
note='Downloading JSON metadata for %s' % format_id, note='Downloading JSON metadata for %s' % info_dict['format_id'],
data=json.dumps({ data=json.dumps({
'session': { 'session': {
'client_info': { 'client_info': {
'player_id': session_api_data['player_id'], 'player_id': session_api_data.get('playerId'),
}, },
'content_auth': { 'content_auth': {
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]], 'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
'content_key_timeout': session_api_data['content_key_timeout'], 'content_key_timeout': session_api_data.get('contentKeyTimeout'),
'service_id': 'nicovideo', 'service_id': 'nicovideo',
'service_user_id': session_api_data['service_user_id'] 'service_user_id': session_api_data.get('serviceUserId')
}, },
'content_id': session_api_data['content_id'], 'content_id': session_api_data.get('contentId'),
'content_src_id_sets': [{ 'content_src_id_sets': [{
'content_src_ids': [{ 'content_src_ids': [{
'src_id_to_mux': { 'src_id_to_mux': {
'audio_src_ids': [audio_quality['id']], 'audio_src_ids': [audio_src_id],
'video_src_ids': [video_quality['id']], 'video_src_ids': [video_src_id],
} }
}] }]
}], }],
@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
'content_uri': '', 'content_uri': '',
'keep_method': { 'keep_method': {
'heartbeat': { 'heartbeat': {
'lifetime': session_api_data['heartbeat_lifetime'] 'lifetime': session_api_data.get('heartbeatLifetime')
} }
}, },
'priority': session_api_data['priority'], 'priority': session_api_data.get('priority'),
'protocol': { 'protocol': {
'name': 'http', 'name': 'http',
'parameters': { 'parameters': {
'http_parameters': { 'http_parameters': session_api_http_parameters
'parameters': {
'http_output_download_parameters': {
'use_ssl': yesno(session_api_endpoint['is_ssl']),
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
}
}
}
} }
}, },
'recipe_id': session_api_data['recipe_id'], 'recipe_id': session_api_data.get('recipeId'),
'session_operation_auth': { 'session_operation_auth': {
'session_operation_auth_by_signature': { 'session_operation_auth_by_signature': {
'signature': session_api_data['signature'], 'signature': session_api_data.get('signature'),
'token': session_api_data['token'], 'token': session_api_data.get('token'),
} }
}, },
'timing_constraint': 'unlimited' 'timing_constraint': 'unlimited'
} }
}).encode()) }).encode())
resolution = video_quality.get('resolution', {}) info_dict['url'] = session_response['data']['session']['content_uri']
info_dict['protocol'] = protocol
# get heartbeat info
heartbeat_info_dict = {
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
'data': json.dumps(session_response['data']),
# interval, convert milliseconds to seconds, then halve to make a buffer.
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
'ping': ping
}
return info_dict, heartbeat_info_dict
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
def parse_format_id(id_code):
mobj = re.match(r'''(?x)
(?:archive_)?
(?:(?P<codec>[^_]+)_)?
(?:(?P<br>[\d]+)kbps_)?
(?:(?P<res>[\d+]+)p_)?
''', '%s_' % id_code)
return mobj.groupdict() if mobj else {}
protocol = 'niconico_dmc'
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
vdict = parse_format_id(video_quality['id'])
adict = parse_format_id(audio_quality['id'])
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
return { return {
'url': session_response['data']['session']['content_uri'], 'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
'format_id': format_id, 'format_id': format_id,
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
'abr': float_or_none(audio_quality.get('bitrate'), 1000), 'vcodec': vdict.get('codec'),
'vbr': float_or_none(video_quality.get('bitrate'), 1000), 'acodec': adict.get('codec'),
'height': resolution.get('height'), 'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
'width': resolution.get('width'), 'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
'height': int_or_none(resolution.get('height', vdict.get('res'))),
'width': int_or_none(resolution.get('width')),
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
'protocol': protocol,
'http_headers': {
'Origin': 'https://www.nicovideo.jp',
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
}
} }
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
# Get video webpage. We are not actually interested in it for normal # Get video webpage for API data.
# cases, but need the cookies in order to be able to download the
# info webpage
webpage, handle = self._download_webpage_handle( webpage, handle = self._download_webpage_handle(
'http://www.nicovideo.jp/watch/' + video_id, video_id) 'http://www.nicovideo.jp/watch/' + video_id, video_id)
if video_id.startswith('so'): if video_id.startswith('so'):
@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
'data-api-data="([^"]+)"', webpage, 'data-api-data="([^"]+)"', webpage,
'API data', default='{}'), video_id) 'API data', default='{}'), video_id)
def _format_id_from_url(video_url): def get_video_info_web(items):
return 'economy' if video_real_url.endswith('low') else 'normal' return dict_get(api_data['video'], items)
try: # Get video info
video_real_url = api_data['video']['smileInfo']['url'] video_info_xml = self._download_xml(
except KeyError: # Flash videos 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
# Get flv info video_id, note='Downloading video info page')
flv_info_webpage = self._download_webpage(
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
video_id, 'Downloading flv info')
flv_info = compat_parse_qs(flv_info_webpage) def get_video_info_xml(items):
if 'url' not in flv_info: if not isinstance(items, list):
if 'deleted' in flv_info: items = [items]
raise ExtractorError('The video has been deleted.', for item in items:
expected=True) ret = xpath_text(video_info_xml, './/' + item)
elif 'closed' in flv_info: if ret:
raise ExtractorError('Niconico videos now require logging in', return ret
expected=True)
elif 'error' in flv_info:
raise ExtractorError('%s reports error: %s' % (
self.IE_NAME, flv_info['error'][0]), expected=True)
else:
raise ExtractorError('Unable to find video URL')
video_info_xml = self._download_xml( if get_video_info_xml('error'):
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, error_code = get_video_info_xml('code')
video_id, note='Downloading video info page')
def get_video_info(items): if error_code == 'DELETED':
if not isinstance(items, list): raise ExtractorError('The video has been deleted.',
items = [items] expected=True)
for item in items: elif error_code == 'NOT_FOUND':
ret = xpath_text(video_info_xml, './/' + item) raise ExtractorError('The video is not found.',
if ret: expected=True)
return ret elif error_code == 'COMMUNITY':
self.to_screen('%s: The video is community members only.' % video_id)
else:
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
video_real_url = flv_info['url'][0] # Start extracting video formats
formats = []
extension = get_video_info('movie_type') # Get HTML5 videos info
if not extension: quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
extension = determine_ext(video_real_url) if not quality_info:
raise ExtractorError('The video can\'t be downloaded', expected=True)
formats = [{ for audio_quality in quality_info.get('audios') or {}:
'url': video_real_url, for video_quality in quality_info.get('videos') or {}:
'ext': extension, if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
'format_id': _format_id_from_url(video_real_url), continue
}] formats.append(self._extract_format_for_quality(
else: api_data, video_id, audio_quality, video_quality))
formats = []
dmc_info = api_data['video'].get('dmcInfo') # Get flv/swf info
if dmc_info: # "New" HTML5 videos timestamp = None
quality_info = dmc_info['quality'] video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
for audio_quality in quality_info['audios']: if video_real_url:
for video_quality in quality_info['videos']: is_economy = video_real_url.endswith('low')
if not audio_quality['available'] or not video_quality['available']:
continue
formats.append(self._extract_format_for_quality(
api_data, video_id, audio_quality, video_quality))
self._sort_formats(formats) if is_economy:
else: # "Old" HTML5 videos self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
formats = [{
# Invoking ffprobe to determine resolution
pp = FFmpegPostProcessor(self._downloader)
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
try:
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
except PostProcessingError as err:
raise ExtractorError(err.msg, expected=True)
v_stream = a_stream = {}
# Some complex swf files doesn't have video stream (e.g. nm4809023)
for stream in metadata['streams']:
if stream['codec_type'] == 'video':
v_stream = stream
elif stream['codec_type'] == 'audio':
a_stream = stream
# Community restricted videos seem to have issues with the thumb API not returning anything at all
filesize = int(
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
or metadata['format']['size']
)
extension = (
get_video_info_xml('movie_type')
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
)
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
timestamp = (
parse_iso8601(get_video_info_web('first_retrieve'))
or unified_timestamp(get_video_info_web('postedDateTime'))
)
metadata_timestamp = (
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
or timestamp if extension != 'mp4' else 0
)
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
# If movie file size is unstable, old server movie is not source movie.
if filesize > 1:
formats.append({
'url': video_real_url, 'url': video_real_url,
'ext': 'mp4', 'format_id': 'smile' if not is_economy else 'smile_low',
'format_id': _format_id_from_url(video_real_url), 'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
}] 'ext': extension,
'container': extension,
'vcodec': v_stream.get('codec_name'),
'acodec': a_stream.get('codec_name'),
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
'height': int_or_none(v_stream.get('height')),
'width': int_or_none(v_stream.get('width')),
'source_preference': 5 if not is_economy else -2,
'quality': 5 if is_source and not is_economy else None,
'filesize': filesize
})
def get_video_info(items): self._sort_formats(formats)
return dict_get(api_data['video'], items)
# Start extracting information # Start extracting information
title = get_video_info('title') title = (
if not title: get_video_info_xml('title') # prefer to get the untranslated original title
title = self._og_search_title(webpage, default=None) or get_video_info_web(['originalTitle', 'title'])
if not title: or self._og_search_title(webpage, default=None)
title = self._html_search_regex( or self._html_search_regex(
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>', r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
webpage, 'video title') webpage, 'video title'))
watch_api_data_string = self._html_search_regex( watch_api_data_string = self._html_search_regex(
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>', r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
video_detail = watch_api_data.get('videoDetail', {}) video_detail = watch_api_data.get('videoDetail', {})
thumbnail = ( thumbnail = (
get_video_info(['thumbnail_url', 'thumbnailURL']) self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
or dict_get( # choose highest from 720p to 240p
get_video_info_web('thumbnail'),
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
or self._html_search_meta('image', webpage, 'thumbnail', default=None) or self._html_search_meta('image', webpage, 'thumbnail', default=None)
or video_detail.get('thumbnail')) or video_detail.get('thumbnail'))
description = get_video_info('description') description = get_video_info_web('description')
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
or unified_timestamp(get_video_info('postedDateTime')))
if not timestamp: if not timestamp:
match = self._html_search_meta('datePublished', webpage, 'date published', default=None) match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
if match: if match:
@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
timestamp = parse_iso8601( timestamp = parse_iso8601(
video_detail['postedAt'].replace('/', '-'), video_detail['postedAt'].replace('/', '-'),
delimiter=' ', timezone=datetime.timedelta(hours=9)) delimiter=' ', timezone=datetime.timedelta(hours=9))
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
view_count = int_or_none(get_video_info(['view_counter', 'viewCount'])) view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
if not view_count: if not view_count:
match = self._html_search_regex( match = self._html_search_regex(
r'>Views: <strong[^>]*>([^<]+)</strong>', r'>Views: <strong[^>]*>([^<]+)</strong>',
webpage, 'view count', default=None) webpage, 'view count', default=None)
if match: if match:
view_count = int_or_none(match.replace(',', '')) view_count = int_or_none(match.replace(',', ''))
view_count = view_count or video_detail.get('viewCount') view_count = (
view_count
or video_detail.get('viewCount')
or try_get(api_data, lambda x: x['video']['count']['view']))
comment_count = (
int_or_none(get_video_info_web('comment_num'))
or video_detail.get('commentCount')
or try_get(api_data, lambda x: x['video']['count']['comment']))
comment_count = (int_or_none(get_video_info('comment_num'))
or video_detail.get('commentCount')
or try_get(api_data, lambda x: x['thread']['commentCount']))
if not comment_count: if not comment_count:
match = self._html_search_regex( match = self._html_search_regex(
r'>Comments: <strong[^>]*>([^<]+)</strong>', r'>Comments: <strong[^>]*>([^<]+)</strong>',
@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
comment_count = int_or_none(match.replace(',', '')) comment_count = int_or_none(match.replace(',', ''))
duration = (parse_duration( duration = (parse_duration(
get_video_info('length') get_video_info_web('length')
or self._html_search_meta( or self._html_search_meta(
'video:duration', webpage, 'video duration', default=None)) 'video:duration', webpage, 'video duration', default=None))
or video_detail.get('length') or video_detail.get('length')
or get_video_info('duration')) or get_video_info_web('duration'))
webpage_url = get_video_info('watch_url') or url webpage_url = get_video_info_web('watch_url') or url
# for channel movie and community movie
channel_id = try_get(
api_data,
(lambda x: x['channel']['globalId'],
lambda x: x['community']['globalId']))
channel = try_get(
api_data,
(lambda x: x['channel']['name'],
lambda x: x['community']['name']))
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null" # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
# in the JSON, which will cause None to be returned instead of {}. # in the JSON, which will cause None to be returned instead of {}.
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {} owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id') uploader_id = str_or_none(
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname') get_video_info_web(['ch_id', 'user_id'])
or owner.get('id')
or channel_id
)
uploader = (
get_video_info_web(['ch_name', 'user_nickname'])
or owner.get('nickname')
or channel
)
return { return {
'id': video_id, 'id': video_id,
'_api_data': api_data,
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
'uploader': uploader, 'uploader': uploader,
'timestamp': timestamp, 'timestamp': timestamp,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'channel': channel,
'channel_id': channel_id,
'view_count': view_count, 'view_count': view_count,
'comment_count': comment_count, 'comment_count': comment_count,
'duration': duration, 'duration': duration,
@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
class NiconicoPlaylistIE(InfoExtractor): class NiconicoPlaylistIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)' _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.nicovideo.jp/mylist/27411728', 'url': 'http://www.nicovideo.jp/mylist/27411728',
@ -456,60 +623,77 @@ class NiconicoPlaylistIE(InfoExtractor):
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728', 'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
'only_matching': True, 'only_matching': True,
}] }]
-    _PAGE_SIZE = 100
-
-    def _call_api(self, list_id, resource, query):
-        return self._download_json(
-            'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
-            'Downloading %s JSON metatdata' % resource, query=query,
-            headers={'X-Frontend-Id': 6})['data']['mylist']
-
-    def _parse_owner(self, item):
-        owner = item.get('owner') or {}
-        if owner:
-            return {
-                'uploader': owner.get('name'),
-                'uploader_id': owner.get('id'),
-            }
-        return {}
-
-    def _fetch_page(self, list_id, page):
-        page += 1
-        items = self._call_api(list_id, 'page %d' % page, {
-            'page': page,
-            'pageSize': self._PAGE_SIZE,
-        })['items']
-        for item in items:
-            video = item.get('video') or {}
-            video_id = video.get('id')
-            if not video_id:
-                continue
-            count = video.get('count') or {}
-            get_count = lambda x: int_or_none(count.get(x))
-            info = {
-                '_type': 'url',
-                'id': video_id,
-                'title': video.get('title'),
-                'url': 'https://www.nicovideo.jp/watch/' + video_id,
-                'description': video.get('shortDescription'),
-                'duration': int_or_none(video.get('duration')),
-                'view_count': get_count('view'),
-                'comment_count': get_count('comment'),
-                'ie_key': NiconicoIE.ie_key(),
-            }
-            info.update(self._parse_owner(video))
-            yield info
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }

     def _real_extract(self, url):
         list_id = self._match_id(url)
-        mylist = self._call_api(list_id, 'list', {
-            'pageSize': 1,
-        })
-        entries = InAdvancePagedList(
-            functools.partial(self._fetch_page, list_id),
-            math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
-            self._PAGE_SIZE)
-        result = self.playlist_result(
-            entries, list_id, mylist.get('name'), mylist.get('description'))
-        result.update(self._parse_owner(mylist))
-        return result
+
+        def get_page_data(pagenum, pagesize):
+            return self._download_json(
+                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
+                query={'page': 1 + pagenum, 'pageSize': pagesize},
+                headers=self._API_HEADERS).get('data').get('mylist')
+
+        data = get_page_data(0, 1)
+        title = data.get('name')
+        description = data.get('description')
+        uploader = data.get('owner').get('name')
+        uploader_id = data.get('owner').get('id')
+
+        def pagefunc(pagenum):
+            data = get_page_data(pagenum, 25)
+            return ({
+                '_type': 'url',
+                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
+            } for item in data.get('items'))
+
+        return {
+            '_type': 'playlist',
+            'id': list_id,
+            'title': title,
+            'description': description,
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'entries': OnDemandPagedList(pagefunc, 25),
+        }
+
+
+class NiconicoUserIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
+    _TEST = {
+        'url': 'https://www.nicovideo.jp/user/419948',
+        'info_dict': {
+            'id': '419948',
+        },
+        'playlist_mincount': 101,
+    }
+
+    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
+    _PAGE_SIZE = 100
+
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0'
+    }
+
+    def _entries(self, list_id, ):
+        total_count = 1
+        count = page_num = 0
+        while count < total_count:
+            json_parsed = self._download_json(
+                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
+                headers=self._API_HEADERS,
+                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
+            if not page_num:
+                total_count = int_or_none(json_parsed['data'].get('totalCount'))
+            for entry in json_parsed["data"]["items"]:
+                count += 1
+                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
+            page_num += 1
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
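Both new Niconico list extractors page through nvapi 25 or 100 items at a time; `OnDemandPagedList` only fetches a page when its slice of entries is actually consumed. A standalone sketch of the same lazy pattern, with a fake API standing in for `get_page_data()`:

```python
import itertools

PAGE_SIZE = 25

def fake_mylist_api(page):
    # Stand-in for get_page_data() above; a real call hits
    # nvapi.nicovideo.jp with the X-Frontend-* headers.
    items = [{'watchId': 'sm%d' % i} for i in range(60)]
    return items[page * PAGE_SIZE:(page + 1) * PAGE_SIZE]

def entries():
    # One page per iteration, stop on the first empty page -- the same
    # shape as pagefunc() wrapped in OnDemandPagedList.
    for page in itertools.count():
        items = fake_mylist_api(page)
        if not items:
            return
        for item in items:
            yield 'http://www.nicovideo.jp/watch/' + item['watchId']

print(sum(1 for _ in entries()))  # 60
```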


@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
     def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
         return self._download_json(
-            urljoin('http://psapi.nrk.no/', path),
+            urljoin('https://psapi.nrk.no/', path),
             video_id, note or 'Downloading %s JSON' % item,
             fatal=fatal, query=query,
             headers={'Accept-Encoding': 'gzip, deflate, br'})


@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
                 elif ext == 'f4m':
                     formats.extend(self._extract_f4m_formats(
                         src, video_id, f4m_id=format_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        src, video_id, mpd_id=format_id, fatal=False))
                 else:
                     formats.append({
                         'format_id': format_id,
@@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
                 })

             upload_date = unified_strdate(sd.get('created_date'))
+
+            thumbnails = []
+            preview = sd.get('preview_image_url')
+            if preview:
+                thumbnails.append({
+                    'id': 'preview',
+                    'url': preview,
+                    'preference': 0,
+                })
+            image = sd.get('image_full_url')
+            if not image and len(data_jsb) == 1:
+                image = self._og_search_thumbnail(webpage)
+            if image:
+                thumbnails.append({
+                    'id': 'full',
+                    'url': image,
+                    'preference': 1,
+                })
+
             entries.append({
                 '_type': 'video',
                 'id': video_id,
@@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
                 'description': sd.get('description'),
                 'duration': int_or_none(sd.get('duration_in_seconds')),
                 'upload_date': upload_date,
-                'thumbnail': sd.get('image_full_url'),
+                'thumbnails': thumbnails,
             })

         return {


@@ -21,7 +21,7 @@ from ..utils import (

 class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
-    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+    _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
     _API_BASE = 'https://%s/api/v1/%s/%s/%s'
     _SH_VALID_CONTENT_STRINGS = (
         '<title>PeerTube<',
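The widened `_UUID_RE` is what makes the PeerTube 3.3+ `/w/` URLs below work: new-style ids are 22 base62 characters, while classic UUIDs keep matching the second alternative. A quick standalone check (both ids are taken from the tests added further down in this diff):

```python
import re

_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'

for video_id in ('3fbif9S3WmtTP8gGsC5HBd',                 # PeerTube >= 3.3 short id
                 '122d093a-1ede-43bd-bd34-59d2931ffc5e'):  # classic UUID
    assert re.fullmatch(_UUID_RE, video_id)
print('both id styles match')
```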
@@ -180,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):

 class PeerTubeSHIE(PeerTubeBaseExtractor):
     _VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
-    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
+    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)

     _TESTS = [{
         'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
-        'md5': '9bed8c0137913e17b86334e5885aacff',
+        'md5': '8563064d245a4be5705bddb22bb00a28',
         'info_dict': {
             'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
             'ext': 'mp4',
             'title': 'What is PeerTube?',
-            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
+            'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
             'thumbnail': r're:https?://.*\.(?:jpg|png)',
             'timestamp': 1538391166,
             'upload_date': '20181001',
@@ -220,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
             'upload_date': '20200420',
             'uploader': 'Drew DeVault',
         }
+    }, {
+        # new url scheme since PeerTube 3.3
+        'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
+        'info_dict': {
+            'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
+            'ext': 'mp4',
+            'title': 'E2E tests',
+            'uploader_id': '37855',
+            'timestamp': 1589276219,
+            'upload_date': '20200512',
+            'uploader': 'chocobozzz',
+        },
+    }, {
+        'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
+        'only_matching': True,
+    }, {
+        'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
+        'only_matching': True,
+    }, {
+        'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
+        'only_matching': True,
     }, {
         'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
         'only_matching': True,
@@ -289,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):

         description = None
         if webpage:
-            description = self._og_search_description(webpage)
+            description = self._og_search_description(webpage, default=None)
         if not description:
             full_description = self._call_api(
                 host, 'videos', video_id, 'description', note='Downloading description JSON',
@@ -305,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):

 class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
     _VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
-    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
+    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)

     _TESTS = [{
         'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
@@ -316,6 +337,9 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
             'uploader': 'Internet. Czas działać!',
         },
         'playlist_mincount': 14,
+    }, {
+        'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
+        'only_matching': True,
     }]

     def _selfhosted_extract(self, url, webpage=None):
@@ -352,18 +376,21 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):

 class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
     _VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
-    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
+    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'

     _TESTS = [{
         'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
         'info_dict': {
             'id': '2',
-            'title': 'internet_czas_dzialac',
-            'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
+            'title': 'Internet. Czas działać!',
+            'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
             'uploader_id': 3,
             'uploader': 'Internet. Czas działać!',
         },
         'playlist_mincount': 14,
+    }, {
+        'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
+        'only_matching': True,
     }]

     def _selfhosted_extract(self, url, webpage=None):
@@ -401,18 +428,21 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):

 class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
     _VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
-    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
+    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'

     _TESTS = [{
         'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
         'info_dict': {
             'id': '3',
-            'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
+            'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
             'uploader': 'Internet. Czas działać!',
             'title': 'Internet. Czas działać!',
             'uploader_id': 3,
         },
         'playlist_mincount': 14,
+    }, {
+        'url': 'https://video.internet-czas-dzialac.pl/a/icd',
+        'only_matching': True,
     }]

     def _selfhosted_extract(self, url, webpage=None):


@@ -91,6 +91,14 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
             'upload_date': '20201116',
         },
     }]
+    }, {
+        # PR4 audition - other frontend
+        'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
+        'info_dict': {
+            'id': '2610977',
+            'ext': 'mp3',
+            'title': 'Pogłos 29 października godz. 23:01',
+        },
     }, {
         'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
         'only_matching': True,
@@ -113,24 +121,34 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):

         content = self._search_regex(
             r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
-            webpage, 'content')
+            webpage, 'content', default=None)

         timestamp = unified_timestamp(self._html_search_regex(
             r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
-            webpage, 'timestamp', fatal=False))
+            webpage, 'timestamp', default=None))

-        thumbnail_url = self._og_search_thumbnail(webpage)
+        thumbnail_url = self._og_search_thumbnail(webpage, default=None)

         title = self._og_search_title(webpage).strip()

+        description = strip_or_none(self._og_search_description(webpage, default=None))
+
+        if not content:
+            return {
+                'id': playlist_id,
+                'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
+                'title': title,
+                'description': description,
+                'timestamp': timestamp,
+                'thumbnail': thumbnail_url,
+            }
+
         entries = self._extract_webpage_player_entries(content, playlist_id, {
             'title': title,
             'timestamp': timestamp,
             'thumbnail': thumbnail_url,
         })

-        description = strip_or_none(self._og_search_description(webpage))
-
         return self.playlist_result(entries, playlist_id, title, description)


@@ -31,6 +31,7 @@ from ..utils import (

 class PornHubBaseIE(InfoExtractor):
     _REQUIRES_PLAYWRIGHT = True
     _NETRC_MACHINE = 'pornhub'
+    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'

     def _download_webpage_handle(self, *args, **kwargs):
         def dl(*args, **kwargs):
@@ -125,11 +126,13 @@ class PornHubIE(PornHubBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                     (?:
-                        (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+                        (?:[^/]+\.)?
+                        %s
+                        /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                         (?:www\.)?thumbzilla\.com/video/
                     )
                     (?P<id>[\da-z]+)
-                    '''
+                    ''' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
         'info_dict': {
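With the host pattern factored into `_PORNHUB_HOST_RE`, every `_VALID_URL` in the file picks up the .onion mirror via `%` substitution. Note that the onion alternative sits outside the named `host` group, so `host` is `None` for onion URLs. A standalone check against two of the test URLs:

```python
import re

_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
_VALID_URL = r'''(?x)
    https?://
    (?:
        (?:[^/]+\.)?
        %s
        /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
        (?:www\.)?thumbzilla\.com/video/
    )
    (?P<id>[\da-z]+)
''' % _PORNHUB_HOST_RE

for url in ('http://www.pornhub.com/view_video.php?viewkey=648719015',
            'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156'):
    m = re.match(_VALID_URL, url)
    print(m.group('id'), m.group('host'))  # host is None for the onion mirror
```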
@@ -238,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
     }, {
         'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
         'only_matching': True,
+    }, {
+        # geo restricted
+        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
+    }, {
+        'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
+        'only_matching': True,
     }]

     @staticmethod
@@ -277,6 +287,11 @@ class PornHubIE(PornHubBaseIE):
                 'PornHub said: %s' % error_msg,
                 expected=True, video_id=video_id)

+        if any(re.search(p, webpage) for p in (
+                r'class=["\']geoBlocked["\']',
+                r'>\s*This content is unavailable in your country')):
+            self.raise_geo_restricted()
+
         # video_title from flashvars contains whitespace instead of non-ASCII (see
         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
         # on that anymore.
@@ -410,17 +425,14 @@ class PornHubIE(PornHubBaseIE):
                     format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
                 return
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
+            if not height:
+                height = int_or_none(self._search_regex(
+                    r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+                    default=None))
             formats.append({
                 'url': format_url,
                 'format_id': '%dp' % height if height else None,
                 'height': height,
-                'tbr': tbr,
             })

         for video_url, height in video_urls:
@@ -442,7 +454,8 @@ class PornHubIE(PornHubBaseIE):
                 add_format(video_url, height)
                 continue
             add_format(video_url)
-        self._sort_formats(formats)
+        self._sort_formats(
+            formats, field_preference=('height', 'width', 'fps', 'format_id'))

         video_uploader = self._html_search_regex(
             r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@@ -511,7 +524,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):

 class PornHubUserIE(PornHubPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph',
         'playlist_mincount': 118,
@@ -540,6 +553,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
         # Same as before, multi page
         'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
         'only_matching': True,
+    }, {
+        'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
+        'only_matching': True,
     }]

     def _real_extract(self, url):
@@ -615,7 +631,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):

 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+    _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
         'url': 'https://www.pornhub.com/model/zoe_ph/videos',
         'only_matching': True,
@@ -720,6 +736,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
     }, {
         'url': 'https://de.pornhub.com/playlist/4667351',
         'only_matching': True,
+    }, {
+        'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
+        'only_matching': True,
     }]

     @classmethod
@@ -730,7 +749,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):

 class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
-    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
     _TESTS = [{
         'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
         'info_dict': {
@@ -740,4 +759,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
     }, {
         'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
         'only_matching': True,
+    }, {
+        'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
+        'only_matching': True,
     }]


@@ -30,6 +30,10 @@ from ..utils import (
     url_or_none,
     urlhandle_detect_ext,
 )
+try:
+    from ..extractor_artifacts.soundcloud import prerelease_client_id
+except ImportError:
+    prerelease_client_id = None


 class SoundcloudEmbedIE(InfoExtractor):
@@ -289,6 +293,10 @@ class SoundcloudIE(InfoExtractor):
                 return
         raise ExtractorError('Unable to extract client id')

+    def _generate_prerelease_file(self):
+        self._update_client_id()
+        return 'prerelease_client_id = {!r}\n'.format(self._CLIENT_ID)
+
     def _download_json(self, *args, **kwargs):
         non_fatal = kwargs.get('fatal') is False
         if non_fatal:
@@ -310,7 +318,7 @@ class SoundcloudIE(InfoExtractor):
             raise

     def _real_initialize(self):
-        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+        self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or prerelease_client_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'

     @classmethod
     def _resolv_url(cls, url):
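The `extractor_artifacts` try/except is the release-time artifact pattern: `_generate_prerelease_file()` writes a module containing a freshly scraped client id, and a plain source checkout simply falls back. The same lookup chain in isolation (`load_cached_client_id()` is a stand-in for the downloader cache call):

```python
try:
    # Generated at release time; absent in a bare source checkout.
    from extractor_artifacts.soundcloud import prerelease_client_id
except ImportError:
    prerelease_client_id = None

def load_cached_client_id():
    # Stand-in for self._downloader.cache.load('soundcloud', 'client_id')
    return None

CLIENT_ID = (load_cached_client_id()
             or prerelease_client_id
             or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk')
print(CLIENT_ID)
```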


@@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
     def _real_extract(self, url):
         video_id = self._match_id(url)
        video_data = self._download_json(
-            'https://api.universal-music.de/graphql',
+            'https://graphql.universal-music.de/',
             video_id, query={
                 'query': '''{
   universalMusic(channel:16) {
@@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
         formats = []

         def add_m3u8_format(format_id):
-            m3u8_formats = self._extract_m3u8_formats(
+            formats.extend(self._extract_m3u8_formats(
                 hls_url_template % format_id, video_id, 'mp4',
-                'm3u8_native', m3u8_id='hls', fatal='False')
-            if m3u8_formats and m3u8_formats[0].get('height'):
-                formats.extend(m3u8_formats)
+                'm3u8_native', m3u8_id='hls', fatal=False))

         for f in video_data.get('formats', []):
             f_url = f.get('url')


@@ -0,0 +1,37 @@
+from .common import InfoExtractor
+
+
+class ViderIE(InfoExtractor):
+    _VALID_URL = r'https?://vider\.(?:pl|info)/(?:vid/\+f|embed/video/)(?P<id>[a-z\d]+)'
+    _TESTS = [{
+        'url': 'https://vider.info/vid/+fsx51se',
+        'info_dict': {
+            'id': 'sx51se',
+            'ext': 'mp4',
+            'title': 'Big Buck Bunny',
+            'upload_date': '20210906',
+            'timestamp': 1630927351,
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(f'https://vider.info/vid/+f{video_id}', video_id)
+
+        json_ld = self._parse_json(
+            self._search_regex(
+                r'(?s)<script type="application/ld\+json">(.+?)</script>',
+                webpage, 'JSON-LD'), video_id)
+        info_dict = self._json_ld(json_ld, video_id)
+
+        # generated SEO junk
+        info_dict['description'] = None
+        info_dict['id'] = video_id
+        info_dict['formats'] = [{
+            'url': self._search_regex(r'\?file=(.+)', json_ld['embedUrl'], 'video url'),
+            'http_headers': {
+                'Referer': 'https://vider.info/',
+            },
+        }]
+
+        return info_dict
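The new extractor takes the direct video URL from the `?file=` query of the JSON-LD `embedUrl` rather than probing the page further, roughly like this (the `embedUrl` value below is invented for illustration):

```python
import re

embed_url = 'https://vider.info/embed/video/sx51se?file=https://stream.vider.info/upload/sx51se.mp4'
video_url = re.search(r'\?file=(.+)', embed_url).group(1)
print(video_url)  # everything after ?file=, fetched with a vider.info Referer
```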


@@ -4,13 +4,12 @@ import re

 from .common import InfoExtractor
 from ..utils import (
+    extract_attributes,
     int_or_none,
     str_to_int,
-    unescapeHTML,
     unified_strdate,
     url_or_none,
 )
-from ..aes import aes_decrypt_text


 class YouPornIE(InfoExtractor):
@@ -34,6 +33,7 @@ class YouPornIE(InfoExtractor):
             'tags': list,
             'age_limit': 18,
         },
+        'skip': 'This video has been disabled',
     }, {
         # Unknown uploader
         'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
@@ -78,6 +78,40 @@ class YouPornIE(InfoExtractor):
         video_id = mobj.group('id')
         display_id = mobj.group('display_id') or video_id

+        definitions = self._download_json(
+            'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
+            display_id)
+
+        formats = []
+        for definition in definitions:
+            if not isinstance(definition, dict):
+                continue
+            video_url = url_or_none(definition.get('videoUrl'))
+            if not video_url:
+                continue
+            f = {
+                'url': video_url,
+                'filesize': int_or_none(definition.get('videoSize')),
+            }
+            height = int_or_none(definition.get('quality'))
+            # Video URL's path looks like this:
+            #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
+            # We will benefit from it by extracting some metadata
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                bitrate = int(mobj.group('bitrate'))
+                f.update({
+                    'format_id': '%dp-%dk' % (height, bitrate),
+                    'tbr': bitrate,
+                })
+            f['height'] = height
+            formats.append(f)
+        self._sort_formats(formats)
+
         webpage = self._download_webpage(
             'http://www.youporn.com/watch/%s' % video_id, display_id,
             headers={'Cookie': 'age_verified=1'})
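The height/bitrate regex relies on the path layout documented in the comment above. Run standalone against abbreviated versions of those sample paths:

```python
import re

for path in ('/201012/17/505835/720p_1500k_505835/video.mp4',
             '/videos/201703/11/109285532/1080P_4000K_109285532.mp4'):
    mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', path)
    print(int(mobj.group('height')), int(mobj.group('bitrate')))
# -> 720 1500, then 1080 4000
```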
@@ -88,65 +122,6 @@ class YouPornIE(InfoExtractor):
             webpage, default=None) or self._html_search_meta(
             'title', webpage, fatal=True)

-        links = []
-
-        # Main source
-        definitions = self._parse_json(
-            self._search_regex(
-                r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
-                'media definitions', default='[]'),
-            video_id, fatal=False)
-        if definitions:
-            for definition in definitions:
-                if not isinstance(definition, dict):
-                    continue
-                video_url = url_or_none(definition.get('videoUrl'))
-                if video_url:
-                    links.append(video_url)
-
-        # Fallback #1, this also contains extra low quality 180p format
-        for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
-            links.append(link)
-
-        # Fallback #2 (unavailable as at 22.06.2017)
-        sources = self._search_regex(
-            r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
-        if sources:
-            for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
-                links.append(link)
-
-        # Fallback #3 (unavailable as at 22.06.2017)
-        for _, link in re.findall(
-                r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
-            links.append(link)
-
-        # Fallback #4, encrypted links (unavailable as at 22.06.2017)
-        for _, encrypted_link in re.findall(
-                r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
-            links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
-
-        formats = []
-        for video_url in set(unescapeHTML(link) for link in links):
-            f = {
-                'url': video_url,
-            }
-            # Video URL's path looks like this:
-            #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
-            #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
-            #  /videos/201703/11/109285532/1080P_4000K_109285532.mp4
-            # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
-            if mobj:
-                height = int(mobj.group('height'))
-                bitrate = int(mobj.group('bitrate'))
-                f.update({
-                    'format_id': '%dp-%dk' % (height, bitrate),
-                    'height': height,
-                    'tbr': bitrate,
-                })
-            formats.append(f)
-        self._sort_formats(formats)
-
         description = self._html_search_regex(
             r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
             webpage, 'description',
@@ -169,13 +144,12 @@ class YouPornIE(InfoExtractor):

         age_limit = self._rta_search(webpage)

-        average_rating = int_or_none(self._search_regex(
-            r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
-            webpage, 'average rating', fatal=False))
-
-        view_count = str_to_int(self._search_regex(
-            r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
-            webpage, 'view count', fatal=False, group='count'))
+        view_count = None
+        views = self._search_regex(
+            r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
+            'views', default=None)
+        if views:
+            view_count = str_to_int(extract_attributes(views).get('data-value'))

         comment_count = str_to_int(self._search_regex(
             r'>All [Cc]omments? \(([\d,.]+)\)',
             webpage, 'comment count', default=None))
@@ -201,7 +175,6 @@ class YouPornIE(InfoExtractor):
             'duration': duration,
             'uploader': uploader,
             'upload_date': upload_date,
-            'average_rating': average_rating,
             'view_count': view_count,
             'comment_count': comment_count,
             'categories': categories,
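View counts now come from a `data-value` attribute instead of scraping digits out of the element text. A simplified sketch of that path (both helpers are cut-down stand-ins for the real `haruhi_dl.utils` functions, and the HTML snippet is invented):

```python
import re

snippet = '<div class="js_videoInfoViews" data-value="1,234,567">'

def extract_attributes(tag):
    # Simplified stand-in for ..utils.extract_attributes
    return dict(re.findall(r'([a-z-]+)="([^"]*)"', tag))

def str_to_int(s):
    # Simplified stand-in for ..utils.str_to_int
    return int(re.sub(r'[,.]', '', s)) if s else None

print(str_to_int(extract_attributes(snippet).get('data-value')))  # 1234567
```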


@@ -4,6 +4,7 @@ from __future__ import unicode_literals

 from datetime import datetime
 import json
 import hashlib
+from inspect import getsource
 import random
 import re
 import time
@@ -45,6 +46,10 @@ from ..utils import (
     urlencode_postdata,
     GeoRestrictedError,
 )
+try:
+    from ..extractor_artifacts.youtube import _decrypt_signature_protected
+except ImportError:
+    _decrypt_signature_protected = None


 class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             raise ExtractorError('Cannot identify player %r' % player_url)
         return id_m.group('id')

-    def _extract_signature_function(self, video_id, player_url, example_sig):
+    def _extract_signature_function(self, video_id, player_url):
         player_id = self._extract_player_info(player_url)

         # Read from filesystem cache
@@ -1012,31 +1017,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 '    return %s\n') % (signature_id_tuple, expr_code)
         self.to_screen('Extracted signature function:\n' + code)

-    def mess(self, a, b):
+    @staticmethod
+    def mess(a, b):
         c = a[0]
         a[0] = a[b % len(a)]
         a[b % len(a)] = c
         return a

-    def _decrypt_signature_protected(self, s):
-        a = list(s)
-        a = self.mess(a, 49)
-        a = self.mess(a, 26)
-        a.reverse()
-        a = self.mess(a, 62)
-        a.reverse()
-        a = a[2:]
-        return "".join(a)
-
     def _full_signature_handling(self, sig, player_url, video_id):
-        signature = self._decrypt_signature_protected(sig)
-        if re.match(self._VALID_SIG_VALUE_RE, signature):
-            return signature
+        if _decrypt_signature_protected:
+            signature = _decrypt_signature_protected(sig)
+            if re.match(self._VALID_SIG_VALUE_RE, signature):
+                return signature
         if self._downloader.params.get('verbose'):
             self.to_screen("Built-in signature decryption failed, trying dynamic")
-        sig_decrypt_stack = self._extract_signature_function(video_id, player_url, sig)
+        sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
         return self._do_decrypt_signature(sig, sig_decrypt_stack)

+    def _generate_prerelease_file(self):
+        # It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
+        video_id = 'ieQ1rAIjzXc'
+        self._set_consent()
+        webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
+        player_url = self._search_regex(r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)', webpage, 'player url')
+        sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
+
+        func = re.sub(r'(?m)^    ', '', getsource(self.mess).replace('@staticmethod', ''))
+        func += '\n\ndef _decrypt_signature_protected(sig):\n'
+        stack = ['a = list(sig)']
+        for fun in sig_decrypt_stack:
+            if fun[0] == 'splice':
+                stack.append(f'a = a[{fun[1]}:]')
+            elif fun[0] == 'reverse':
+                stack.append('a.reverse()')
+            elif fun[0] == 'mess':
+                stack.append(f'a = mess(a, {fun[1]})')
+            else:
+                raise ExtractorError('Unknown stack action: %s' % (fun[0]))
+        stack.append("return ''.join(a)")
+        return func + '\n'.join(map(lambda x: ' ' * 4 + x, stack)) + '\n'
+
     def _get_subtitles(self, video_id, webpage):
         try:
             subs_doc = self._download_xml(
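`_generate_prerelease_file()` compiles the abstract operation stack returned by `_extract_signature_function()` into plain Python source, which ships as the `extractor_artifacts.youtube` module imported at the top of the file. The round trip in isolation (the stack below is made up; real stacks are parsed out of YouTube's `base.js`):

```python
def mess(a, b):
    # Same swap primitive as YoutubeIE.mess above.
    c = a[0]
    a[0] = a[b % len(a)]
    a[b % len(a)] = c
    return a

def compile_stack(stack):
    # Mirrors the body of _generate_prerelease_file(): turn the abstract
    # operation list into source text for the artifact module.
    lines = ['a = list(sig)']
    for op, arg in stack:
        if op == 'splice':
            lines.append('a = a[%d:]' % arg)
        elif op == 'reverse':
            lines.append('a.reverse()')
        elif op == 'mess':
            lines.append('a = mess(a, %d)' % arg)
        else:
            raise ValueError('Unknown stack action: %s' % op)
    lines.append("return ''.join(a)")
    return ('def _decrypt_signature_protected(sig):\n'
            + '\n'.join('    ' + line for line in lines) + '\n')

source = compile_stack([('mess', 49), ('reverse', None), ('splice', 2)])
print(source)

# Executing the generated source yields a working decryption function:
namespace = {'mess': mess}
exec(source, namespace)
print(namespace['_decrypt_signature_protected']('abcdefghij'))  # hgfedcbj
```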
@@ -1422,29 +1441,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
                 or re.search(r'player-age-gate-content">', video_webpage) is not None):
             age_gate = True
-            # We simulate the access to the video from www.youtube.com/v/{video_id}
-            # this can be viewed without login into Youtube
-            data = compat_urllib_parse_urlencode({
-                'video_id': video_id,
-                'eurl': 'https://youtube.googleapis.com/v/' + video_id,
-                'html5': 1,
-                'c': 'TVHTML5',
-                'cver': '6.20180913',
-            })
-            video_info_url = proto + '://www.youtube.com/get_video_info?' + data
             try:
-                video_info_webpage = self._download_webpage(
-                    video_info_url, video_id,
-                    note='Downloading age-gated video info',
+                yti1_player = self._download_webpage(
+                    proto + '://www.youtube.com/youtubei/v1/player', video_id,
+                    headers={
+                        'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.0 Safari/537.36',
+                        'Content-Type': 'application/json',
+                        'X-Goog-Api-Key': self._YOUTUBE_API_KEY,
+                    },
+                    data=bytes(json.dumps({
+                        'context': {
+                            'client': {
+                                'clientName': 'WEB',
+                                'clientVersion': '2.20210721.00.00',
+                                'clientScreen': 'EMBED',
+                            },
+                        },
+                        'videoId': video_id,
+                    }).encode('utf-8')),
+                    note='Downloading age-gated player info',
                     errnote='unable to download video info')
             except ExtractorError:
-                video_info_webpage = None
-            if video_info_webpage:
-                video_info = compat_parse_qs(video_info_webpage)
-                pl_response = video_info.get('player_response', [None])[0]
-                player_response = extract_player_response(pl_response, video_id)
+                yti1_player = None
+            if yti1_player:
+                player_response = extract_player_response(yti1_player, video_id)
                 add_dash_mpd(video_info)
-                view_count = extract_view_count(video_info)
+                view_count = extract_view_count(video_id)
             else:
                 age_gate = False
                 # Try looking directly into the video webpage
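The age-gate fallback now POSTs to the innertube `youtubei/v1/player` endpoint instead of the retired `get_video_info`; the `clientScreen: 'EMBED'` context is what unlocks *some* age-gated videos, per the commit message. The request body in isolation (placeholder video id):

```python
import json

payload = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20210721.00.00',
            'clientScreen': 'EMBED',  # pretend to be an embedded player
        },
    },
    'videoId': 'ieQ1rAIjzXc',  # placeholder
}
# Sent as the POST body, with Content-Type: application/json and the
# X-Goog-Api-Key header, exactly as in the diff above.
body = json.dumps(payload).encode('utf-8')
print(body[:60])
```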
@@ -1814,8 +1836,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     error_desc,
                     countries=self._search_regex(
                         r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
-                        video_webpage, 'allowed region list').split(','),
-                    expected=True)
+                        video_webpage, 'allowed region list').split(','))

             if error_desc and 'Playback on other websites has been disabled' in error_desc:
                 raise ExtractorError(
                     'Embeds disabled for this video, account (with passed credit card or photo ID check, if in EU/EEA/CH/UK) is required',
@@ -2224,8 +2245,9 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
         webpage = self._download_webpage(url, list_id,
                                          note='Downloading %s page #1 (webpage)' % (self._LIST_NAME))
         return self._parse_json(
-            self._search_regex(
-                r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});',
+            self._search_regex((
+                r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});</script>',
+                r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});'),
                 webpage, 'initial data JSON'), 'initial data JSON'), webpage


@@ -231,7 +231,10 @@ class FFmpegPostProcessor(PostProcessor):
         stdout, stderr = p.communicate()
         if p.returncode != 0:
             stderr = stderr.decode('utf-8', 'replace')
-            msg = stderr.strip().split('\n')[-1]
+            msgs = stderr.strip().split('\n')
+            msg = msgs[-1]
+            if self._downloader.params.get('verbose', False):
+                self._downloader.to_screen('[debug] ' + '\n'.join(msgs[:-1]))
             raise FFmpegPostProcessorError(msg)
         self.try_utime(out_path, oldest_mtime, oldest_mtime)
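The postprocessor change keeps the last stderr line as the error message but no longer discards the rest of ffmpeg's output in verbose mode. The same pattern as a standalone helper (assumes `ffmpeg` is on PATH):

```python
import subprocess

def run_ffmpeg(args, verbose=False):
    # Surface the full stderr in verbose mode; raise with only the last
    # line, which is usually the actual error.
    p = subprocess.Popen(
        ['ffmpeg'] + args,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        msgs = stderr.decode('utf-8', 'replace').strip().split('\n')
        if verbose:
            print('[debug] ' + '\n'.join(msgs[:-1]))
        raise RuntimeError(msgs[-1])
    return stdout

# run_ffmpeg(['-i', 'in.mp4', 'out.mkv'], verbose=True)
```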


@@ -1,6 +1,6 @@
 from __future__ import unicode_literals

-__version__ = '2021.06.20'
+__version__ = '2021.08.01'

 if __name__ == '__main__':
     print(__version__)


@@ -115,7 +115,7 @@ setup(
     packages=[
         'haruhi_dl',
         'haruhi_dl.extractor', 'haruhi_dl.downloader',
-        'haruhi_dl.postprocessor'],
+        'haruhi_dl.postprocessor', 'haruhi_dl.extractor_artifacts'],

     # Provokes warning on most systems (why?!)
     # test_suite = 'nose.collector',