Compare commits
35 commits
v2021.06.2
...
master
Author | SHA1 | Date | |
---|---|---|---|
2f375d447c | |||
d464b29113 | |||
19602fb3f5 | |||
a550e21b8c | |||
1ae67712e8 | |||
Dominika Liberda | a96bf110da | ||
973652cf4d | |||
d81137a604 | |||
a0d52ce5be | |||
Dominika Liberda | 81b5018d99 | ||
Dominika Liberda | 31b7bf5bdb | ||
Dominika Liberda | a0cb1b40a2 | ||
Dominika Liberda | c3e48f4934 | ||
Dominika Liberda | ca6cbb6234 | ||
7858dc7b9f | |||
2234b1100c | |||
75442522b2 | |||
f4070e6fe4 | |||
b30cd7afbb | |||
29389b4935 | |||
3fc2d04e08 | |||
30a3fb457e | |||
69813b6be8 | |||
f1a365faf8 | |||
86c90f7d47 | |||
a33a92ba4b | |||
6057163d97 | |||
aad8936157 | |||
18dd355e39 | |||
e628fc3794 | |||
ac99e96a1e | |||
93131809f2 | |||
9cced7b3d2 | |||
b526b67bc1 | |||
e676b759d1 |
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -15,6 +15,7 @@ haruhi-dl.1
|
||||||
haruhi-dl.bash-completion
|
haruhi-dl.bash-completion
|
||||||
haruhi-dl.fish
|
haruhi-dl.fish
|
||||||
haruhi_dl/extractor/lazy_extractors.py
|
haruhi_dl/extractor/lazy_extractors.py
|
||||||
|
haruhi_dl/extractor_artifacts/
|
||||||
haruhi-dl
|
haruhi-dl
|
||||||
haruhi-dl.exe
|
haruhi-dl.exe
|
||||||
haruhi-dl.tar.gz
|
haruhi-dl.tar.gz
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
default:
|
default:
|
||||||
before_script:
|
before_script:
|
||||||
|
- sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
|
||||||
- apk add bash
|
- apk add bash
|
||||||
- pip install nose
|
- pip install nose
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
version 2021.08.01
|
||||||
|
Extractor
|
||||||
|
* [youtube] fixed agegate
|
||||||
|
* [niconico] dmc downloader from youtube-dlp
|
||||||
|
* [peertube] new URL schemas
|
||||||
|
|
||||||
version 2021.06.20
|
version 2021.06.20
|
||||||
Core
|
Core
|
||||||
* [playwright] fixed headlessness
|
* [playwright] fixed headlessness
|
||||||
|
|
32
devscripts/prerelease_codegen.py
Normal file
32
devscripts/prerelease_codegen.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# this is intended to speed-up some extractors,
|
||||||
|
# which sometimes need to extract some data that doesn't change very much often,
|
||||||
|
# but it does on random times, like youtube's signature "crypto" or soundcloud's client id
|
||||||
|
|
||||||
|
import os
|
||||||
|
from os.path import dirname as dirn
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from haruhi_dl import HaruhiDL
|
||||||
|
from haruhi_dl.utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
hdl = HaruhiDL(params={
|
||||||
|
'quiet': True,
|
||||||
|
})
|
||||||
|
artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
|
||||||
|
if not os.path.exists(artifact_dir):
|
||||||
|
os.mkdir(artifact_dir)
|
||||||
|
|
||||||
|
for ie_name in (
|
||||||
|
'Youtube',
|
||||||
|
'Soundcloud',
|
||||||
|
):
|
||||||
|
ie = hdl.get_info_extractor(ie_name)
|
||||||
|
try:
|
||||||
|
file_contents = ie._generate_prerelease_file()
|
||||||
|
with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w') as file:
|
||||||
|
file.write(file_contents)
|
||||||
|
except ExtractorError as err:
|
||||||
|
print(err)
|
|
@ -1,141 +1,24 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# IMPORTANT: the following assumptions are made
|
if [[ "$(basename $(pwd))" == 'devscripts' ]]; then
|
||||||
# * the GH repo is on the origin remote
|
cd ..
|
||||||
# * the gh-pages branch is named so locally
|
|
||||||
# * the git config user.signingkey is properly set
|
|
||||||
|
|
||||||
# You will need
|
|
||||||
# pip install coverage nose rsa wheel
|
|
||||||
|
|
||||||
# TODO
|
|
||||||
# release notes
|
|
||||||
# make hash on local files
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
skip_tests=true
|
|
||||||
gpg_sign_commits=""
|
|
||||||
buildserver='localhost:8142'
|
|
||||||
|
|
||||||
while true
|
|
||||||
do
|
|
||||||
case "$1" in
|
|
||||||
--run-tests)
|
|
||||||
skip_tests=false
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--gpg-sign-commits|-S)
|
|
||||||
gpg_sign_commits="-S"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--buildserver)
|
|
||||||
buildserver="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
--*)
|
|
||||||
echo "ERROR: unknown option $1"
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
|
||||||
version="$1"
|
|
||||||
major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
|
|
||||||
if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
|
|
||||||
echo "$version does not start with today's date!"
|
|
||||||
exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
|
v="$(date "+%Y.%m.%d")"
|
||||||
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
|
|
||||||
useless_files=$(find haruhi_dl -type f -not -name '*.py')
|
|
||||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in haruhi_dl: $useless_files"; exit 1; fi
|
|
||||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
|
||||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
|
||||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
|
||||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
|
||||||
|
|
||||||
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
if [[ "$(grep "'$v" haruhi_dl/version.py)" != '' ]]; then #' is this the first release of the day?
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
if [[ "$(grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' haruhi_dl/version.py)" != '' ]]; then # so, 2nd or nth?
|
||||||
|
v="$v.$(($(cat haruhi_dl/version.py | grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' | grep -Poh '[0-9]+$')+1))"
|
||||||
/bin/echo -e "\n### First of all, testing..."
|
else
|
||||||
make clean
|
v="$v.1"
|
||||||
if $skip_tests ; then
|
fi
|
||||||
echo 'SKIPPING TESTS'
|
|
||||||
else
|
|
||||||
nosetests --verbose --with-coverage --cover-package=haruhi_dl --cover-html test --stop || exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
/bin/echo -e "\n### Changing version in version.py..."
|
sed "s/__version__ = '.*'/__version__ = '$v'/g" -i haruhi_dl/version.py
|
||||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" haruhi_dl/version.py
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Changing version in ChangeLog..."
|
python3 setup.py build_lazy_extractors
|
||||||
sed -i "s/<unreleased>/$version/" ChangeLog
|
python3 devscripts/prerelease_codegen.py
|
||||||
|
rm -R build dist
|
||||||
/bin/echo -e "\n### Committing documentation, templates and haruhi_dl/version.py..."
|
python3 setup.py sdist bdist_wheel
|
||||||
make README.md CONTRIBUTING.md issuetemplates supportedsites
|
python3 -m twine upload dist/*
|
||||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md haruhi_dl/version.py ChangeLog
|
devscripts/wine-py2exe.sh setup.py
|
||||||
git commit $gpg_sign_commits -m "release $version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
|
||||||
git tag -s -m "Release $version" "$version"
|
|
||||||
git show "$version"
|
|
||||||
read -p "Is it good, can I push? (y/n) " -n 1
|
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
|
||||||
echo
|
|
||||||
MASTER=$(git rev-parse --abbrev-ref HEAD)
|
|
||||||
git push origin $MASTER:master
|
|
||||||
git push origin "$version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### OK, now it is time to build the binaries..."
|
|
||||||
REV=$(git rev-parse HEAD)
|
|
||||||
make haruhi-dl haruhi-dl.tar.gz
|
|
||||||
read -p "VM running? (y/n) " -n 1
|
|
||||||
wget "http://$buildserver/build/ytdl-org/haruhi-dl/haruhi-dl.exe?rev=$REV" -O haruhi-dl.exe
|
|
||||||
mkdir -p "build/$version"
|
|
||||||
mv haruhi-dl haruhi-dl.exe "build/$version"
|
|
||||||
mv haruhi-dl.tar.gz "build/$version/haruhi-dl-$version.tar.gz"
|
|
||||||
RELEASE_FILES="haruhi-dl haruhi-dl.exe haruhi-dl-$version.tar.gz"
|
|
||||||
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
|
|
||||||
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
|
|
||||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
|
||||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
|
|
||||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
|
||||||
|
|
||||||
ROOT=$(pwd)
|
|
||||||
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
|
|
||||||
|
|
||||||
#ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
|
||||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
|
||||||
(
|
|
||||||
set -e
|
|
||||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
|
||||||
cd build/gh-pages
|
|
||||||
"$ROOT/devscripts/gh-pages/add-version.py" $version
|
|
||||||
"$ROOT/devscripts/gh-pages/update-feed.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
|
|
||||||
"$ROOT/devscripts/gh-pages/generate-download.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
|
||||||
git add *.html *.html.in update
|
|
||||||
git commit $gpg_sign_commits -m "release $version"
|
|
||||||
git push "$ROOT" gh-pages
|
|
||||||
git push "$ORIGIN_URL" gh-pages
|
|
||||||
)
|
|
||||||
rm -rf build
|
|
||||||
|
|
||||||
make pypi-files
|
|
||||||
echo "Uploading to PyPi ..."
|
|
||||||
python setup.py sdist bdist_wheel upload
|
|
||||||
make clean
|
|
||||||
|
|
||||||
/bin/echo -e "\n### DONE!"
|
|
||||||
|
|
|
@ -1,5 +1,18 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_protocol,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
|
||||||
|
info_copy = info_dict.copy()
|
||||||
|
if protocol:
|
||||||
|
info_copy['protocol'] = protocol
|
||||||
|
return get_suitable_downloader(info_copy, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Some of these require _get_real_downloader
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .f4m import F4mFD
|
from .f4m import F4mFD
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
|
@ -8,16 +21,13 @@ from .rtmp import RtmpFD
|
||||||
from .dash import DashSegmentsFD
|
from .dash import DashSegmentsFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
|
from .niconico import NiconicoDmcFD
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
Aria2cFD,
|
Aria2cFD,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
)
|
)
|
||||||
|
|
||||||
from ..utils import (
|
|
||||||
determine_protocol,
|
|
||||||
)
|
|
||||||
|
|
||||||
PROTOCOL_MAP = {
|
PROTOCOL_MAP = {
|
||||||
'rtmp': RtmpFD,
|
'rtmp': RtmpFD,
|
||||||
'm3u8_native': HlsFD,
|
'm3u8_native': HlsFD,
|
||||||
|
@ -28,6 +38,7 @@ PROTOCOL_MAP = {
|
||||||
'http_dash_segments': DashSegmentsFD,
|
'http_dash_segments': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
'bittorrent': Aria2cFD,
|
'bittorrent': Aria2cFD,
|
||||||
|
'niconico_dmc': NiconicoDmcFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
55
haruhi_dl/downloader/niconico.py
Normal file
55
haruhi_dl/downloader/niconico.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from ..downloader import _get_real_downloader
|
||||||
|
from ..extractor.niconico import NiconicoIE
|
||||||
|
from ..compat import compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoDmcFD(FileDownloader):
|
||||||
|
""" Downloading niconico douga from DMC with heartbeat """
|
||||||
|
|
||||||
|
FD_NAME = 'niconico_dmc'
|
||||||
|
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||||
|
|
||||||
|
ie = NiconicoIE(self.hdl)
|
||||||
|
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||||
|
|
||||||
|
fd = _get_real_downloader(info_dict, params=self.params)(self.hdl, self.params)
|
||||||
|
|
||||||
|
success = download_complete = False
|
||||||
|
timer = [None]
|
||||||
|
heartbeat_lock = threading.Lock()
|
||||||
|
heartbeat_url = heartbeat_info_dict['url']
|
||||||
|
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||||
|
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||||
|
|
||||||
|
def heartbeat():
|
||||||
|
try:
|
||||||
|
compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
|
||||||
|
except Exception:
|
||||||
|
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||||
|
|
||||||
|
with heartbeat_lock:
|
||||||
|
if not download_complete:
|
||||||
|
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||||
|
timer[0].start()
|
||||||
|
|
||||||
|
heartbeat_info_dict['ping']()
|
||||||
|
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||||
|
try:
|
||||||
|
heartbeat()
|
||||||
|
if type(fd).__name__ == 'HlsFD':
|
||||||
|
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||||
|
success = fd.real_download(filename, info_dict)
|
||||||
|
finally:
|
||||||
|
if heartbeat_lock:
|
||||||
|
with heartbeat_lock:
|
||||||
|
timer[0].cancel()
|
||||||
|
download_complete = True
|
||||||
|
return success
|
|
@ -9,10 +9,10 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class AppleConnectIE(InfoExtractor):
|
class AppleConnectIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
'ext': 'm4v',
|
'ext': 'm4v',
|
||||||
|
@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
|
||||||
'upload_date': '20150710',
|
'upload_date': '20150710',
|
||||||
'timestamp': 1436545535,
|
'timestamp': 1436545535,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
|
||||||
|
|
||||||
video_data = self._parse_json(video_json, video_id)
|
video_data = self._parse_json(video_json, video_id)
|
||||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -233,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
|
||||||
webpage)
|
webpage)
|
||||||
if uploader_mobj:
|
if uploader_mobj:
|
||||||
info.update({
|
info.update({
|
||||||
'uploader': uploader_mobj.group('name'),
|
'uploader': uploader_mobj.group('name').strip(),
|
||||||
'uploader_id': uploader_mobj.group('id'),
|
'uploader_id': uploader_mobj.group('id'),
|
||||||
})
|
})
|
||||||
if not info.get('uploader'):
|
if not info.get('uploader'):
|
||||||
|
|
|
@ -145,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
|
|
||||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
IE_NAME = 'curiositystream:collection'
|
IE_NAME = 'curiositystream:collection'
|
||||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://app.curiositystream.com/collection/2',
|
'url': 'https://app.curiositystream.com/collection/2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -157,6 +157,9 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://curiositystream.com/series/2',
|
'url': 'https://curiositystream.com/series/2',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://curiositystream.com/collections/36',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
|
||||||
class EggheadCourseIE(EggheadBaseIE):
|
class EggheadCourseIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||||
'playlist_count': 29,
|
'playlist_count': 29,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '72',
|
'id': '432655',
|
||||||
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||||
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
|
||||||
class EggheadLessonIE(EggheadBaseIE):
|
class EggheadLessonIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io lesson'
|
IE_DESC = 'egghead.io lesson'
|
||||||
IE_NAME = 'egghead:lesson'
|
IE_NAME = 'egghead:lesson'
|
||||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -643,10 +643,6 @@ from .linkedin import (
|
||||||
from .linuxacademy import LinuxAcademyIE
|
from .linuxacademy import LinuxAcademyIE
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .livejournal import LiveJournalIE
|
from .livejournal import LiveJournalIE
|
||||||
from .liveleak import (
|
|
||||||
LiveLeakIE,
|
|
||||||
LiveLeakEmbedIE,
|
|
||||||
)
|
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
LivestreamOriginalIE,
|
LivestreamOriginalIE,
|
||||||
|
@ -1517,6 +1513,7 @@ from .videomore import (
|
||||||
)
|
)
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
from .videotarget import VideoTargetIE
|
from .videotarget import VideoTargetIE
|
||||||
|
from .vider import ViderIE
|
||||||
from .vidio import VidioIE
|
from .vidio import VidioIE
|
||||||
from .vidlii import VidLiiIE
|
from .vidlii import VidLiiIE
|
||||||
from .vidme import (
|
from .vidme import (
|
||||||
|
|
|
@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||||
expected=True)
|
expected=True)
|
||||||
elif '>You must log in to continue' in webpage:
|
elif any(p in webpage for p in (
|
||||||
|
'>You must log in to continue',
|
||||||
|
'id="login_form"',
|
||||||
|
'id="loginbutton"')):
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
if not video_data and '/watchparty/' in url:
|
if not video_data and '/watchparty/' in url:
|
||||||
|
|
|
@ -5,29 +5,23 @@ from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class Formula1IE(InfoExtractor):
|
class Formula1IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
_VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
|
||||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
'id': '6060988138001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Race highlights - Spain 2016',
|
'title': 'Race highlights - Spain 2016',
|
||||||
|
'timestamp': 1463332814,
|
||||||
|
'upload_date': '20160515',
|
||||||
|
'uploader_id': '6057949432001',
|
||||||
},
|
},
|
||||||
'params': {
|
'add_ie': ['BrightcoveNew'],
|
||||||
# m3u8 download
|
}
|
||||||
'skip_download': True,
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
|
||||||
},
|
|
||||||
'add_ie': ['Ooyala'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
bc_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
ooyala_embed_code = self._search_regex(
|
|
||||||
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
|
||||||
|
|
|
@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
from .arkena import ArkenaIE
|
from .arkena import ArkenaIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
from .liveleak import LiveLeakIE
|
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
|
@ -1640,34 +1639,6 @@ class GenericIE(InfoExtractor):
|
||||||
'upload_date': '20160409',
|
'upload_date': '20160409',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# LiveLeak embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.wykop.pl/link/3088787/',
|
|
||||||
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '874_1459135191',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Man shows poor quality of new apartment building',
|
|
||||||
'description': 'The wall is like a sand pile.',
|
|
||||||
'uploader': 'Lake8737',
|
|
||||||
},
|
|
||||||
'add_ie': [LiveLeakIE.ie_key()],
|
|
||||||
'params': {
|
|
||||||
'force_generic_extractor': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Another LiveLeak embed pattern (#13336)
|
|
||||||
{
|
|
||||||
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2eb_1496309988',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Thief robs place where everyone was armed',
|
|
||||||
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
|
||||||
'uploader': 'brazilwtf',
|
|
||||||
},
|
|
||||||
'add_ie': [LiveLeakIE.ie_key()],
|
|
||||||
},
|
|
||||||
# Duplicated embedded video URLs
|
# Duplicated embedded video URLs
|
||||||
{
|
{
|
||||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||||
|
@ -2744,7 +2715,6 @@ class GenericIE(InfoExtractor):
|
||||||
SoundcloudEmbedIE,
|
SoundcloudEmbedIE,
|
||||||
TuneInBaseIE,
|
TuneInBaseIE,
|
||||||
JWPlatformIE,
|
JWPlatformIE,
|
||||||
LiveLeakIE,
|
|
||||||
DBTVIE,
|
DBTVIE,
|
||||||
VideaIE,
|
VideaIE,
|
||||||
TwentyMinutenIE,
|
TwentyMinutenIE,
|
||||||
|
|
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
|
||||||
'Content-type': 'application/json'
|
'Content-type': 'application/json'
|
||||||
}
|
}
|
||||||
|
|
||||||
res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
||||||
|
if not res.get('result'):
|
||||||
|
if res['error']['code'] == 13404:
|
||||||
|
raise ExtractorError('Video requires DRM protection', expected=True)
|
||||||
|
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||||
return res['result']['mediaItem']
|
return res['result']['mediaItem']
|
||||||
|
|
||||||
def get_url(self, media_id, source_id):
|
def get_url(self, media_id, source_id):
|
||||||
|
@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
|
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
|
||||||
|
if not res.get('result'):
|
||||||
|
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||||
return res['result']['url']
|
return res['result']['url']
|
||||||
|
|
|
@ -1,191 +0,0 @@
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import int_or_none
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'http://www.liveleak.com/view?i=757_1364311680',
|
|
||||||
'md5': '0813c2430bea7a46bf13acf3406992f4',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '757_1364311680',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'extremely bad day for this guy..!',
|
|
||||||
'uploader': 'ljfriel2',
|
|
||||||
'title': 'Most unlucky car accident',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.liveleak.com/view?i=f93_1390833151',
|
|
||||||
'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'f93_1390833151',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
|
|
||||||
'uploader': 'ARD_Stinkt',
|
|
||||||
'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# Prochan embed
|
|
||||||
'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
|
|
||||||
'md5': '42c6d97d54f1db107958760788c5f48f',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '4f7_1392687779',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
|
|
||||||
'uploader': 'CapObveus',
|
|
||||||
'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
|
|
||||||
'age_limit': 18,
|
|
||||||
},
|
|
||||||
'skip': 'Video is dead',
|
|
||||||
}, {
|
|
||||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/5983
|
|
||||||
# Multiple resolutions
|
|
||||||
'url': 'http://www.liveleak.com/view?i=801_1409392012',
|
|
||||||
'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '801_1409392012',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
|
|
||||||
'uploader': 'bony333',
|
|
||||||
'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
|
|
||||||
'thumbnail': r're:^https?://.*\.jpg$'
|
|
||||||
}
|
|
||||||
}, {
|
|
||||||
# Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
|
|
||||||
'url': 'http://m.liveleak.com/view?i=763_1473349649',
|
|
||||||
'add_ie': ['Youtube'],
|
|
||||||
'info_dict': {
|
|
||||||
'id': '763_1473349649',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
|
|
||||||
'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
|
|
||||||
'uploader': 'Ziz',
|
|
||||||
'upload_date': '20160908',
|
|
||||||
'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True,
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.liveleak.com/view?i=677_1439397581',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '677_1439397581',
|
|
||||||
'title': 'Fuel Depot in China Explosion caught on video',
|
|
||||||
},
|
|
||||||
'playlist_count': 3,
|
|
||||||
}, {
|
|
||||||
'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
|
|
||||||
'only_matching': True,
|
|
||||||
}, {
|
|
||||||
# No original video
|
|
||||||
'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _extract_urls(webpage, **kwargs):
|
|
||||||
return re.findall(
|
|
||||||
r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
|
|
||||||
webpage)
|
|
||||||
|
|
||||||
def _real_extract(self, url):
    """Extract a LiveLeak view page into a playlist of video entries.

    Falls back to a transparent URL result when the page only hosts a
    third-party (prochan/YouTube) embed instead of HTML5 media.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    # Page-level metadata shared by every entry on the page.
    video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
    video_description = self._og_search_description(webpage)
    video_uploader = self._html_search_regex(
        r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
    # Age gate text embeds the numeric limit directly in the confirmation prompt.
    age_limit = int_or_none(self._search_regex(
        r'you confirm that you are ([0-9]+) years and over.',
        webpage, 'age limit', default=None))
    video_thumbnail = self._og_search_thumbnail(webpage)

    entries = self._parse_html5_media_entries(url, webpage, video_id)
    if not entries:
        # Maybe an embed? No native <video>/<audio> tags found, so defer to
        # the embedded player's extractor while keeping our metadata.
        embed_url = self._search_regex(
            r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
            webpage, 'embed URL')
        return {
            '_type': 'url_transparent',
            'url': embed_url,
            'id': video_id,
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader,
            'age_limit': age_limit,
        }

    for idx, info_dict in enumerate(entries):
        formats = []
        for a_format in info_dict['formats']:
            if not a_format.get('height'):
                # Derive height from the "<N>p.mp4" suffix in the URL when the
                # HTML5 parser did not supply one.
                a_format['height'] = int_or_none(self._search_regex(
                    r'([0-9]+)p\.mp4', a_format['url'], 'height label',
                    default=None))
            formats.append(a_format)

            # Removing '.*.mp4' gives the raw video, which is essentially
            # the same video without the LiveLeak logo at the top (see
            # https://github.com/ytdl-org/youtube-dl/pull/4768)
            orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
            if a_format['url'] != orig_url:
                format_id = a_format.get('format_id')
                format_id = 'original' + ('-' + format_id if format_id else '')
                # Only advertise the logo-free variant if the server actually
                # serves it; prefer it over the watermarked stream.
                if self._is_valid_url(orig_url, video_id, format_id):
                    formats.append({
                        'format_id': format_id,
                        'url': orig_url,
                        'preference': 1,
                    })
        self._sort_formats(formats)
        info_dict['formats'] = formats

        # Don't append entry ID for one-video pages to keep backward compatibility
        if len(entries) > 1:
            info_dict['id'] = '%s_%s' % (video_id, idx + 1)
        else:
            info_dict['id'] = video_id

        # Propagate the page-level metadata onto each individual entry.
        info_dict.update({
            'title': video_title,
            'description': video_description,
            'uploader': video_uploader,
            'age_limit': age_limit,
            'thumbnail': video_thumbnail,
        })

    return self.playlist_result(entries, video_id, video_title)
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakEmbedIE(InfoExtractor):
    """Resolves ``ll_embed`` player URLs to the matching LiveLeak view page."""

    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        kind, video_id = re.match(self._VALID_URL, url).groups()

        if kind != 'f':
            # 'i' and 't' ids map straight onto a view-page query parameter.
            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
        else:
            # 'f' embeds only reveal the view URL inside the player markup.
            webpage = self._download_webpage(url, video_id)
            liveleak_url = self._search_regex(
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                webpage, 'LiveLeak URL', group='url')

        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
|
|
@ -1,25 +1,28 @@
|
||||||
|
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import re
|
||||||
import functools
|
|
||||||
import json
|
import json
|
||||||
import math
|
import datetime
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
|
||||||
InAdvancePagedList,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
PostProcessingError,
|
||||||
|
str_or_none,
|
||||||
remove_start,
|
remove_start,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sm22312215',
|
'id': 'sm22312215',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
@ -188,40 +196,92 @@ class NiconicoIE(InfoExtractor):
|
||||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||||
login_ok = False
|
login_ok = False
|
||||||
if not login_ok:
|
if not login_ok:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self.report_warning('unable to log in: bad username or password')
|
||||||
return login_ok
|
return login_ok
|
||||||
|
|
||||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
def _get_heartbeat_info(self, info_dict):
|
||||||
def yesno(boolean):
|
|
||||||
return 'yes' if boolean else 'no'
|
|
||||||
|
|
||||||
session_api_data = api_data['video']['dmcInfo']['session_api']
|
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||||
session_api_endpoint = session_api_data['urls'][0]
|
|
||||||
|
|
||||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
api_data = (
|
||||||
|
info_dict.get('_api_data')
|
||||||
|
or self._parse_json(
|
||||||
|
self._html_search_regex(
|
||||||
|
'data-api-data="([^"]+)"',
|
||||||
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||||
|
'API data', default='{}'),
|
||||||
|
video_id))
|
||||||
|
|
||||||
|
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||||
|
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||||
|
|
||||||
|
def ping():
|
||||||
|
status = try_get(
|
||||||
|
self._download_json(
|
||||||
|
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
||||||
|
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
||||||
|
note='Acquiring permission for downloading video',
|
||||||
|
headers=self._API_HEADERS),
|
||||||
|
lambda x: x['meta']['status'])
|
||||||
|
if status != 200:
|
||||||
|
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
||||||
|
|
||||||
|
yesno = lambda x: 'yes' if x else 'no'
|
||||||
|
|
||||||
|
# m3u8 (encryption)
|
||||||
|
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
||||||
|
protocol = 'm3u8'
|
||||||
|
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'hls_parameters': {
|
||||||
|
'encryption': {
|
||||||
|
encryption: {
|
||||||
|
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
||||||
|
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'transfer_preset': '',
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
'segment_duration': 6000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# http
|
||||||
|
else:
|
||||||
|
protocol = 'http'
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'http_output_download_parameters': {
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
session_response = self._download_json(
|
session_response = self._download_json(
|
||||||
session_api_endpoint['url'], video_id,
|
session_api_endpoint['url'], video_id,
|
||||||
query={'_format': 'json'},
|
query={'_format': 'json'},
|
||||||
headers={'Content-Type': 'application/json'},
|
headers={'Content-Type': 'application/json'},
|
||||||
note='Downloading JSON metadata for %s' % format_id,
|
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
||||||
data=json.dumps({
|
data=json.dumps({
|
||||||
'session': {
|
'session': {
|
||||||
'client_info': {
|
'client_info': {
|
||||||
'player_id': session_api_data['player_id'],
|
'player_id': session_api_data.get('playerId'),
|
||||||
},
|
},
|
||||||
'content_auth': {
|
'content_auth': {
|
||||||
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||||
'content_key_timeout': session_api_data['content_key_timeout'],
|
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||||
'service_id': 'nicovideo',
|
'service_id': 'nicovideo',
|
||||||
'service_user_id': session_api_data['service_user_id']
|
'service_user_id': session_api_data.get('serviceUserId')
|
||||||
},
|
},
|
||||||
'content_id': session_api_data['content_id'],
|
'content_id': session_api_data.get('contentId'),
|
||||||
'content_src_id_sets': [{
|
'content_src_id_sets': [{
|
||||||
'content_src_ids': [{
|
'content_src_ids': [{
|
||||||
'src_id_to_mux': {
|
'src_id_to_mux': {
|
||||||
'audio_src_ids': [audio_quality['id']],
|
'audio_src_ids': [audio_src_id],
|
||||||
'video_src_ids': [video_quality['id']],
|
'video_src_ids': [video_src_id],
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}],
|
}],
|
||||||
|
@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
|
||||||
'content_uri': '',
|
'content_uri': '',
|
||||||
'keep_method': {
|
'keep_method': {
|
||||||
'heartbeat': {
|
'heartbeat': {
|
||||||
'lifetime': session_api_data['heartbeat_lifetime']
|
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'priority': session_api_data['priority'],
|
'priority': session_api_data.get('priority'),
|
||||||
'protocol': {
|
'protocol': {
|
||||||
'name': 'http',
|
'name': 'http',
|
||||||
'parameters': {
|
'parameters': {
|
||||||
'http_parameters': {
|
'http_parameters': session_api_http_parameters
|
||||||
'parameters': {
|
|
||||||
'http_output_download_parameters': {
|
|
||||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
|
||||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'recipe_id': session_api_data['recipe_id'],
|
'recipe_id': session_api_data.get('recipeId'),
|
||||||
'session_operation_auth': {
|
'session_operation_auth': {
|
||||||
'session_operation_auth_by_signature': {
|
'session_operation_auth_by_signature': {
|
||||||
'signature': session_api_data['signature'],
|
'signature': session_api_data.get('signature'),
|
||||||
'token': session_api_data['token'],
|
'token': session_api_data.get('token'),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'timing_constraint': 'unlimited'
|
'timing_constraint': 'unlimited'
|
||||||
}
|
}
|
||||||
}).encode())
|
}).encode())
|
||||||
|
|
||||||
resolution = video_quality.get('resolution', {})
|
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||||
|
info_dict['protocol'] = protocol
|
||||||
|
|
||||||
|
# get heartbeat info
|
||||||
|
heartbeat_info_dict = {
|
||||||
|
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||||
|
'data': json.dumps(session_response['data']),
|
||||||
|
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
||||||
|
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||||
|
'ping': ping
|
||||||
|
}
|
||||||
|
|
||||||
|
return info_dict, heartbeat_info_dict
|
||||||
|
|
||||||
|
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||||
|
def parse_format_id(id_code):
|
||||||
|
mobj = re.match(r'''(?x)
|
||||||
|
(?:archive_)?
|
||||||
|
(?:(?P<codec>[^_]+)_)?
|
||||||
|
(?:(?P<br>[\d]+)kbps_)?
|
||||||
|
(?:(?P<res>[\d+]+)p_)?
|
||||||
|
''', '%s_' % id_code)
|
||||||
|
return mobj.groupdict() if mobj else {}
|
||||||
|
|
||||||
|
protocol = 'niconico_dmc'
|
||||||
|
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||||
|
vdict = parse_format_id(video_quality['id'])
|
||||||
|
adict = parse_format_id(audio_quality['id'])
|
||||||
|
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||||
|
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'url': session_response['data']['session']['content_uri'],
|
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
'vcodec': vdict.get('codec'),
|
||||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
'acodec': adict.get('codec'),
|
||||||
'height': resolution.get('height'),
|
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||||
'width': resolution.get('width'),
|
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||||
|
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||||
|
'width': int_or_none(resolution.get('width')),
|
||||||
|
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
||||||
|
'protocol': protocol,
|
||||||
|
'http_headers': {
|
||||||
|
'Origin': 'https://www.nicovideo.jp',
|
||||||
|
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# Get video webpage. We are not actually interested in it for normal
|
# Get video webpage for API data.
|
||||||
# cases, but need the cookies in order to be able to download the
|
|
||||||
# info webpage
|
|
||||||
webpage, handle = self._download_webpage_handle(
|
webpage, handle = self._download_webpage_handle(
|
||||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
if video_id.startswith('so'):
|
if video_id.startswith('so'):
|
||||||
|
@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
|
||||||
'data-api-data="([^"]+)"', webpage,
|
'data-api-data="([^"]+)"', webpage,
|
||||||
'API data', default='{}'), video_id)
|
'API data', default='{}'), video_id)
|
||||||
|
|
||||||
def _format_id_from_url(video_url):
|
def get_video_info_web(items):
|
||||||
return 'economy' if video_real_url.endswith('low') else 'normal'
|
return dict_get(api_data['video'], items)
|
||||||
|
|
||||||
try:
|
# Get video info
|
||||||
video_real_url = api_data['video']['smileInfo']['url']
|
video_info_xml = self._download_xml(
|
||||||
except KeyError: # Flash videos
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||||
# Get flv info
|
video_id, note='Downloading video info page')
|
||||||
flv_info_webpage = self._download_webpage(
|
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
|
||||||
video_id, 'Downloading flv info')
|
|
||||||
|
|
||||||
flv_info = compat_parse_qs(flv_info_webpage)
|
def get_video_info_xml(items):
|
||||||
if 'url' not in flv_info:
|
if not isinstance(items, list):
|
||||||
if 'deleted' in flv_info:
|
items = [items]
|
||||||
raise ExtractorError('The video has been deleted.',
|
for item in items:
|
||||||
expected=True)
|
ret = xpath_text(video_info_xml, './/' + item)
|
||||||
elif 'closed' in flv_info:
|
if ret:
|
||||||
raise ExtractorError('Niconico videos now require logging in',
|
return ret
|
||||||
expected=True)
|
|
||||||
elif 'error' in flv_info:
|
|
||||||
raise ExtractorError('%s reports error: %s' % (
|
|
||||||
self.IE_NAME, flv_info['error'][0]), expected=True)
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to find video URL')
|
|
||||||
|
|
||||||
video_info_xml = self._download_xml(
|
if get_video_info_xml('error'):
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
error_code = get_video_info_xml('code')
|
||||||
video_id, note='Downloading video info page')
|
|
||||||
|
|
||||||
def get_video_info(items):
|
if error_code == 'DELETED':
|
||||||
if not isinstance(items, list):
|
raise ExtractorError('The video has been deleted.',
|
||||||
items = [items]
|
expected=True)
|
||||||
for item in items:
|
elif error_code == 'NOT_FOUND':
|
||||||
ret = xpath_text(video_info_xml, './/' + item)
|
raise ExtractorError('The video is not found.',
|
||||||
if ret:
|
expected=True)
|
||||||
return ret
|
elif error_code == 'COMMUNITY':
|
||||||
|
self.to_screen('%s: The video is community members only.' % video_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
||||||
|
|
||||||
video_real_url = flv_info['url'][0]
|
# Start extracting video formats
|
||||||
|
formats = []
|
||||||
|
|
||||||
extension = get_video_info('movie_type')
|
# Get HTML5 videos info
|
||||||
if not extension:
|
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||||
extension = determine_ext(video_real_url)
|
if not quality_info:
|
||||||
|
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
||||||
|
|
||||||
formats = [{
|
for audio_quality in quality_info.get('audios') or {}:
|
||||||
'url': video_real_url,
|
for video_quality in quality_info.get('videos') or {}:
|
||||||
'ext': extension,
|
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
continue
|
||||||
}]
|
formats.append(self._extract_format_for_quality(
|
||||||
else:
|
api_data, video_id, audio_quality, video_quality))
|
||||||
formats = []
|
|
||||||
|
|
||||||
dmc_info = api_data['video'].get('dmcInfo')
|
# Get flv/swf info
|
||||||
if dmc_info: # "New" HTML5 videos
|
timestamp = None
|
||||||
quality_info = dmc_info['quality']
|
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||||
for audio_quality in quality_info['audios']:
|
if video_real_url:
|
||||||
for video_quality in quality_info['videos']:
|
is_economy = video_real_url.endswith('low')
|
||||||
if not audio_quality['available'] or not video_quality['available']:
|
|
||||||
continue
|
|
||||||
formats.append(self._extract_format_for_quality(
|
|
||||||
api_data, video_id, audio_quality, video_quality))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
if is_economy:
|
||||||
else: # "Old" HTML5 videos
|
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||||
formats = [{
|
|
||||||
|
# Invoking ffprobe to determine resolution
|
||||||
|
pp = FFmpegPostProcessor(self._downloader)
|
||||||
|
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||||
|
|
||||||
|
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||||
|
|
||||||
|
try:
|
||||||
|
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||||
|
except PostProcessingError as err:
|
||||||
|
raise ExtractorError(err.msg, expected=True)
|
||||||
|
|
||||||
|
v_stream = a_stream = {}
|
||||||
|
|
||||||
|
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
||||||
|
for stream in metadata['streams']:
|
||||||
|
if stream['codec_type'] == 'video':
|
||||||
|
v_stream = stream
|
||||||
|
elif stream['codec_type'] == 'audio':
|
||||||
|
a_stream = stream
|
||||||
|
|
||||||
|
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||||
|
filesize = int(
|
||||||
|
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||||
|
or metadata['format']['size']
|
||||||
|
)
|
||||||
|
extension = (
|
||||||
|
get_video_info_xml('movie_type')
|
||||||
|
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||||
|
)
|
||||||
|
|
||||||
|
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||||
|
timestamp = (
|
||||||
|
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||||
|
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||||
|
)
|
||||||
|
metadata_timestamp = (
|
||||||
|
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||||
|
or timestamp if extension != 'mp4' else 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||||
|
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||||
|
|
||||||
|
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||||
|
|
||||||
|
# If movie file size is unstable, old server movie is not source movie.
|
||||||
|
if filesize > 1:
|
||||||
|
formats.append({
|
||||||
'url': video_real_url,
|
'url': video_real_url,
|
||||||
'ext': 'mp4',
|
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||||
}]
|
'ext': extension,
|
||||||
|
'container': extension,
|
||||||
|
'vcodec': v_stream.get('codec_name'),
|
||||||
|
'acodec': a_stream.get('codec_name'),
|
||||||
|
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
||||||
|
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||||
|
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||||
|
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||||
|
'height': int_or_none(v_stream.get('height')),
|
||||||
|
'width': int_or_none(v_stream.get('width')),
|
||||||
|
'source_preference': 5 if not is_economy else -2,
|
||||||
|
'quality': 5 if is_source and not is_economy else None,
|
||||||
|
'filesize': filesize
|
||||||
|
})
|
||||||
|
|
||||||
def get_video_info(items):
|
self._sort_formats(formats)
|
||||||
return dict_get(api_data['video'], items)
|
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
title = get_video_info('title')
|
title = (
|
||||||
if not title:
|
get_video_info_xml('title') # prefer to get the untranslated original title
|
||||||
title = self._og_search_title(webpage, default=None)
|
or get_video_info_web(['originalTitle', 'title'])
|
||||||
if not title:
|
or self._og_search_title(webpage, default=None)
|
||||||
title = self._html_search_regex(
|
or self._html_search_regex(
|
||||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||||
webpage, 'video title')
|
webpage, 'video title'))
|
||||||
|
|
||||||
watch_api_data_string = self._html_search_regex(
|
watch_api_data_string = self._html_search_regex(
|
||||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||||
|
@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
|
||||||
video_detail = watch_api_data.get('videoDetail', {})
|
video_detail = watch_api_data.get('videoDetail', {})
|
||||||
|
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
||||||
|
or dict_get( # choose highest from 720p to 240p
|
||||||
|
get_video_info_web('thumbnail'),
|
||||||
|
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
||||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||||
or video_detail.get('thumbnail'))
|
or video_detail.get('thumbnail'))
|
||||||
|
|
||||||
description = get_video_info('description')
|
description = get_video_info_web('description')
|
||||||
|
|
||||||
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
|
||||||
or unified_timestamp(get_video_info('postedDateTime')))
|
|
||||||
if not timestamp:
|
if not timestamp:
|
||||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||||
if match:
|
if match:
|
||||||
|
@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
|
||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
video_detail['postedAt'].replace('/', '-'),
|
video_detail['postedAt'].replace('/', '-'),
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||||
|
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||||
|
|
||||||
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||||
if not view_count:
|
if not view_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||||
webpage, 'view count', default=None)
|
webpage, 'view count', default=None)
|
||||||
if match:
|
if match:
|
||||||
view_count = int_or_none(match.replace(',', ''))
|
view_count = int_or_none(match.replace(',', ''))
|
||||||
view_count = view_count or video_detail.get('viewCount')
|
view_count = (
|
||||||
|
view_count
|
||||||
|
or video_detail.get('viewCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||||
|
|
||||||
|
comment_count = (
|
||||||
|
int_or_none(get_video_info_web('comment_num'))
|
||||||
|
or video_detail.get('commentCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||||
|
|
||||||
comment_count = (int_or_none(get_video_info('comment_num'))
|
|
||||||
or video_detail.get('commentCount')
|
|
||||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
|
||||||
if not comment_count:
|
if not comment_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||||
|
@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
|
||||||
comment_count = int_or_none(match.replace(',', ''))
|
comment_count = int_or_none(match.replace(',', ''))
|
||||||
|
|
||||||
duration = (parse_duration(
|
duration = (parse_duration(
|
||||||
get_video_info('length')
|
get_video_info_web('length')
|
||||||
or self._html_search_meta(
|
or self._html_search_meta(
|
||||||
'video:duration', webpage, 'video duration', default=None))
|
'video:duration', webpage, 'video duration', default=None))
|
||||||
or video_detail.get('length')
|
or video_detail.get('length')
|
||||||
or get_video_info('duration'))
|
or get_video_info_web('duration'))
|
||||||
|
|
||||||
webpage_url = get_video_info('watch_url') or url
|
webpage_url = get_video_info_web('watch_url') or url
|
||||||
|
|
||||||
|
# for channel movie and community movie
|
||||||
|
channel_id = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['globalId'],
|
||||||
|
lambda x: x['community']['globalId']))
|
||||||
|
channel = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['name'],
|
||||||
|
lambda x: x['community']['name']))
|
||||||
|
|
||||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||||
# in the JSON, which will cause None to be returned instead of {}.
|
# in the JSON, which will cause None to be returned instead of {}.
|
||||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||||
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
uploader_id = str_or_none(
|
||||||
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
get_video_info_web(['ch_id', 'user_id'])
|
||||||
|
or owner.get('id')
|
||||||
|
or channel_id
|
||||||
|
)
|
||||||
|
uploader = (
|
||||||
|
get_video_info_web(['ch_name', 'user_nickname'])
|
||||||
|
or owner.get('nickname')
|
||||||
|
or channel
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'_api_data': api_data,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
'channel': channel,
|
||||||
|
'channel_id': channel_id,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
|
@ -456,60 +623,77 @@ class NiconicoPlaylistIE(InfoExtractor):
|
||||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 100
|
|
||||||
|
|
||||||
def _call_api(self, list_id, resource, query):
|
_API_HEADERS = {
|
||||||
return self._download_json(
|
'X-Frontend-ID': '6',
|
||||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
'X-Frontend-Version': '0'
|
||||||
'Downloading %s JSON metatdata' % resource, query=query,
|
}
|
||||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
|
||||||
|
|
||||||
def _parse_owner(self, item):
|
|
||||||
owner = item.get('owner') or {}
|
|
||||||
if owner:
|
|
||||||
return {
|
|
||||||
'uploader': owner.get('name'),
|
|
||||||
'uploader_id': owner.get('id'),
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _fetch_page(self, list_id, page):
|
|
||||||
page += 1
|
|
||||||
items = self._call_api(list_id, 'page %d' % page, {
|
|
||||||
'page': page,
|
|
||||||
'pageSize': self._PAGE_SIZE,
|
|
||||||
})['items']
|
|
||||||
for item in items:
|
|
||||||
video = item.get('video') or {}
|
|
||||||
video_id = video.get('id')
|
|
||||||
if not video_id:
|
|
||||||
continue
|
|
||||||
count = video.get('count') or {}
|
|
||||||
get_count = lambda x: int_or_none(count.get(x))
|
|
||||||
info = {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': video_id,
|
|
||||||
'title': video.get('title'),
|
|
||||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
|
||||||
'description': video.get('shortDescription'),
|
|
||||||
'duration': int_or_none(video.get('duration')),
|
|
||||||
'view_count': get_count('view'),
|
|
||||||
'comment_count': get_count('comment'),
|
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
|
||||||
}
|
|
||||||
info.update(self._parse_owner(video))
|
|
||||||
yield info
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
mylist = self._call_api(list_id, 'list', {
|
|
||||||
'pageSize': 1,
|
def get_page_data(pagenum, pagesize):
|
||||||
})
|
return self._download_json(
|
||||||
entries = InAdvancePagedList(
|
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
functools.partial(self._fetch_page, list_id),
|
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
||||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
headers=self._API_HEADERS).get('data').get('mylist')
|
||||||
self._PAGE_SIZE)
|
|
||||||
result = self.playlist_result(
|
data = get_page_data(0, 1)
|
||||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
title = data.get('name')
|
||||||
result.update(self._parse_owner(mylist))
|
description = data.get('description')
|
||||||
return result
|
uploader = data.get('owner').get('name')
|
||||||
|
uploader_id = data.get('owner').get('id')
|
||||||
|
|
||||||
|
def pagefunc(pagenum):
|
||||||
|
data = get_page_data(pagenum, 25)
|
||||||
|
return ({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
||||||
|
} for item in data.get('items'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': list_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'entries': OnDemandPagedList(pagefunc, 25),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoUserIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.nicovideo.jp/user/419948',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '419948',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 101,
|
||||||
|
}
|
||||||
|
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _entries(self, list_id, ):
|
||||||
|
total_count = 1
|
||||||
|
count = page_num = 0
|
||||||
|
while count < total_count:
|
||||||
|
json_parsed = self._download_json(
|
||||||
|
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
|
||||||
|
headers=self._API_HEADERS,
|
||||||
|
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
||||||
|
if not page_num:
|
||||||
|
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||||
|
for entry in json_parsed["data"]["items"]:
|
||||||
|
count += 1
|
||||||
|
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
|
||||||
|
page_num += 1
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||||
|
|
|
@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
urljoin('http://psapi.nrk.no/', path),
|
urljoin('https://psapi.nrk.no/', path),
|
||||||
video_id, note or 'Downloading %s JSON' % item,
|
video_id, note or 'Downloading %s JSON' % item,
|
||||||
fatal=fatal, query=query,
|
fatal=fatal, query=query,
|
||||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||||
|
|
|
@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
src, video_id, f4m_id=format_id, fatal=False))
|
src, video_id, f4m_id=format_id, fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
src, video_id, mpd_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
upload_date = unified_strdate(sd.get('created_date'))
|
upload_date = unified_strdate(sd.get('created_date'))
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
preview = sd.get('preview_image_url')
|
||||||
|
if preview:
|
||||||
|
thumbnails.append({
|
||||||
|
'id': 'preview',
|
||||||
|
'url': preview,
|
||||||
|
'preference': 0,
|
||||||
|
})
|
||||||
|
image = sd.get('image_full_url')
|
||||||
|
if not image and len(data_jsb) == 1:
|
||||||
|
image = self._og_search_thumbnail(webpage)
|
||||||
|
if image:
|
||||||
|
thumbnails.append({
|
||||||
|
'id': 'full',
|
||||||
|
'url': image,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
'description': sd.get('description'),
|
'description': sd.get('description'),
|
||||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'thumbnail': sd.get('image_full_url'),
|
'thumbnails': thumbnails,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|
|
@ -21,7 +21,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||||
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
|
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
|
||||||
_SH_VALID_CONTENT_STRINGS = (
|
_SH_VALID_CONTENT_STRINGS = (
|
||||||
'<title>PeerTube<',
|
'<title>PeerTube<',
|
||||||
|
@ -180,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
|
|
||||||
class PeerTubeSHIE(PeerTubeBaseExtractor):
|
class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'md5': '9bed8c0137913e17b86334e5885aacff',
|
'md5': '8563064d245a4be5705bddb22bb00a28',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'What is PeerTube?',
|
'title': 'What is PeerTube?',
|
||||||
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
|
'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
|
||||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||||
'timestamp': 1538391166,
|
'timestamp': 1538391166,
|
||||||
'upload_date': '20181001',
|
'upload_date': '20181001',
|
||||||
|
@ -220,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
'upload_date': '20200420',
|
'upload_date': '20200420',
|
||||||
'uploader': 'Drew DeVault',
|
'uploader': 'Drew DeVault',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# new url scheme since PeerTube 3.3
|
||||||
|
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'E2E tests',
|
||||||
|
'uploader_id': '37855',
|
||||||
|
'timestamp': 1589276219,
|
||||||
|
'upload_date': '20200512',
|
||||||
|
'uploader': 'chocobozzz',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -289,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
description = None
|
description = None
|
||||||
if webpage:
|
if webpage:
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage, default=None)
|
||||||
if not description:
|
if not description:
|
||||||
full_description = self._call_api(
|
full_description = self._call_api(
|
||||||
host, 'videos', video_id, 'description', note='Downloading description JSON',
|
host, 'videos', video_id, 'description', note='Downloading description JSON',
|
||||||
|
@ -305,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
|
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
|
||||||
|
@ -316,6 +337,9 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
|
@ -352,18 +376,21 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
|
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'title': 'internet_czas_dzialac',
|
'title': 'Internet. Czas działać!',
|
||||||
'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
|
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||||
'uploader_id': 3,
|
'uploader_id': 3,
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
|
@ -401,18 +428,21 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
|
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
|
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3',
|
'id': '3',
|
||||||
'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
|
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
'title': 'Internet. Czas działać!',
|
'title': 'Internet. Czas działać!',
|
||||||
'uploader_id': 3,
|
'uploader_id': 3,
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.internet-czas-dzialac.pl/a/icd',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
|
|
|
@ -91,6 +91,14 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
|
||||||
'upload_date': '20201116',
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
}, {
|
||||||
|
# PR4 audition - other frontend
|
||||||
|
'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2610977',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Pogłos 29 października godz. 23:01',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -113,24 +121,34 @@ class PolskieRadioIE(PolskieRadioBaseExtractor):
|
||||||
|
|
||||||
content = self._search_regex(
|
content = self._search_regex(
|
||||||
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||||
webpage, 'content')
|
webpage, 'content', default=None)
|
||||||
|
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||||
webpage, 'timestamp', fatal=False))
|
webpage, 'timestamp', default=None))
|
||||||
|
|
||||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
thumbnail_url = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
|
||||||
|
description = strip_or_none(self._og_search_description(webpage, default=None))
|
||||||
|
|
||||||
|
if not content:
|
||||||
|
return {
|
||||||
|
'id': playlist_id,
|
||||||
|
'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
}
|
||||||
|
|
||||||
entries = self._extract_webpage_player_entries(content, playlist_id, {
|
entries = self._extract_webpage_player_entries(content, playlist_id, {
|
||||||
'title': title,
|
'title': title,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnail': thumbnail_url,
|
'thumbnail': thumbnail_url,
|
||||||
})
|
})
|
||||||
|
|
||||||
description = strip_or_none(self._og_search_description(webpage))
|
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@ from ..utils import (
|
||||||
class PornHubBaseIE(InfoExtractor):
|
class PornHubBaseIE(InfoExtractor):
|
||||||
_REQUIRES_PLAYWRIGHT = True
|
_REQUIRES_PLAYWRIGHT = True
|
||||||
_NETRC_MACHINE = 'pornhub'
|
_NETRC_MACHINE = 'pornhub'
|
||||||
|
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
|
@ -125,11 +126,13 @@ class PornHubIE(PornHubBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?
|
||||||
|
%s
|
||||||
|
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
'''
|
''' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -238,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# geo restricted
|
||||||
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -277,6 +287,11 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'PornHub said: %s' % error_msg,
|
'PornHub said: %s' % error_msg,
|
||||||
expected=True, video_id=video_id)
|
expected=True, video_id=video_id)
|
||||||
|
|
||||||
|
if any(re.search(p, webpage) for p in (
|
||||||
|
r'class=["\']geoBlocked["\']',
|
||||||
|
r'>\s*This content is unavailable in your country')):
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||||
# on that anymore.
|
# on that anymore.
|
||||||
|
@ -410,17 +425,14 @@ class PornHubIE(PornHubBaseIE):
|
||||||
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
return
|
return
|
||||||
tbr = None
|
if not height:
|
||||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
|
height = int_or_none(self._search_regex(
|
||||||
if mobj:
|
r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
|
||||||
if not height:
|
default=None))
|
||||||
height = int(mobj.group('height'))
|
|
||||||
tbr = int(mobj.group('tbr'))
|
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': format_url,
|
'url': format_url,
|
||||||
'format_id': '%dp' % height if height else None,
|
'format_id': '%dp' % height if height else None,
|
||||||
'height': height,
|
'height': height,
|
||||||
'tbr': tbr,
|
|
||||||
})
|
})
|
||||||
|
|
||||||
for video_url, height in video_urls:
|
for video_url, height in video_urls:
|
||||||
|
@ -442,7 +454,8 @@ class PornHubIE(PornHubBaseIE):
|
||||||
add_format(video_url, height)
|
add_format(video_url, height)
|
||||||
continue
|
continue
|
||||||
add_format(video_url)
|
add_format(video_url)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(
|
||||||
|
formats, field_preference=('height', 'width', 'fps', 'format_id'))
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||||
|
@ -511,7 +524,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||||
'playlist_mincount': 118,
|
'playlist_mincount': 118,
|
||||||
|
@ -540,6 +553,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
# Same as before, multi page
|
# Same as before, multi page
|
||||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -615,7 +631,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -720,6 +736,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -730,7 +749,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -740,4 +759,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -30,6 +30,10 @@ from ..utils import (
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlhandle_detect_ext,
|
urlhandle_detect_ext,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
from ..extractor_artifacts.soundcloud import prerelease_client_id
|
||||||
|
except ImportError:
|
||||||
|
prerelease_client_id = None
|
||||||
|
|
||||||
|
|
||||||
class SoundcloudEmbedIE(InfoExtractor):
|
class SoundcloudEmbedIE(InfoExtractor):
|
||||||
|
@ -289,6 +293,10 @@ class SoundcloudIE(InfoExtractor):
|
||||||
return
|
return
|
||||||
raise ExtractorError('Unable to extract client id')
|
raise ExtractorError('Unable to extract client id')
|
||||||
|
|
||||||
|
def _generate_prerelease_file(self):
|
||||||
|
self._update_client_id()
|
||||||
|
return 'prerelease_client_id = {!r}\n'.format(self._CLIENT_ID)
|
||||||
|
|
||||||
def _download_json(self, *args, **kwargs):
|
def _download_json(self, *args, **kwargs):
|
||||||
non_fatal = kwargs.get('fatal') is False
|
non_fatal = kwargs.get('fatal') is False
|
||||||
if non_fatal:
|
if non_fatal:
|
||||||
|
@ -310,7 +318,7 @@ class SoundcloudIE(InfoExtractor):
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
|
self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or prerelease_client_id or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolv_url(cls, url):
|
def _resolv_url(cls, url):
|
||||||
|
|
|
@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
video_data = self._download_json(
|
video_data = self._download_json(
|
||||||
'https://api.universal-music.de/graphql',
|
'https://graphql.universal-music.de/',
|
||||||
video_id, query={
|
video_id, query={
|
||||||
'query': '''{
|
'query': '''{
|
||||||
universalMusic(channel:16) {
|
universalMusic(channel:16) {
|
||||||
|
@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
def add_m3u8_format(format_id):
|
def add_m3u8_format(format_id):
|
||||||
m3u8_formats = self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
hls_url_template % format_id, video_id, 'mp4',
|
hls_url_template % format_id, video_id, 'mp4',
|
||||||
'm3u8_native', m3u8_id='hls', fatal='False')
|
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||||
if m3u8_formats and m3u8_formats[0].get('height'):
|
|
||||||
formats.extend(m3u8_formats)
|
|
||||||
|
|
||||||
for f in video_data.get('formats', []):
|
for f in video_data.get('formats', []):
|
||||||
f_url = f.get('url')
|
f_url = f.get('url')
|
||||||
|
|
37
haruhi_dl/extractor/vider.py
Normal file
37
haruhi_dl/extractor/vider.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class ViderIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://vider\.(?:pl|info)/(?:vid/\+f|embed/video/)(?P<id>[a-z\d]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://vider.info/vid/+fsx51se',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'sx51se',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Big Buck Bunny',
|
||||||
|
'upload_date': '20210906',
|
||||||
|
'timestamp': 1630927351,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(f'https://vider.info/vid/+f{video_id}', video_id)
|
||||||
|
|
||||||
|
json_ld = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)<script type="application/ld\+json">(.+?)</script>',
|
||||||
|
webpage, 'JSON-LD'), video_id)
|
||||||
|
info_dict = self._json_ld(json_ld, video_id)
|
||||||
|
# generated SEO junk
|
||||||
|
info_dict['description'] = None
|
||||||
|
info_dict['id'] = video_id
|
||||||
|
info_dict['formats'] = [{
|
||||||
|
'url': self._search_regex(r'\?file=(.+)', json_ld['embedUrl'], 'video url'),
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': 'https://vider.info/',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
return info_dict
|
|
@ -4,13 +4,12 @@ import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
extract_attributes,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
from ..aes import aes_decrypt_text
|
|
||||||
|
|
||||||
|
|
||||||
class YouPornIE(InfoExtractor):
|
class YouPornIE(InfoExtractor):
|
||||||
|
@ -34,6 +33,7 @@ class YouPornIE(InfoExtractor):
|
||||||
'tags': list,
|
'tags': list,
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
},
|
},
|
||||||
|
'skip': 'This video has been disabled',
|
||||||
}, {
|
}, {
|
||||||
# Unknown uploader
|
# Unknown uploader
|
||||||
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
|
||||||
|
@ -78,6 +78,40 @@ class YouPornIE(InfoExtractor):
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
display_id = mobj.group('display_id') or video_id
|
display_id = mobj.group('display_id') or video_id
|
||||||
|
|
||||||
|
definitions = self._download_json(
|
||||||
|
'https://www.youporn.com/api/video/media_definitions/%s/' % video_id,
|
||||||
|
display_id)
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for definition in definitions:
|
||||||
|
if not isinstance(definition, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(definition.get('videoUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
f = {
|
||||||
|
'url': video_url,
|
||||||
|
'filesize': int_or_none(definition.get('videoSize')),
|
||||||
|
}
|
||||||
|
height = int_or_none(definition.get('quality'))
|
||||||
|
# Video URL's path looks like this:
|
||||||
|
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
|
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
||||||
|
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
||||||
|
# We will benefit from it by extracting some metadata
|
||||||
|
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
||||||
|
if mobj:
|
||||||
|
if not height:
|
||||||
|
height = int(mobj.group('height'))
|
||||||
|
bitrate = int(mobj.group('bitrate'))
|
||||||
|
f.update({
|
||||||
|
'format_id': '%dp-%dk' % (height, bitrate),
|
||||||
|
'tbr': bitrate,
|
||||||
|
})
|
||||||
|
f['height'] = height
|
||||||
|
formats.append(f)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
webpage = self._download_webpage(
|
webpage = self._download_webpage(
|
||||||
'http://www.youporn.com/watch/%s' % video_id, display_id,
|
'http://www.youporn.com/watch/%s' % video_id, display_id,
|
||||||
headers={'Cookie': 'age_verified=1'})
|
headers={'Cookie': 'age_verified=1'})
|
||||||
|
@ -88,65 +122,6 @@ class YouPornIE(InfoExtractor):
|
||||||
webpage, default=None) or self._html_search_meta(
|
webpage, default=None) or self._html_search_meta(
|
||||||
'title', webpage, fatal=True)
|
'title', webpage, fatal=True)
|
||||||
|
|
||||||
links = []
|
|
||||||
|
|
||||||
# Main source
|
|
||||||
definitions = self._parse_json(
|
|
||||||
self._search_regex(
|
|
||||||
r'mediaDefinition\s*[=:]\s*(\[.+?\])\s*[;,]', webpage,
|
|
||||||
'media definitions', default='[]'),
|
|
||||||
video_id, fatal=False)
|
|
||||||
if definitions:
|
|
||||||
for definition in definitions:
|
|
||||||
if not isinstance(definition, dict):
|
|
||||||
continue
|
|
||||||
video_url = url_or_none(definition.get('videoUrl'))
|
|
||||||
if video_url:
|
|
||||||
links.append(video_url)
|
|
||||||
|
|
||||||
# Fallback #1, this also contains extra low quality 180p format
|
|
||||||
for _, link in re.findall(r'<a[^>]+href=(["\'])(http(?:(?!\1).)+\.mp4(?:(?!\1).)*)\1[^>]+title=["\']Download [Vv]ideo', webpage):
|
|
||||||
links.append(link)
|
|
||||||
|
|
||||||
# Fallback #2 (unavailable as at 22.06.2017)
|
|
||||||
sources = self._search_regex(
|
|
||||||
r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
|
|
||||||
if sources:
|
|
||||||
for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
|
|
||||||
links.append(link)
|
|
||||||
|
|
||||||
# Fallback #3 (unavailable as at 22.06.2017)
|
|
||||||
for _, link in re.findall(
|
|
||||||
r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
|
|
||||||
links.append(link)
|
|
||||||
|
|
||||||
# Fallback #4, encrypted links (unavailable as at 22.06.2017)
|
|
||||||
for _, encrypted_link in re.findall(
|
|
||||||
r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
|
|
||||||
links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for video_url in set(unescapeHTML(link) for link in links):
|
|
||||||
f = {
|
|
||||||
'url': video_url,
|
|
||||||
}
|
|
||||||
# Video URL's path looks like this:
|
|
||||||
# /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
|
||||||
# /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
|
|
||||||
# /videos/201703/11/109285532/1080P_4000K_109285532.mp4
|
|
||||||
# We will benefit from it by extracting some metadata
|
|
||||||
mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
|
|
||||||
if mobj:
|
|
||||||
height = int(mobj.group('height'))
|
|
||||||
bitrate = int(mobj.group('bitrate'))
|
|
||||||
f.update({
|
|
||||||
'format_id': '%dp-%dk' % (height, bitrate),
|
|
||||||
'height': height,
|
|
||||||
'tbr': bitrate,
|
|
||||||
})
|
|
||||||
formats.append(f)
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
description = self._html_search_regex(
|
description = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
|
r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
|
||||||
webpage, 'description',
|
webpage, 'description',
|
||||||
|
@ -169,13 +144,12 @@ class YouPornIE(InfoExtractor):
|
||||||
|
|
||||||
age_limit = self._rta_search(webpage)
|
age_limit = self._rta_search(webpage)
|
||||||
|
|
||||||
average_rating = int_or_none(self._search_regex(
|
view_count = None
|
||||||
r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
|
views = self._search_regex(
|
||||||
webpage, 'average rating', fatal=False))
|
r'(<div[^>]+\bclass=["\']js_videoInfoViews["\']>)', webpage,
|
||||||
|
'views', default=None)
|
||||||
view_count = str_to_int(self._search_regex(
|
if views:
|
||||||
r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
|
view_count = str_to_int(extract_attributes(views).get('data-value'))
|
||||||
webpage, 'view count', fatal=False, group='count'))
|
|
||||||
comment_count = str_to_int(self._search_regex(
|
comment_count = str_to_int(self._search_regex(
|
||||||
r'>All [Cc]omments? \(([\d,.]+)\)',
|
r'>All [Cc]omments? \(([\d,.]+)\)',
|
||||||
webpage, 'comment count', default=None))
|
webpage, 'comment count', default=None))
|
||||||
|
@ -201,7 +175,6 @@ class YouPornIE(InfoExtractor):
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'average_rating': average_rating,
|
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'categories': categories,
|
'categories': categories,
|
||||||
|
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
|
from inspect import getsource
|
||||||
import random
|
import random
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
@ -45,6 +46,10 @@ from ..utils import (
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
|
from ..extractor_artifacts.youtube import _decrypt_signature_protected
|
||||||
|
except ImportError:
|
||||||
|
_decrypt_signature_protected = None
|
||||||
|
|
||||||
|
|
||||||
class YoutubeBaseInfoExtractor(InfoExtractor):
|
class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
@ -901,7 +906,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
raise ExtractorError('Cannot identify player %r' % player_url)
|
raise ExtractorError('Cannot identify player %r' % player_url)
|
||||||
return id_m.group('id')
|
return id_m.group('id')
|
||||||
|
|
||||||
def _extract_signature_function(self, video_id, player_url, example_sig):
|
def _extract_signature_function(self, video_id, player_url):
|
||||||
player_id = self._extract_player_info(player_url)
|
player_id = self._extract_player_info(player_url)
|
||||||
|
|
||||||
# Read from filesystem cache
|
# Read from filesystem cache
|
||||||
|
@ -1012,31 +1017,45 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
' return %s\n') % (signature_id_tuple, expr_code)
|
' return %s\n') % (signature_id_tuple, expr_code)
|
||||||
self.to_screen('Extracted signature function:\n' + code)
|
self.to_screen('Extracted signature function:\n' + code)
|
||||||
|
|
||||||
def mess(self, a, b):
|
@staticmethod
|
||||||
|
def mess(a, b):
|
||||||
c = a[0]
|
c = a[0]
|
||||||
a[0] = a[b % len(a)]
|
a[0] = a[b % len(a)]
|
||||||
a[b % len(a)] = c
|
a[b % len(a)] = c
|
||||||
return a
|
return a
|
||||||
|
|
||||||
def _decrypt_signature_protected(self, s):
|
|
||||||
a = list(s)
|
|
||||||
a = self.mess(a, 49)
|
|
||||||
a = self.mess(a, 26)
|
|
||||||
a.reverse()
|
|
||||||
a = self.mess(a, 62)
|
|
||||||
a.reverse()
|
|
||||||
a = a[2:]
|
|
||||||
return "".join(a)
|
|
||||||
|
|
||||||
def _full_signature_handling(self, sig, player_url, video_id):
|
def _full_signature_handling(self, sig, player_url, video_id):
|
||||||
signature = self._decrypt_signature_protected(sig)
|
if _decrypt_signature_protected:
|
||||||
if re.match(self._VALID_SIG_VALUE_RE, signature):
|
signature = _decrypt_signature_protected(sig)
|
||||||
return signature
|
if re.match(self._VALID_SIG_VALUE_RE, signature):
|
||||||
|
return signature
|
||||||
if self._downloader.params.get('verbose'):
|
if self._downloader.params.get('verbose'):
|
||||||
self.to_screen("Built-in signature decryption failed, trying dynamic")
|
self.to_screen("Built-in signature decryption failed, trying dynamic")
|
||||||
sig_decrypt_stack = self._extract_signature_function(video_id, player_url, sig)
|
sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
|
||||||
return self._do_decrypt_signature(sig, sig_decrypt_stack)
|
return self._do_decrypt_signature(sig, sig_decrypt_stack)
|
||||||
|
|
||||||
|
def _generate_prerelease_file(self):
|
||||||
|
# It's Monday, so I'm in a bad mood, but at least my sailor uniform is super cute!
|
||||||
|
video_id = 'ieQ1rAIjzXc'
|
||||||
|
self._set_consent()
|
||||||
|
webpage = self._download_webpage('https://www.youtube.com/watch?v=%s' % video_id, video_id)
|
||||||
|
player_url = self._search_regex(r'"jsUrl":"(/s/player/.*?/player_ias.vflset/.*?/base.js)', webpage, 'player url')
|
||||||
|
sig_decrypt_stack = self._extract_signature_function(video_id, player_url)
|
||||||
|
func = re.sub(r'(?m)^ ', '', getsource(self.mess).replace('@staticmethod', ''))
|
||||||
|
func += '\n\ndef _decrypt_signature_protected(sig):\n'
|
||||||
|
stack = ['a = list(sig)']
|
||||||
|
for fun in sig_decrypt_stack:
|
||||||
|
if fun[0] == 'splice':
|
||||||
|
stack.append(f'a = a[{fun[1]}:]')
|
||||||
|
elif fun[0] == 'reverse':
|
||||||
|
stack.append('a.reverse()')
|
||||||
|
elif fun[0] == 'mess':
|
||||||
|
stack.append(f'a = mess(a, {fun[1]})')
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unknown stack action: %s' % (fun[0]))
|
||||||
|
stack.append("return ''.join(a)")
|
||||||
|
return func + '\n'.join(map(lambda x: ' ' * 4 + x, stack)) + '\n'
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, webpage):
|
def _get_subtitles(self, video_id, webpage):
|
||||||
try:
|
try:
|
||||||
subs_doc = self._download_xml(
|
subs_doc = self._download_xml(
|
||||||
|
@ -1422,29 +1441,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
|
if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+'
|
||||||
or re.search(r'player-age-gate-content">', video_webpage) is not None):
|
or re.search(r'player-age-gate-content">', video_webpage) is not None):
|
||||||
age_gate = True
|
age_gate = True
|
||||||
# We simulate the access to the video from www.youtube.com/v/{video_id}
|
|
||||||
# this can be viewed without login into Youtube
|
|
||||||
data = compat_urllib_parse_urlencode({
|
|
||||||
'video_id': video_id,
|
|
||||||
'eurl': 'https://youtube.googleapis.com/v/' + video_id,
|
|
||||||
'html5': 1,
|
|
||||||
'c': 'TVHTML5',
|
|
||||||
'cver': '6.20180913',
|
|
||||||
})
|
|
||||||
video_info_url = proto + '://www.youtube.com/get_video_info?' + data
|
|
||||||
try:
|
try:
|
||||||
video_info_webpage = self._download_webpage(
|
yti1_player = self._download_webpage(
|
||||||
video_info_url, video_id,
|
proto + '://www.youtube.com/youtubei/v1/player', video_id,
|
||||||
note='Downloading age-gated video info',
|
headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (SMART-TV; Linux; Tizen 4.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.0 Safari/537.36',
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'X-Goog-Api-Key': self._YOUTUBE_API_KEY,
|
||||||
|
},
|
||||||
|
data=bytes(json.dumps({
|
||||||
|
'context': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'WEB',
|
||||||
|
'clientVersion': '2.20210721.00.00',
|
||||||
|
'clientScreen': 'EMBED',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'videoId': video_id,
|
||||||
|
}).encode('utf-8')),
|
||||||
|
note='Downloading age-gated player info',
|
||||||
errnote='unable to download video info')
|
errnote='unable to download video info')
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
video_info_webpage = None
|
yti1_player = None
|
||||||
if video_info_webpage:
|
if yti1_player:
|
||||||
video_info = compat_parse_qs(video_info_webpage)
|
player_response = extract_player_response(yti1_player, video_id)
|
||||||
pl_response = video_info.get('player_response', [None])[0]
|
|
||||||
player_response = extract_player_response(pl_response, video_id)
|
|
||||||
add_dash_mpd(video_info)
|
add_dash_mpd(video_info)
|
||||||
view_count = extract_view_count(video_info)
|
view_count = extract_view_count(video_id)
|
||||||
else:
|
else:
|
||||||
age_gate = False
|
age_gate = False
|
||||||
# Try looking directly into the video webpage
|
# Try looking directly into the video webpage
|
||||||
|
@ -1814,8 +1836,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
error_desc,
|
error_desc,
|
||||||
countries=self._search_regex(
|
countries=self._search_regex(
|
||||||
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
|
r'<meta itemprop="regionsAllowed" content="((?:(?:[A-Z]{2},)*[A-Z]{2})?)">',
|
||||||
video_webpage, 'allowed region list').split(','),
|
video_webpage, 'allowed region list').split(','))
|
||||||
expected=True)
|
|
||||||
if error_desc and 'Playback on other websites has been disabled' in error_desc:
|
if error_desc and 'Playback on other websites has been disabled' in error_desc:
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'Embeds disabled for this video, account (with passed credit card or photo ID check, if in EU/EEA/CH/UK) is required',
|
'Embeds disabled for this video, account (with passed credit card or photo ID check, if in EU/EEA/CH/UK) is required',
|
||||||
|
@ -2224,8 +2245,9 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
webpage = self._download_webpage(url, list_id,
|
webpage = self._download_webpage(url, list_id,
|
||||||
note='Downloading %s page #1 (webpage)' % (self._LIST_NAME))
|
note='Downloading %s page #1 (webpage)' % (self._LIST_NAME))
|
||||||
return self._parse_json(
|
return self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex((
|
||||||
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});',
|
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});</script>',
|
||||||
|
r'(?:window(?:\["|\.)|var )ytInitialData(?:"])?\s*=\s*({.+});'),
|
||||||
webpage, 'initial data JSON'), 'initial data JSON'), webpage
|
webpage, 'initial data JSON'), 'initial data JSON'), webpage
|
||||||
|
|
||||||
def _real_extract(self, url, results=None, query=None):
|
def _real_extract(self, url, results=None, query=None):
|
||||||
|
|
|
@ -231,7 +231,10 @@ class FFmpegPostProcessor(PostProcessor):
|
||||||
stdout, stderr = p.communicate()
|
stdout, stderr = p.communicate()
|
||||||
if p.returncode != 0:
|
if p.returncode != 0:
|
||||||
stderr = stderr.decode('utf-8', 'replace')
|
stderr = stderr.decode('utf-8', 'replace')
|
||||||
msg = stderr.strip().split('\n')[-1]
|
msgs = stderr.strip().split('\n')
|
||||||
|
msg = msgs[-1]
|
||||||
|
if self._downloader.params.get('verbose', False):
|
||||||
|
self._downloader.to_screen('[debug] ' + '\n'.join(msgs[:-1]))
|
||||||
raise FFmpegPostProcessorError(msg)
|
raise FFmpegPostProcessorError(msg)
|
||||||
self.try_utime(out_path, oldest_mtime, oldest_mtime)
|
self.try_utime(out_path, oldest_mtime, oldest_mtime)
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
__version__ = '2021.06.20'
|
__version__ = '2021.08.01'
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__version__)
|
print(__version__)
|
||||||
|
|
2
setup.py
2
setup.py
|
@ -115,7 +115,7 @@ setup(
|
||||||
packages=[
|
packages=[
|
||||||
'haruhi_dl',
|
'haruhi_dl',
|
||||||
'haruhi_dl.extractor', 'haruhi_dl.downloader',
|
'haruhi_dl.extractor', 'haruhi_dl.downloader',
|
||||||
'haruhi_dl.postprocessor'],
|
'haruhi_dl.postprocessor', 'haruhi_dl.extractor_artifacts'],
|
||||||
|
|
||||||
# Provokes warning on most systems (why?!)
|
# Provokes warning on most systems (why?!)
|
||||||
# test_suite = 'nose.collector',
|
# test_suite = 'nose.collector',
|
||||||
|
|
Loading…
Reference in a new issue