Compare commits
233 commits
v2021.03.0
...
master
Author | SHA1 | Date | |
---|---|---|---|
2f375d447c | |||
d464b29113 | |||
19602fb3f5 | |||
a550e21b8c | |||
1ae67712e8 | |||
Dominika Liberda | a96bf110da | ||
973652cf4d | |||
d81137a604 | |||
a0d52ce5be | |||
Dominika Liberda | 81b5018d99 | ||
Dominika Liberda | 31b7bf5bdb | ||
Dominika Liberda | a0cb1b40a2 | ||
Dominika Liberda | c3e48f4934 | ||
Dominika Liberda | ca6cbb6234 | ||
7858dc7b9f | |||
2234b1100c | |||
75442522b2 | |||
f4070e6fe4 | |||
b30cd7afbb | |||
29389b4935 | |||
3fc2d04e08 | |||
30a3fb457e | |||
69813b6be8 | |||
f1a365faf8 | |||
86c90f7d47 | |||
a33a92ba4b | |||
6057163d97 | |||
aad8936157 | |||
18dd355e39 | |||
e628fc3794 | |||
ac99e96a1e | |||
93131809f2 | |||
9cced7b3d2 | |||
b526b67bc1 | |||
e676b759d1 | |||
Dominika Liberda | 1d54631bfb | ||
073959a503 | |||
Dominika Liberda | eaf7a8bd6e | ||
ed273bfbf2 | |||
9373a2f667 | |||
f2a5fa2e53 | |||
9b1ef5167d | |||
7787c45730 | |||
Dominika Liberda | f34b024e70 | ||
0d8ef28280 | |||
132d7674e3 | |||
e19e102a56 | |||
dd62e6bab3 | |||
484dabbf8a | |||
2e387cb356 | |||
177f5c64de | |||
a9f7bf158b | |||
80c9bfae14 | |||
8fac551776 | |||
47fec1e10b | |||
57c88d40d3 | |||
ce5c2526bc | |||
8c826fe7ce | |||
1c539931b6 | |||
6d5cb9e661 | |||
97abd98bc3 | |||
646a08b1c5 | |||
e32f3c07ea | |||
56d9861eb5 | |||
8d0c50580c | |||
07adc2e4cd | |||
a3e21baccc | |||
c3b5074fcd | |||
30d8947496 | |||
2489669316 | |||
5512cc0f37 | |||
1643b0b490 | |||
41cd26d4cf | |||
993cb8ce4c | |||
fca8c46c7b | |||
9d9b571371 | |||
d540126206 | |||
fa290c78e7 | |||
2c8fa677b2 | |||
ad5cc09566 | |||
e83f44815c | |||
6adb5ea838 | |||
8dee2b0f85 | |||
36bc893bd8 | |||
ceab7dc7ec | |||
560a3ab05d | |||
b7f9dc517f | |||
d56b6a0b75 | |||
2403ecd42d | |||
19dc8442c2 | |||
d40d350a69 | |||
63c541a3cd | |||
c9c96706eb | |||
35043c6160 | |||
5c054ee942 | |||
76d4e8de92 | |||
e9f7e06635 | |||
64ec930237 | |||
ac8b9e45fb | |||
8b4a9656f0 | |||
6ad8f4990a | |||
b31ca60b3a | |||
eb67a3cd44 | |||
cde74b6420 | |||
d68515cd12 | |||
379b17f27e | |||
83a294d881 | |||
4c46e374bc | |||
5f6bcc20f5 | |||
865b8fd65f | |||
f7cde33162 | |||
9ef69b9a67 | |||
05f71071f4 | |||
f755095cb3 | |||
85f9e11581 | |||
6108793376 | |||
d94f06105c | |||
0a6031afcb | |||
d8d8cc0945 | |||
8deedd7636 | |||
229b4d1671 | |||
2208983e30 | |||
8f35a39d9f | |||
97b46bced6 | |||
6f678388cb | |||
40ef0c5a1c | |||
f0dd168230 | |||
df566be96f | |||
923069eb48 | |||
a0986f874d | |||
12a935cf42 | |||
44ed85b18b | |||
2bd0f6069a | |||
e2764f61ea | |||
66e93478d8 | |||
a4d58a6adf | |||
abb792e7b5 | |||
55e021da8e | |||
13cc377d6f | |||
46d28e0fd5 | |||
9c0e55eb79 | |||
860a8f2061 | |||
557fe650bb | |||
baf8549c0a | |||
dae5140251 | |||
9eaffe8278 | |||
6ed5f6bbc8 | |||
Dominika Liberda | a71cc68530 | ||
8a0ec69c60 | |||
607734c7ef | |||
3a0f408546 | |||
a067097513 | |||
Dominika Liberda | b428c73970 | ||
e824771caf | |||
ecf455300f | |||
605ba1f477 | |||
6277a6f4c7 | |||
608d64024e | |||
b587a7656e | |||
14ee975fb4 | |||
1cd1ed16ed | |||
74ae4cb2be | |||
7bee125ade | |||
847a1ddff4 | |||
64f7b37d8e | |||
2404fc148e | |||
9aa7e4481b | |||
2e7f27f566 | |||
Dominika Liberda | 7ba6fd5e2c | ||
d2d859b0cb | |||
1644003935 | |||
d7455472c7 | |||
a688593c71 | |||
ce1c406432 | |||
ef06ab2626 | |||
5403f15eca | |||
11e7d9a9bc | |||
ad4946376d | |||
fae71efe4b | |||
2a36637212 | |||
051da7778d | |||
6faaa046ba | |||
3216bd2742 | |||
8210d0d578 | |||
1df8de409f | |||
bc2dfba575 | |||
7e5f6863ca | |||
8e580fb912 | |||
a84bff7941 | |||
c07c6fd0bf | |||
0bf5bb20bb | |||
19f1ef28f1 | |||
06a0a2404e | |||
b7c5d42047 | |||
8332796684 | |||
fd211154d3 | |||
e6efc4cc87 | |||
9f9d5f98fd | |||
6e95b224c2 | |||
0eab1a6949 | |||
a28058ddeb | |||
62d5e81ff1 | |||
ef668c9585 | |||
63755989fc | |||
f67e11c888 | |||
28d7757c8b | |||
d49b9356ce | |||
efffe9e670 | |||
a4a4af8546 | |||
8e8af58d04 | |||
4ddca367de | |||
3e7425297f | |||
3b151afce7 | |||
999ab0298b | |||
510512606a | |||
a8e3f00134 | |||
ca57ada0fc | |||
ade6eb8abc | |||
6f3c4fd2f8 | |||
58538a2c64 | |||
3426d75467 | |||
199edacd48 | |||
9e535b8762 | |||
0b5407d6ec | |||
c10469c0a8 | |||
9759eb7182 | |||
5311710390 | |||
c42920795e | |||
0de898ecb5 | |||
ec0abef671 | |||
d5ad78cd0b | |||
3e69892860 | |||
3240e9f582 |
2
.github/FUNDING.yml
vendored
Normal file
2
.github/FUNDING.yml
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
github: selfisekai
|
||||||
|
ko_fi: selfisekai
|
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -15,6 +15,7 @@ haruhi-dl.1
|
||||||
haruhi-dl.bash-completion
|
haruhi-dl.bash-completion
|
||||||
haruhi-dl.fish
|
haruhi-dl.fish
|
||||||
haruhi_dl/extractor/lazy_extractors.py
|
haruhi_dl/extractor/lazy_extractors.py
|
||||||
|
haruhi_dl/extractor_artifacts/
|
||||||
haruhi-dl
|
haruhi-dl
|
||||||
haruhi-dl.exe
|
haruhi-dl.exe
|
||||||
haruhi-dl.tar.gz
|
haruhi-dl.tar.gz
|
||||||
|
|
|
@ -1,8 +1,29 @@
|
||||||
default:
|
default:
|
||||||
before_script:
|
before_script:
|
||||||
|
- sed -i "s@dl-cdn.alpinelinux.org@alpine.sakamoto.pl@g" /etc/apk/repositories
|
||||||
- apk add bash
|
- apk add bash
|
||||||
- pip install nose
|
- pip install nose
|
||||||
|
|
||||||
|
pypy3.6-core:
|
||||||
|
image: pypy:3.6-slim
|
||||||
|
variables:
|
||||||
|
HDL_TEST_SET: core
|
||||||
|
before_script:
|
||||||
|
- apt-get update && apt-get install -y bash && apt-get clean
|
||||||
|
- pip install nose
|
||||||
|
script:
|
||||||
|
- ./devscripts/run_tests.sh
|
||||||
|
|
||||||
|
pypy3.7-core:
|
||||||
|
image: pypy:3.7-slim
|
||||||
|
variables:
|
||||||
|
HDL_TEST_SET: core
|
||||||
|
before_script:
|
||||||
|
- apt-get update && apt-get install -y bash && apt-get clean
|
||||||
|
- pip install nose
|
||||||
|
script:
|
||||||
|
- ./devscripts/run_tests.sh
|
||||||
|
|
||||||
py3.6-core:
|
py3.6-core:
|
||||||
image: python:3.6-alpine
|
image: python:3.6-alpine
|
||||||
variables:
|
variables:
|
||||||
|
@ -39,18 +60,6 @@ py3.9-download:
|
||||||
script:
|
script:
|
||||||
- ./devscripts/run_tests.sh
|
- ./devscripts/run_tests.sh
|
||||||
|
|
||||||
#jython-core:
|
|
||||||
# image: openjdk:11-slim
|
|
||||||
# variables:
|
|
||||||
# HDL_TEST_SET: core
|
|
||||||
# allow_failure: true
|
|
||||||
# before_script:
|
|
||||||
# - apt-get update
|
|
||||||
# - apt-get install -y wget
|
|
||||||
# - ./devscripts/install_jython.sh
|
|
||||||
# - export PATH="$HOME/jython/bin:$PATH"
|
|
||||||
# script:
|
|
||||||
# - ./devscripts/run_tests.sh
|
|
||||||
|
|
||||||
playwright-tests-core:
|
playwright-tests-core:
|
||||||
image: mcr.microsoft.com/playwright:focal
|
image: mcr.microsoft.com/playwright:focal
|
||||||
|
|
192
ChangeLog
192
ChangeLog
|
@ -1,3 +1,195 @@
|
||||||
|
version 2021.08.01
|
||||||
|
Extractor
|
||||||
|
* [youtube] fixed agegate
|
||||||
|
* [niconico] dmc downloader from youtube-dlp
|
||||||
|
* [peertube] new URL schemas
|
||||||
|
|
||||||
|
version 2021.06.20
|
||||||
|
Core
|
||||||
|
* [playwright] fixed headlessness
|
||||||
|
+ [playwright] option to force a specific browser
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [tiktok] fix empty video lists
|
||||||
|
* [youtube] fix and speed-up age-gate circumvention
|
||||||
|
* [youtube] fix videos with JS-like syntax
|
||||||
|
|
||||||
|
|
||||||
|
version 2021.06.01
|
||||||
|
Core
|
||||||
|
* merging formats by codecs
|
||||||
|
* [json_ld] better author extraction
|
||||||
|
+ --force-use-mastodon option
|
||||||
|
* support for HTTP 308 redirects
|
||||||
|
+ [test_execution] add test for lazy extractors
|
||||||
|
* Improve extract_info doc
|
||||||
|
* [options] Fix thumbnail option group name
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [tvp:series] fallback to web
|
||||||
|
- [ninateka] remove extractor
|
||||||
|
* [tvn24] refactor handling next.js frontend
|
||||||
|
* [cda] fix premium videos for premium users (?)
|
||||||
|
* [tvp] support for tvp.info vue.js pages
|
||||||
|
+ [sejm.gov.pl] new extractors
|
||||||
|
+ [senat.gov.pl] new extractors
|
||||||
|
* [spreaker] new url schemes
|
||||||
|
* [spreaker] support for embedded player
|
||||||
|
+ [spryciarze.pl] new extractors
|
||||||
|
+ [castos] new extractors
|
||||||
|
+ [magentamusik360] new extractor
|
||||||
|
+ [arnes] new extractor
|
||||||
|
+ [palcomp3] new extractor
|
||||||
|
* [screencastomatic] fix extraction
|
||||||
|
* [youku] update ccode
|
||||||
|
+ [line] support live.line.me
|
||||||
|
* [curiositystream] fix format extraction
|
||||||
|
* [jamendo] fix track extraction
|
||||||
|
* [pornhub] extracting DASH and HLS formats
|
||||||
|
* [mtv] fix Viacom A/B testing video player
|
||||||
|
+ [maoritv] new extractor
|
||||||
|
* [pluralsight] extend anti-throttling timeout
|
||||||
|
* [mastodon] support for soapbox and audio files
|
||||||
|
* [tvp] fix jp2.tvp.pl
|
||||||
|
* [youtube:channel] fix multiple page extraction
|
||||||
|
* [tvp:embed] handling formats in a better way
|
||||||
|
* [tvn] better extraction method choosing
|
||||||
|
* [tvp] fix tvp:website extracting with weird urls
|
||||||
|
+ [wppilot] new extractors
|
||||||
|
+ [mastodon] logging in to mastodon/pleroma
|
||||||
|
+ [mastodon] fetching posts via different instances
|
||||||
|
+ [mastodon] fetching peertube videos via pleroma instances
|
||||||
|
* [bbc] extract full description from __INITIAL_DATA__
|
||||||
|
* [tver] redirect all downloads to Brightcove
|
||||||
|
* [medaltv] fix extraction
|
||||||
|
* [francetvinfo] improve video id extraction
|
||||||
|
* [xfileshare] support for wolfstream.tv
|
||||||
|
* [tv2dk] fix extraction
|
||||||
|
* [svtplay] improve extraction
|
||||||
|
* [xtube] fix formats extraction
|
||||||
|
* [twitter] improve formats extraction from vmap URL
|
||||||
|
* [mastodon] cache apps on logging in
|
||||||
|
* [mastodon] support cards to external services
|
||||||
|
* [peertube] logging in
|
||||||
|
* [tiktok] deduplicate videos
|
||||||
|
+ [misskey] new extractor
|
||||||
|
+ [radiokapital] new extractors
|
||||||
|
* [youtube] fix videos with age gate
|
||||||
|
* [kaltura] Make embed code alternatives actually work
|
||||||
|
* [kaltura] Improve iframe extraction
|
||||||
|
* [dispeak] Improve FLV extraction
|
||||||
|
* [dispeak] DRY and update tests
|
||||||
|
* [gdcvault] Add support for HTML5 videos
|
||||||
|
* [funimation] Add support for optional lang code in URLs
|
||||||
|
* [medaltv] Relax _VALID_URL
|
||||||
|
- [blinkx] Remove extractor
|
||||||
|
* [orf:radio] Switch download URLs to HTTPS
|
||||||
|
+ [generic] Add Referer header for direct videojs download URLs
|
||||||
|
+ [vk] Add support for sibnet embeds
|
||||||
|
+ [generic] Add support for sibnet embeds
|
||||||
|
* [phoenix] Fix extraction
|
||||||
|
* [generic] Add support for og:audio
|
||||||
|
* [vivo] Add support for vivo.st
|
||||||
|
* [eroprofile] Fix extraction
|
||||||
|
* [playstuff] Add extractor
|
||||||
|
* [shahid] relax _VALID_URL
|
||||||
|
* [redbulltv] fix embed data extraction
|
||||||
|
* [vimeo] fix vimeo pro embed extraction
|
||||||
|
* [twitch:clips] Add access token query to download URLs
|
||||||
|
* [twitch:clips] Improve extraction
|
||||||
|
* [ted] Prefer own formats over external sources
|
||||||
|
* [ustream] Detect https embeds
|
||||||
|
* [ard] Relax _VALID_URL and fix video ids
|
||||||
|
|
||||||
|
|
||||||
|
version 2021.04.01
|
||||||
|
Core
|
||||||
|
- Removed Herobrine
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [youtube] fixed GDPR consent workaround
|
||||||
|
* [instagram] improve title extraction and extract duration
|
||||||
|
* [francetvinfo] improve video ID extraction
|
||||||
|
* [vlive] merge all updates from YTDL
|
||||||
|
|
||||||
|
version 2021.03.30
|
||||||
|
Core
|
||||||
|
* `--ie-key` commandline option for selecting specific extractor
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
* [tiktok] detect private videos
|
||||||
|
* [dw:article] fix extractor
|
||||||
|
+ [patroniteaudio] added extractor
|
||||||
|
+ [sbs] Add support for ondemand watch URLs
|
||||||
|
* [picarto] Fix live stream extraction
|
||||||
|
* [vimeo] Fix unlisted video extraction
|
||||||
|
* [ard] Improve clip id extraction
|
||||||
|
+ [zoom] Add support for zoom.us
|
||||||
|
* [bbc] Fix BBC IPlayer Episodes/Group extraction
|
||||||
|
* [zingmp3] Fix extraction
|
||||||
|
* [youtube] added workaround for cookie consent
|
||||||
|
|
||||||
|
version 2021.03.21
|
||||||
|
Core
|
||||||
|
* [playwright] More verbose errors
|
||||||
|
- Removed a lot of deprecated platform support code
|
||||||
|
* New win32 exe build system
|
||||||
|
+ Support for BitTorrent formats
|
||||||
|
+ Support for VTT subtitles in m3u8 (HLS) manifests
|
||||||
|
+ `release_timestamp` meta field
|
||||||
|
|
||||||
|
Extractor
|
||||||
|
+ [acast:player] new extractor
|
||||||
|
+ [videotarget] new extractor
|
||||||
|
* [youtube] caching extracted signature functions
|
||||||
|
* [go] fix extraction
|
||||||
|
* [youtube] more descriptive geo-lock messages (with countries)
|
||||||
|
* [polskieradio] podcast support
|
||||||
|
* [onnetwork] refactored extraction
|
||||||
|
+ [tiktok] hashtag and music extractors
|
||||||
|
* [peertube] bittorrent formats
|
||||||
|
* [generic] detecting bittorrent manifest files
|
||||||
|
+ bittorrent magnet extractor
|
||||||
|
* [generic] extracting mpd manifests properly
|
||||||
|
* [youtube] better signature handling for DASH formats
|
||||||
|
* [youtube] some DASH formats are now just static files
|
||||||
|
+ [polskieradio] radiokierowcow.pl extractor
|
||||||
|
* [pulsevideo] unduplicating formats
|
||||||
|
+ [tvp:embed] extracting video subtitles
|
||||||
|
+ [bandaichannel] Add new extractor
|
||||||
|
* [urplay] fix episode data extraction
|
||||||
|
* [stretchinternet] Fix extraction
|
||||||
|
* [zdf] Rework extractors
|
||||||
|
+ [bbc] add support for BBC Reel videos
|
||||||
|
* [9c9media] fix extraction for videos with multiple ContentPackages
|
||||||
|
* [voxmedia] fix volume embed extraction
|
||||||
|
* [trovo] Add Origin header to VOD formats
|
||||||
|
* [cbs] add support for Paramount+
|
||||||
|
* [bilibili] fix video info extraction
|
||||||
|
* [pornhub] Extract formats from get_media end point
|
||||||
|
* [pornhub] Detect flagged videos
|
||||||
|
* [bandcamp] Extract release_timestamp
|
||||||
|
* [shahid] fix format extraction
|
||||||
|
* [fujitv] fix HLS formats extension
|
||||||
|
* [tver] improve title extraction
|
||||||
|
* [pinterest] reduce the number of HLS format requests
|
||||||
|
* [sportdeutschland] fix extraction
|
||||||
|
* [southpark] Fix extraction and add support for southparkstudios.com
|
||||||
|
* [rtve] improve extraction
|
||||||
|
* [applepodcasts] fix extraction
|
||||||
|
* [svtplay] Improve extraction
|
||||||
|
* [mlb] fix video extraction
|
||||||
|
* [vvvvid] fix kenc format extraction
|
||||||
|
* [vimeo:album] Fix extraction for albums whose number of videos is a multiple of the page size
|
||||||
|
* [peertube] improve thumbnail extraction
|
||||||
|
* [yandexmusic] Refactor and add support for artist's tracks and albums
|
||||||
|
* [yandexmusic:album] Improve album title extraction
|
||||||
|
* [yandexmusic] DRY _VALID_URL base
|
||||||
|
* [yandexmusic] Add support for music.yandex.com
|
||||||
|
* [yandexmusic:playlist] Request missing tracks in chunks
|
||||||
|
- [tvnplayer] removed extractor
|
||||||
|
* [youtube] meaningful error for age-gated no-embed videos
|
||||||
|
|
||||||
version 2021.03.01
|
version 2021.03.01
|
||||||
Extractor
|
Extractor
|
||||||
* [cda] logging in with a user account
|
* [cda] logging in with a user account
|
||||||
|
|
2
Makefile
2
Makefile
|
@ -9,7 +9,7 @@ PREFIX ?= /usr/local
|
||||||
BINDIR ?= $(PREFIX)/bin
|
BINDIR ?= $(PREFIX)/bin
|
||||||
MANDIR ?= $(PREFIX)/man
|
MANDIR ?= $(PREFIX)/man
|
||||||
SHAREDIR ?= $(PREFIX)/share
|
SHAREDIR ?= $(PREFIX)/share
|
||||||
PYTHON ?= /usr/bin/env python
|
PYTHON ?= /usr/bin/env python3
|
||||||
|
|
||||||
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
# set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local
|
||||||
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi)
|
||||||
|
|
58
README.md
58
README.md
|
@ -14,30 +14,84 @@ A Microsoft GitHub mirror exists as well: https://github.com/haruhi-dl/haruhi-dl
|
||||||
|
|
||||||
## Installing
|
## Installing
|
||||||
|
|
||||||
haruhi-dl is available on PyPI: [![version on PyPI](https://img.shields.io/pypi/v/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
|
System-specific ways:
|
||||||
|
|
||||||
|
- [Windows .exe files](https://git.sakamoto.pl/laudompat/haruhi-dl/-/releases) ([mirror](https://github.com/haruhi-dl/haruhi-dl/releases)) - just unpack and run the exe file in cmd/powershell! (ffmpeg/rtmpdump not included, playwright extractors won't work)
|
||||||
|
- [Arch Linux (AUR)](https://aur.archlinux.org/packages/haruhi-dl/) - `yay -S haruhi-dl` (managed by mlunax)
|
||||||
|
- [macOS (homebrew)](https://formulae.brew.sh/formula/haruhi-dl) - `brew install haruhi-dl` (managed by Homebrew)
|
||||||
|
|
||||||
|
haruhi-dl is also available on PyPI: [![version on PyPI](https://img.shields.io/pypi/v/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
|
||||||
|
|
||||||
Install release from PyPI on Python 3.x:
|
Install release from PyPI on Python 3.x:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ python3 -m pip install --upgrade haruhi-dl
|
$ python3 -m pip install --upgrade haruhi-dl
|
||||||
```
|
```
|
||||||
|
|
||||||
Install from master (unstable) on Python 3.x:
|
Install from master (unstable) on Python 3.x:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ python3 -m pip install --upgrade git+https://git.sakamoto.pl/laudompat/haruhi-dl.git
|
$ python3 -m pip install --upgrade git+https://git.sakamoto.pl/laudompat/haruhi-dl.git
|
||||||
```
|
```
|
||||||
|
|
||||||
**Python 2 support is dropped and we recommend to switch to Python 3**, though it may still work.
|
**Python 2 support is dropped, use Python 3.**
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ haruhi-dl "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
$ haruhi-dl "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
|
||||||
```
|
```
|
||||||
|
|
||||||
That's it! You just got rickrolled!
|
That's it! You just got rickrolled!
|
||||||
|
|
||||||
Full manual with all options:
|
Full manual with all options:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
$ haruhi-dl --help
|
$ haruhi-dl --help
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Differences from youtube-dl
|
||||||
|
|
||||||
|
_This is not a complete list._
|
||||||
|
|
||||||
|
- Changed license from Unlicense to LGPL 3.0
|
||||||
|
- Extracting and downloading video with subtitles from m3u8 (HLS) - this also includes subtitles from Twitter and some other services
|
||||||
|
- Support for BitTorrent protocol (only used when explicitly enabled by user with `--allow-p2p` or `--prefer-p2p`; aria2c required)
|
||||||
|
- Specific way to handle selfhosted services (untied to specific providers/domains, like PeerTube, Funkwhale, Mastodon)
|
||||||
|
- Specific way to handle content proxy sites (like Nitter for Twitter)
|
||||||
|
- Merging formats by codecs instead of file extensions, if possible (you'd rather have your AV1+opus downloads from YouTube be .webm than .mkv, wouldn't you?)
|
||||||
|
- New/improved/fixed extractors:
|
||||||
|
- PeerTube (extracting playlists, channels and user accounts, optionally downloading with BitTorrent)
|
||||||
|
- Funkwhale
|
||||||
|
- TikTok (extractors for user profiles, hashtags and music - all except single video and music with `--no-playlist` require Playwright)
|
||||||
|
- cda.pl
|
||||||
|
- Ipla
|
||||||
|
- Weibo (DASH formats)
|
||||||
|
- LinkedIn (videos from user posts)
|
||||||
|
- Acast
|
||||||
|
- Mastodon (including Pleroma, Gab Social, Soapbox)
|
||||||
|
- Ring Publishing (aka PulsEmbed, PulseVideo, OnetMVP; Ringier Axel Springer)
|
||||||
|
- TVP (support for TVPlayer2, client-rendered sites and TVP ABC, refactored some extractors to use mobile JSON API)
|
||||||
|
- TVN24 (support for main page, Fakty and magazine frontend)
|
||||||
|
- PolskieRadio
|
||||||
|
- Agora (wyborcza.pl video, wyborcza.pl/wysokieobcasy.pl/audycje.tokfm.pl podcasts, tuba.fm)
|
||||||
|
- sejm.gov.pl/senat.gov.pl
|
||||||
|
- Some improvements with handling JSON-LD
|
||||||
|
|
||||||
|
## Bug reports
|
||||||
|
|
||||||
|
Please send the bug details to <bug@haruhi.download> or report them on [Microsoft GitHub](https://github.com/haruhi-dl/haruhi-dl/issues).
|
||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
If you want to contribute, send us a diff to <contribute@haruhi.download>, or submit a Pull Request on [our mirror at Microsoft GitHub](https://github.com/haruhi-dl/haruhi-dl).
|
If you want to contribute, send us a diff to <contribute@haruhi.download>, or submit a Pull Request on [our mirror at Microsoft GitHub](https://github.com/haruhi-dl/haruhi-dl).
|
||||||
|
|
||||||
|
Why contribute to this fork, and not youtube-dl?
|
||||||
|
|
||||||
|
- You make sure your contributions will always be free - under the Unlicense, anyone can take your code, modify it, and close the source. LGPL 3.0 makes it clear that any contributions must be published.
|
||||||
|
|
||||||
|
## Donations
|
||||||
|
|
||||||
|
If my contributions helped you, please consider sending me a small tip.
|
||||||
|
|
||||||
|
[![Buy Me a Coffee at ko-fi.com](https://cdn.ko-fi.com/cdn/kofi1.png?v=2)](https://ko-fi.com/selfisekai)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
data="$(curl -s "https://www.youtube.com/s/player/$1/player_ias.vflset/en_GB/base.js")"
|
data="$(curl -s "https://www.youtube.com/s/player/$1/player_ias.vflset/en_GB/base.js")"
|
||||||
func="$(grep -P '[a-z]\=a\.split.*a\.join' <<< "$data")"
|
func="$(grep -P '[a-z]\=a\.split\([\"'"'"']{2}.*a\.join' <<< "$data")"
|
||||||
echo "full extracted function: $func"
|
echo "full extracted function: $func"
|
||||||
|
|
||||||
obfuscatedName="$(grep -Poh '\(""\);[A-Za-z]+' <<< "$func" | sed -s 's/("");//')"
|
obfuscatedName="$(grep -Poh '\(""\);[A-Za-z]+' <<< "$func" | sed -s 's/("");//')"
|
||||||
|
|
|
@ -5,6 +5,17 @@
|
||||||
module.exports = function patchHook(patchContent) {
|
module.exports = function patchHook(patchContent) {
|
||||||
[
|
[
|
||||||
[/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
|
[/(?:youtube-|yt-?)dl\.org/g, 'haruhi.download'],
|
||||||
|
|
||||||
|
// fork: https://github.com/blackjack4494/yt-dlc
|
||||||
|
[/youtube_dlc/g, 'haruhi_dl'],
|
||||||
|
[/youtube-dlc/g, 'haruhi-dl'],
|
||||||
|
[/ytdlc/g, 'hdl'],
|
||||||
|
[/yt-dlc/g, 'hdl'],
|
||||||
|
// fork: https://github.com/yt-dlp/yt-dlp
|
||||||
|
[/yt_dlp/g, 'haruhi_dl'],
|
||||||
|
[/yt-dlp/g, 'haruhi-dl'],
|
||||||
|
[/ytdlp/g, 'hdl'],
|
||||||
|
|
||||||
[/youtube_dl/g, 'haruhi_dl'],
|
[/youtube_dl/g, 'haruhi_dl'],
|
||||||
[/youtube-dl/g, 'haruhi-dl'],
|
[/youtube-dl/g, 'haruhi-dl'],
|
||||||
[/youtubedl/g, 'haruhidl'],
|
[/youtubedl/g, 'haruhidl'],
|
||||||
|
@ -14,8 +25,10 @@ module.exports = function patchHook(patchContent) {
|
||||||
[/ydl/g, 'hdl'],
|
[/ydl/g, 'hdl'],
|
||||||
|
|
||||||
// prevent from linking to non-existent repository
|
// prevent from linking to non-existent repository
|
||||||
[/github\.com\/ytdl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
[/github\.com\/(?:yt|h)dl-org\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
||||||
[/github\.com\/rg3\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
[/github\.com\/rg3\/haruhi-dl/g, 'github.com/ytdl-org/youtube-dl'],
|
||||||
|
[/github\.com\/blackjack4494\/hdl/g, 'github.com/blackjack4494/yt-dlc'],
|
||||||
|
[/github\.com\/hdl\/hdl/g, 'github.com/yt-dlp/yt-dlp'],
|
||||||
// prevent changing the smuggle URLs (for compatibility with ytdl)
|
// prevent changing the smuggle URLs (for compatibility with ytdl)
|
||||||
[/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
|
[/__haruhidl_smuggle/g, '__youtubedl_smuggle'],
|
||||||
].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));
|
].forEach(([regex, replacement]) => patchContent = patchContent.replace(regex, replacement));
|
||||||
|
|
32
devscripts/prerelease_codegen.py
Normal file
32
devscripts/prerelease_codegen.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
# this is intended to speed-up some extractors,
|
||||||
|
# which sometimes need to extract some data that doesn't change very often,
|
||||||
|
# but it does so at random times, like youtube's signature "crypto" or soundcloud's client id
|
||||||
|
|
||||||
|
import os
|
||||||
|
from os.path import dirname as dirn
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
from haruhi_dl import HaruhiDL
|
||||||
|
from haruhi_dl.utils import (
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
hdl = HaruhiDL(params={
|
||||||
|
'quiet': True,
|
||||||
|
})
|
||||||
|
artifact_dir = os.path.join(dirn(dirn((os.path.abspath(__file__)))), 'haruhi_dl', 'extractor_artifacts')
|
||||||
|
if not os.path.exists(artifact_dir):
|
||||||
|
os.mkdir(artifact_dir)
|
||||||
|
|
||||||
|
for ie_name in (
|
||||||
|
'Youtube',
|
||||||
|
'Soundcloud',
|
||||||
|
):
|
||||||
|
ie = hdl.get_info_extractor(ie_name)
|
||||||
|
try:
|
||||||
|
file_contents = ie._generate_prerelease_file()
|
||||||
|
with open(os.path.join(artifact_dir, ie_name.lower() + '.py'), 'w') as file:
|
||||||
|
file.write(file_contents)
|
||||||
|
except ExtractorError as err:
|
||||||
|
print(err)
|
|
@ -1,141 +1,24 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
# IMPORTANT: the following assumptions are made
|
if [[ "$(basename $(pwd))" == 'devscripts' ]]; then
|
||||||
# * the GH repo is on the origin remote
|
cd ..
|
||||||
# * the gh-pages branch is named so locally
|
|
||||||
# * the git config user.signingkey is properly set
|
|
||||||
|
|
||||||
# You will need
|
|
||||||
# pip install coverage nose rsa wheel
|
|
||||||
|
|
||||||
# TODO
|
|
||||||
# release notes
|
|
||||||
# make hash on local files
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
skip_tests=true
|
|
||||||
gpg_sign_commits=""
|
|
||||||
buildserver='localhost:8142'
|
|
||||||
|
|
||||||
while true
|
|
||||||
do
|
|
||||||
case "$1" in
|
|
||||||
--run-tests)
|
|
||||||
skip_tests=false
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--gpg-sign-commits|-S)
|
|
||||||
gpg_sign_commits="-S"
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--buildserver)
|
|
||||||
buildserver="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
--*)
|
|
||||||
echo "ERROR: unknown option $1"
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
if [ -z "$1" ]; then echo "ERROR: specify version number like this: $0 1994.09.06"; exit 1; fi
|
|
||||||
version="$1"
|
|
||||||
major_version=$(echo "$version" | sed -n 's#^\([0-9]*\.[0-9]*\.[0-9]*\).*#\1#p')
|
|
||||||
if test "$major_version" '!=' "$(date '+%Y.%m.%d')"; then
|
|
||||||
echo "$version does not start with today's date!"
|
|
||||||
exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
|
v="$(date "+%Y.%m.%d")"
|
||||||
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
|
|
||||||
useless_files=$(find haruhi_dl -type f -not -name '*.py')
|
|
||||||
if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in haruhi_dl: $useless_files"; exit 1; fi
|
|
||||||
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
|
|
||||||
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
|
|
||||||
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
|
|
||||||
if ! python3 -c 'import wheel' 2>/dev/null; then echo 'ERROR: wheel is missing'; exit 1; fi
|
|
||||||
|
|
||||||
read -p "Is ChangeLog up to date? (y/n) " -n 1
|
if [[ "$(grep "'$v" haruhi_dl/version.py)" != '' ]]; then #' is this the first release of the day?
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
if [[ "$(grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' haruhi_dl/version.py)" != '' ]]; then # so, 2nd or nth?
|
||||||
|
v="$v.$(($(cat haruhi_dl/version.py | grep -Poh '[0-9]{4}\.[0-9]{2}\.[0-9]{2}\.[0-9]' | grep -Poh '[0-9]+$')+1))"
|
||||||
/bin/echo -e "\n### First of all, testing..."
|
else
|
||||||
make clean
|
v="$v.1"
|
||||||
if $skip_tests ; then
|
fi
|
||||||
echo 'SKIPPING TESTS'
|
|
||||||
else
|
|
||||||
nosetests --verbose --with-coverage --cover-package=haruhi_dl --cover-html test --stop || exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
/bin/echo -e "\n### Changing version in version.py..."
|
sed "s/__version__ = '.*'/__version__ = '$v'/g" -i haruhi_dl/version.py
|
||||||
sed -i "s/__version__ = '.*'/__version__ = '$version'/" haruhi_dl/version.py
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Changing version in ChangeLog..."
|
python3 setup.py build_lazy_extractors
|
||||||
sed -i "s/<unreleased>/$version/" ChangeLog
|
python3 devscripts/prerelease_codegen.py
|
||||||
|
rm -R build dist
|
||||||
/bin/echo -e "\n### Committing documentation, templates and haruhi_dl/version.py..."
|
python3 setup.py sdist bdist_wheel
|
||||||
make README.md CONTRIBUTING.md issuetemplates supportedsites
|
python3 -m twine upload dist/*
|
||||||
git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md haruhi_dl/version.py ChangeLog
|
devscripts/wine-py2exe.sh setup.py
|
||||||
git commit $gpg_sign_commits -m "release $version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Now tagging, signing and pushing..."
|
|
||||||
git tag -s -m "Release $version" "$version"
|
|
||||||
git show "$version"
|
|
||||||
read -p "Is it good, can I push? (y/n) " -n 1
|
|
||||||
if [[ ! $REPLY =~ ^[Yy]$ ]]; then exit 1; fi
|
|
||||||
echo
|
|
||||||
MASTER=$(git rev-parse --abbrev-ref HEAD)
|
|
||||||
git push origin $MASTER:master
|
|
||||||
git push origin "$version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### OK, now it is time to build the binaries..."
|
|
||||||
REV=$(git rev-parse HEAD)
|
|
||||||
make haruhi-dl haruhi-dl.tar.gz
|
|
||||||
read -p "VM running? (y/n) " -n 1
|
|
||||||
wget "http://$buildserver/build/ytdl-org/haruhi-dl/haruhi-dl.exe?rev=$REV" -O haruhi-dl.exe
|
|
||||||
mkdir -p "build/$version"
|
|
||||||
mv haruhi-dl haruhi-dl.exe "build/$version"
|
|
||||||
mv haruhi-dl.tar.gz "build/$version/haruhi-dl-$version.tar.gz"
|
|
||||||
RELEASE_FILES="haruhi-dl haruhi-dl.exe haruhi-dl-$version.tar.gz"
|
|
||||||
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
|
|
||||||
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
|
|
||||||
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
|
|
||||||
(cd build/$version/ && sha512sum $RELEASE_FILES > SHA2-512SUMS)
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Signing and uploading the new binaries to GitHub..."
|
|
||||||
for f in $RELEASE_FILES; do gpg --passphrase-repeat 5 --detach-sig "build/$version/$f"; done
|
|
||||||
|
|
||||||
ROOT=$(pwd)
|
|
||||||
python devscripts/create-github-release.py ChangeLog $version "$ROOT/build/$version"
|
|
||||||
|
|
||||||
#ssh ytdl@yt-dl.org "sh html/update_latest.sh $version"
|
|
||||||
|
|
||||||
/bin/echo -e "\n### Now switching to gh-pages..."
|
|
||||||
git clone --branch gh-pages --single-branch . build/gh-pages
|
|
||||||
(
|
|
||||||
set -e
|
|
||||||
ORIGIN_URL=$(git config --get remote.origin.url)
|
|
||||||
cd build/gh-pages
|
|
||||||
"$ROOT/devscripts/gh-pages/add-version.py" $version
|
|
||||||
"$ROOT/devscripts/gh-pages/update-feed.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/sign-versions.py" < "$ROOT/updates_key.pem"
|
|
||||||
"$ROOT/devscripts/gh-pages/generate-download.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/update-copyright.py"
|
|
||||||
"$ROOT/devscripts/gh-pages/update-sites.py"
|
|
||||||
git add *.html *.html.in update
|
|
||||||
git commit $gpg_sign_commits -m "release $version"
|
|
||||||
git push "$ROOT" gh-pages
|
|
||||||
git push "$ORIGIN_URL" gh-pages
|
|
||||||
)
|
|
||||||
rm -rf build
|
|
||||||
|
|
||||||
make pypi-files
|
|
||||||
echo "Uploading to PyPi ..."
|
|
||||||
python setup.py sdist bdist_wheel upload
|
|
||||||
make clean
|
|
||||||
|
|
||||||
/bin/echo -e "\n### DONE!"
|
|
||||||
|
|
|
@ -2,7 +2,8 @@
|
||||||
|
|
||||||
# Run with as parameter a setup.py that works in the current directory
|
# Run with as parameter a setup.py that works in the current directory
|
||||||
# e.g. no os.chdir()
|
# e.g. no os.chdir()
|
||||||
# It will run twice, the first time will crash
|
|
||||||
|
# Wine >=6.3 required: https://bugs.winehq.org/show_bug.cgi?id=3591
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
@ -10,36 +11,30 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )"
|
||||||
|
|
||||||
if [ ! -d wine-py2exe ]; then
|
if [ ! -d wine-py2exe ]; then
|
||||||
|
|
||||||
sudo apt-get install wine1.3 axel bsdiff
|
|
||||||
|
|
||||||
mkdir wine-py2exe
|
mkdir wine-py2exe
|
||||||
cd wine-py2exe
|
cd wine-py2exe
|
||||||
export WINEPREFIX=`pwd`
|
export WINEPREFIX=`pwd`
|
||||||
|
|
||||||
axel -a "http://www.python.org/ftp/python/2.7/python-2.7.msi"
|
echo "Downloading Python 3.8.8"
|
||||||
axel -a "http://downloads.sourceforge.net/project/py2exe/py2exe/0.6.9/py2exe-0.6.9.win32-py2.7.exe"
|
aria2c "https://www.python.org/ftp/python/3.8.8/python-3.8.8.exe"
|
||||||
#axel -a "http://winetricks.org/winetricks"
|
|
||||||
|
# this will need to be upgraded when switching to a newer version of python
|
||||||
|
winetricks win7
|
||||||
|
|
||||||
# http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957
|
# http://appdb.winehq.org/objectManager.php?sClass=version&iId=21957
|
||||||
echo "Follow python setup on screen"
|
echo "Installing Python 3.8.8"
|
||||||
wine msiexec /i python-2.7.msi
|
wine python-3.8.8.exe /quiet InstallAllUsers=1 'DefaultAllUsersTargetDir=C:\\python38'
|
||||||
|
|
||||||
echo "Follow py2exe setup on screen"
|
echo "Installing py2exe"
|
||||||
wine py2exe-0.6.9.win32-py2.7.exe
|
wine 'C:\\python38\\python.exe' -m pip install wheel
|
||||||
|
wine 'C:\\python38\\python.exe' -m pip install py2exe
|
||||||
|
#wine 'C:\\python38\\python.exe' -m pip install playwright===1.9.0
|
||||||
|
#wine 'C:\\python38\\python.exe' -m playwright install
|
||||||
|
|
||||||
#echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen"
|
#echo "Follow Microsoft Visual C++ 2008 Redistributable Package setup on screen"
|
||||||
#bash winetricks vcrun2008
|
#bash winetricks vcrun2008
|
||||||
|
|
||||||
rm py2exe-0.6.9.win32-py2.7.exe
|
rm python-3.8.8.exe
|
||||||
rm python-2.7.msi
|
|
||||||
#rm winetricks
|
|
||||||
|
|
||||||
# http://bugs.winehq.org/show_bug.cgi?id=3591
|
|
||||||
|
|
||||||
mv drive_c/Python27/Lib/site-packages/py2exe/run.exe drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup
|
|
||||||
bspatch drive_c/Python27/Lib/site-packages/py2exe/run.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run.exe "$SCRIPT_DIR/SizeOfImage.patch"
|
|
||||||
mv drive_c/Python27/Lib/site-packages/py2exe/run_w.exe drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup
|
|
||||||
bspatch drive_c/Python27/Lib/site-packages/py2exe/run_w.exe.backup drive_c/Python27/Lib/site-packages/py2exe/run_w.exe "$SCRIPT_DIR/SizeOfImage_w.patch"
|
|
||||||
|
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
|
@ -49,8 +44,8 @@ else
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
wine "C:\\Python27\\python.exe" "$1" py2exe > "py2exe.log" 2>&1 || true
|
mkdir -p build/bdist.win32/winexe/bundle-3.8/
|
||||||
echo '# Copying python27.dll' >> "py2exe.log"
|
# cp "$WINEPREFIX/drive_c/python38/python38.dll" build/bdist.win32/winexe/bundle-3.8/
|
||||||
cp "$WINEPREFIX/drive_c/windows/system32/python27.dll" build/bdist.win32/winexe/bundle-2.7/
|
echo "Making the exe file"
|
||||||
wine "C:\\Python27\\python.exe" "$1" py2exe >> "py2exe.log" 2>&1
|
# cannot be piped into a file: https://forum.winehq.org/viewtopic.php?t=33992
|
||||||
|
wine 'C:\\python38\\python.exe' "$1" py2exe | tee py2exe.log
|
||||||
|
|
|
@ -60,6 +60,7 @@ from .utils import (
|
||||||
format_bytes,
|
format_bytes,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
|
HaruhiDLError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
ISO3166Utils,
|
ISO3166Utils,
|
||||||
locked_file,
|
locked_file,
|
||||||
|
@ -777,22 +778,38 @@ class HaruhiDL(object):
|
||||||
|
|
||||||
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
def extract_info(self, url, download=True, ie_key=None, extra_info={},
|
||||||
process=True, force_generic_extractor=False):
|
process=True, force_generic_extractor=False):
|
||||||
'''
|
"""
|
||||||
Returns a list with a dictionary for each video we find.
|
Return a list with a dictionary for each video extracted.
|
||||||
If 'download', also downloads the videos.
|
|
||||||
extra_info is a dict containing the extra values to add to each result
|
Arguments:
|
||||||
'''
|
url -- URL to extract
|
||||||
|
|
||||||
|
Keyword arguments:
|
||||||
|
download -- whether to download videos during extraction
|
||||||
|
ie_key -- extractor key hint
|
||||||
|
extra_info -- dictionary containing the extra values to add to each result
|
||||||
|
process -- whether to resolve all unresolved references (URLs, playlist items),
|
||||||
|
must be True for download to work.
|
||||||
|
force_generic_extractor -- force using the generic extractor
|
||||||
|
"""
|
||||||
|
|
||||||
if not ie_key and force_generic_extractor:
|
if not ie_key and force_generic_extractor:
|
||||||
ie_key = 'Generic'
|
ie_key = 'Generic'
|
||||||
|
|
||||||
|
force_use_mastodon = self.params.get('force_use_mastodon')
|
||||||
|
if not ie_key and force_use_mastodon:
|
||||||
|
ie_key = 'MastodonSH'
|
||||||
|
|
||||||
|
if not ie_key:
|
||||||
|
ie_key = self.params.get('ie_key')
|
||||||
|
|
||||||
if ie_key:
|
if ie_key:
|
||||||
ies = [self.get_info_extractor(ie_key)]
|
ies = [self.get_info_extractor(ie_key)]
|
||||||
else:
|
else:
|
||||||
ies = self._ies
|
ies = self._ies
|
||||||
|
|
||||||
for ie in ies:
|
for ie in ies:
|
||||||
if not ie.suitable(url):
|
if not force_use_mastodon and not ie.suitable(url):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
ie = self.get_info_extractor(ie.ie_key())
|
ie = self.get_info_extractor(ie.ie_key())
|
||||||
|
@ -906,7 +923,7 @@ class HaruhiDL(object):
|
||||||
# url_transparent. In such cases outer metadata (from ie_result)
|
# url_transparent. In such cases outer metadata (from ie_result)
|
||||||
# should be propagated to inner one (info). For this to happen
|
# should be propagated to inner one (info). For this to happen
|
||||||
# _type of info should be overridden with url_transparent. This
|
# _type of info should be overridden with url_transparent. This
|
||||||
# fixes issue from https://github.com/ytdl-org/haruhi-dl/pull/11163.
|
# fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
|
||||||
if new_result.get('_type') == 'url':
|
if new_result.get('_type') == 'url':
|
||||||
new_result['_type'] = 'url_transparent'
|
new_result['_type'] = 'url_transparent'
|
||||||
|
|
||||||
|
@ -914,7 +931,7 @@ class HaruhiDL(object):
|
||||||
new_result, download=download, extra_info=extra_info)
|
new_result, download=download, extra_info=extra_info)
|
||||||
elif result_type in ('playlist', 'multi_video'):
|
elif result_type in ('playlist', 'multi_video'):
|
||||||
# Protect from infinite recursion due to recursively nested playlists
|
# Protect from infinite recursion due to recursively nested playlists
|
||||||
# (see https://github.com/hdl-org/haruhi-dl/issues/27833)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/27833)
|
||||||
webpage_url = ie_result['webpage_url']
|
webpage_url = ie_result['webpage_url']
|
||||||
if webpage_url in self._playlist_urls:
|
if webpage_url in self._playlist_urls:
|
||||||
self.to_screen(
|
self.to_screen(
|
||||||
|
@ -1515,14 +1532,18 @@ class HaruhiDL(object):
|
||||||
if 'display_id' not in info_dict and 'id' in info_dict:
|
if 'display_id' not in info_dict and 'id' in info_dict:
|
||||||
info_dict['display_id'] = info_dict['id']
|
info_dict['display_id'] = info_dict['id']
|
||||||
|
|
||||||
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
|
for ts_key, date_key in (
|
||||||
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
('timestamp', 'upload_date'),
|
||||||
# see http://bugs.python.org/issue1646728)
|
('release_timestamp', 'release_date'),
|
||||||
try:
|
):
|
||||||
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
|
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
|
||||||
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
|
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
|
||||||
except (ValueError, OverflowError, OSError):
|
# see http://bugs.python.org/issue1646728)
|
||||||
pass
|
try:
|
||||||
|
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
|
||||||
|
info_dict[date_key] = upload_date.strftime('%Y%m%d')
|
||||||
|
except (ValueError, OverflowError, OSError):
|
||||||
|
pass
|
||||||
|
|
||||||
# Auto generate title fields corresponding to the *_number fields when missing
|
# Auto generate title fields corresponding to the *_number fields when missing
|
||||||
# in order to always have clean titles. This is very common for TV series.
|
# in order to always have clean titles. This is very common for TV series.
|
||||||
|
@ -1530,6 +1551,19 @@ class HaruhiDL(object):
|
||||||
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
|
||||||
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
|
||||||
|
|
||||||
|
# Some fragmented media manifests like m3u8 allow embedding subtitles
|
||||||
|
# This is a weird hack to provide these subtitles to users without a very huge refactor of extractors
|
||||||
|
if 'formats' in info_dict:
|
||||||
|
formats_subtitles = list(filter(lambda x: x.get('_subtitle'), info_dict['formats']))
|
||||||
|
if formats_subtitles:
|
||||||
|
info_dict.setdefault('subtitles', {})
|
||||||
|
for sub in formats_subtitles:
|
||||||
|
if sub['_key'] not in info_dict['subtitles']:
|
||||||
|
info_dict['subtitles'][sub['_key']] = []
|
||||||
|
info_dict['subtitles'][sub['_key']].append(sub['_subtitle'])
|
||||||
|
# remove these subtitles from formats now
|
||||||
|
info_dict['formats'] = list(filter(lambda x: '_subtitle' not in x, info_dict['formats']))
|
||||||
|
|
||||||
for cc_kind in ('subtitles', 'automatic_captions'):
|
for cc_kind in ('subtitles', 'automatic_captions'):
|
||||||
cc = info_dict.get(cc_kind)
|
cc = info_dict.get(cc_kind)
|
||||||
if cc:
|
if cc:
|
||||||
|
@ -1537,6 +1571,12 @@ class HaruhiDL(object):
|
||||||
for subtitle_format in subtitle:
|
for subtitle_format in subtitle:
|
||||||
if subtitle_format.get('url'):
|
if subtitle_format.get('url'):
|
||||||
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
subtitle_format['url'] = sanitize_url(subtitle_format['url'])
|
||||||
|
if subtitle_format.get('protocol') is None:
|
||||||
|
subtitle_format['protocol'] = determine_protocol(subtitle_format)
|
||||||
|
if subtitle_format.get('http_headers') is None:
|
||||||
|
full_info = info_dict.copy()
|
||||||
|
full_info.update(subtitle_format)
|
||||||
|
subtitle_format['http_headers'] = self._calc_headers(full_info)
|
||||||
if subtitle_format.get('ext') is None:
|
if subtitle_format.get('ext') is None:
|
||||||
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
|
||||||
|
|
||||||
|
@ -1649,7 +1689,7 @@ class HaruhiDL(object):
|
||||||
# by extractor are incomplete or not (i.e. whether extractor provides only
|
# by extractor are incomplete or not (i.e. whether extractor provides only
|
||||||
# video-only or audio-only formats) for proper formats selection for
|
# video-only or audio-only formats) for proper formats selection for
|
||||||
# extractors with such incomplete formats (see
|
# extractors with such incomplete formats (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/pull/5556).
|
# https://github.com/ytdl-org/youtube-dl/pull/5556).
|
||||||
# Since formats may be filtered during format selection and may not match
|
# Since formats may be filtered during format selection and may not match
|
||||||
# the original formats the results may be incorrect. Thus original formats
|
# the original formats the results may be incorrect. Thus original formats
|
||||||
# or pre-calculated metrics should be passed to format selection routines
|
# or pre-calculated metrics should be passed to format selection routines
|
||||||
|
@ -1657,7 +1697,7 @@ class HaruhiDL(object):
|
||||||
# We will pass a context object containing all necessary additional data
|
# We will pass a context object containing all necessary additional data
|
||||||
# instead of just formats.
|
# instead of just formats.
|
||||||
# This fixes incorrect format selection issue (see
|
# This fixes incorrect format selection issue (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/10083).
|
# https://github.com/ytdl-org/youtube-dl/issues/10083).
|
||||||
incomplete_formats = (
|
incomplete_formats = (
|
||||||
# All formats are video-only or
|
# All formats are video-only or
|
||||||
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
|
all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
|
||||||
|
@ -1853,7 +1893,6 @@ class HaruhiDL(object):
|
||||||
# subtitles download errors are already managed as troubles in relevant IE
|
# subtitles download errors are already managed as troubles in relevant IE
|
||||||
# that way it will silently go on when used with unsupporting IE
|
# that way it will silently go on when used with unsupporting IE
|
||||||
subtitles = info_dict['requested_subtitles']
|
subtitles = info_dict['requested_subtitles']
|
||||||
ie = self.get_info_extractor(info_dict['extractor_key'])
|
|
||||||
for sub_lang, sub_info in subtitles.items():
|
for sub_lang, sub_info in subtitles.items():
|
||||||
sub_format = sub_info['ext']
|
sub_format = sub_info['ext']
|
||||||
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
|
||||||
|
@ -1864,7 +1903,7 @@ class HaruhiDL(object):
|
||||||
if sub_info.get('data') is not None:
|
if sub_info.get('data') is not None:
|
||||||
try:
|
try:
|
||||||
# Use newline='' to prevent conversion of newline characters
|
# Use newline='' to prevent conversion of newline characters
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/10268
|
# See https://github.com/ytdl-org/youtube-dl/issues/10268
|
||||||
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
|
||||||
subfile.write(sub_info['data'])
|
subfile.write(sub_info['data'])
|
||||||
except (OSError, IOError):
|
except (OSError, IOError):
|
||||||
|
@ -1872,10 +1911,8 @@ class HaruhiDL(object):
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
sub_data = ie._request_webpage(
|
subd = get_suitable_downloader(sub_info, self.params)(self, self.params)
|
||||||
sub_info['url'], info_dict['id'], note=False).read()
|
subd.download(sub_filename, sub_info)
|
||||||
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
|
|
||||||
subfile.write(sub_data)
|
|
||||||
except (ExtractorError, IOError, OSError, ValueError) as err:
|
except (ExtractorError, IOError, OSError, ValueError) as err:
|
||||||
self.report_warning('Unable to download subtitle for "%s": %s' %
|
self.report_warning('Unable to download subtitle for "%s": %s' %
|
||||||
(sub_lang, error_to_compat_str(err)))
|
(sub_lang, error_to_compat_str(err)))
|
||||||
|
@ -1903,6 +1940,10 @@ class HaruhiDL(object):
|
||||||
fd.add_progress_hook(ph)
|
fd.add_progress_hook(ph)
|
||||||
if self.params.get('verbose'):
|
if self.params.get('verbose'):
|
||||||
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
|
||||||
|
if info.get('protocol') == 'bittorrent' and not self.params.get('allow_p2p'):
|
||||||
|
raise HaruhiDLError('Peer-to-peer format got selected, but peer-to-peer '
|
||||||
|
'downloads are not allowed. '
|
||||||
|
'Choose different format or add --allow-p2p option')
|
||||||
return fd.download(name, info)
|
return fd.download(name, info)
|
||||||
|
|
||||||
if info_dict.get('requested_formats') is not None:
|
if info_dict.get('requested_formats') is not None:
|
||||||
|
@ -1919,8 +1960,32 @@ class HaruhiDL(object):
|
||||||
|
|
||||||
def compatible_formats(formats):
|
def compatible_formats(formats):
|
||||||
video, audio = formats
|
video, audio = formats
|
||||||
# Check extension
|
# Check extensions and codecs
|
||||||
video_ext, audio_ext = video.get('ext'), audio.get('ext')
|
video_ext, audio_ext = video.get('ext'), audio.get('ext')
|
||||||
|
video_codec, audio_codec = video.get('vcodec'), audio.get('acodec')
|
||||||
|
|
||||||
|
if video_codec and audio_codec:
|
||||||
|
COMPATIBLE_CODECS = {
|
||||||
|
'mp4': (
|
||||||
|
# fourcc (m3u8, mpd)
|
||||||
|
'av01', 'hevc', 'avc1', 'mp4a',
|
||||||
|
# whatever the ism does
|
||||||
|
'h264', 'aacl',
|
||||||
|
),
|
||||||
|
'webm': (
|
||||||
|
'av01', 'vp9', 'vp8', 'opus', 'vrbs',
|
||||||
|
# these are in the webm spec, so putting it here to be sure
|
||||||
|
'vp9x', 'vp8x',
|
||||||
|
),
|
||||||
|
}
|
||||||
|
video_codec = video_codec[:4].lower()
|
||||||
|
audio_codec = audio_codec[:4].lower()
|
||||||
|
for ext in COMPATIBLE_CODECS:
|
||||||
|
if all(codec in COMPATIBLE_CODECS[ext]
|
||||||
|
for codec in (video_codec, audio_codec)):
|
||||||
|
info_dict['ext'] = ext
|
||||||
|
return True
|
||||||
|
|
||||||
if video_ext and audio_ext:
|
if video_ext and audio_ext:
|
||||||
COMPATIBLE_EXTS = (
|
COMPATIBLE_EXTS = (
|
||||||
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
|
('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
|
||||||
|
@ -1929,7 +1994,6 @@ class HaruhiDL(object):
|
||||||
for exts in COMPATIBLE_EXTS:
|
for exts in COMPATIBLE_EXTS:
|
||||||
if video_ext in exts and audio_ext in exts:
|
if video_ext in exts and audio_ext in exts:
|
||||||
return True
|
return True
|
||||||
# TODO: Check acodec/vcodec
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
filename_real_ext = os.path.splitext(filename)[1][1:]
|
filename_real_ext = os.path.splitext(filename)[1][1:]
|
||||||
|
@ -2283,7 +2347,7 @@ class HaruhiDL(object):
|
||||||
return
|
return
|
||||||
|
|
||||||
if type('') is not compat_str:
|
if type('') is not compat_str:
|
||||||
# Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/haruhi-dl/issues/3326)
|
# Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
|
||||||
self.report_warning(
|
self.report_warning(
|
||||||
'Your Python is broken! Update to a newer and supported version')
|
'Your Python is broken! Update to a newer and supported version')
|
||||||
|
|
||||||
|
@ -2377,7 +2441,7 @@ class HaruhiDL(object):
|
||||||
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
proxies = {'http': opts_proxy, 'https': opts_proxy}
|
||||||
else:
|
else:
|
||||||
proxies = compat_urllib_request.getproxies()
|
proxies = compat_urllib_request.getproxies()
|
||||||
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/haruhi-dl/issues/805)
|
# Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
|
||||||
if 'http' in proxies and 'https' not in proxies:
|
if 'http' in proxies and 'https' not in proxies:
|
||||||
proxies['https'] = proxies['http']
|
proxies['https'] = proxies['http']
|
||||||
proxy_handler = PerRequestProxyHandler(proxies)
|
proxy_handler = PerRequestProxyHandler(proxies)
|
||||||
|
@ -2391,7 +2455,7 @@ class HaruhiDL(object):
|
||||||
# When passing our own FileHandler instance, build_opener won't add the
|
# When passing our own FileHandler instance, build_opener won't add the
|
||||||
# default FileHandler and allows us to disable the file protocol, which
|
# default FileHandler and allows us to disable the file protocol, which
|
||||||
# can be used for malicious purposes (see
|
# can be used for malicious purposes (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/8227)
|
# https://github.com/ytdl-org/youtube-dl/issues/8227)
|
||||||
file_handler = compat_urllib_request.FileHandler()
|
file_handler = compat_urllib_request.FileHandler()
|
||||||
|
|
||||||
def file_open(*args, **kwargs):
|
def file_open(*args, **kwargs):
|
||||||
|
@ -2403,7 +2467,7 @@ class HaruhiDL(object):
|
||||||
|
|
||||||
# Delete the default user-agent header, which would otherwise apply in
|
# Delete the default user-agent header, which would otherwise apply in
|
||||||
# cases where our custom HTTP handler doesn't come into play
|
# cases where our custom HTTP handler doesn't come into play
|
||||||
# (See https://github.com/ytdl-org/haruhi-dl/issues/1309 for details)
|
# (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
|
||||||
opener.addheaders = []
|
opener.addheaders = []
|
||||||
self._opener = opener
|
self._opener = opener
|
||||||
|
|
||||||
|
|
|
@ -50,7 +50,7 @@ from .HaruhiDL import HaruhiDL
|
||||||
def _real_main(argv=None):
|
def _real_main(argv=None):
|
||||||
# Compatibility fixes for Windows
|
# Compatibility fixes for Windows
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/820
|
# https://github.com/ytdl-org/youtube-dl/issues/820
|
||||||
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
|
||||||
|
|
||||||
workaround_optparse_bug9161()
|
workaround_optparse_bug9161()
|
||||||
|
@ -176,6 +176,10 @@ def _real_main(argv=None):
|
||||||
opts.max_sleep_interval = opts.sleep_interval
|
opts.max_sleep_interval = opts.sleep_interval
|
||||||
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
if opts.ap_mso and opts.ap_mso not in MSO_INFO:
|
||||||
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
|
||||||
|
if opts.force_generic_extractor and opts.force_use_mastodon:
|
||||||
|
parser.error('force either generic extractor or Mastodon')
|
||||||
|
if opts.force_playwright_browser not in ('firefox', 'chromium', 'webkit', None):
|
||||||
|
parser.error('invalid browser forced, must be on of: firefox, chromium, webkit')
|
||||||
|
|
||||||
def parse_retries(retries):
|
def parse_retries(retries):
|
||||||
if retries in ('inf', 'infinite'):
|
if retries in ('inf', 'infinite'):
|
||||||
|
@ -348,6 +352,8 @@ def _real_main(argv=None):
|
||||||
'restrictfilenames': opts.restrictfilenames,
|
'restrictfilenames': opts.restrictfilenames,
|
||||||
'ignoreerrors': opts.ignoreerrors,
|
'ignoreerrors': opts.ignoreerrors,
|
||||||
'force_generic_extractor': opts.force_generic_extractor,
|
'force_generic_extractor': opts.force_generic_extractor,
|
||||||
|
'force_use_mastodon': opts.force_use_mastodon,
|
||||||
|
'ie_key': opts.ie_key,
|
||||||
'ratelimit': opts.ratelimit,
|
'ratelimit': opts.ratelimit,
|
||||||
'nooverwrites': opts.nooverwrites,
|
'nooverwrites': opts.nooverwrites,
|
||||||
'retries': opts.retries,
|
'retries': opts.retries,
|
||||||
|
@ -420,6 +426,7 @@ def _real_main(argv=None):
|
||||||
'headless_playwright': opts.headless_playwright,
|
'headless_playwright': opts.headless_playwright,
|
||||||
'sleep_interval': opts.sleep_interval,
|
'sleep_interval': opts.sleep_interval,
|
||||||
'max_sleep_interval': opts.max_sleep_interval,
|
'max_sleep_interval': opts.max_sleep_interval,
|
||||||
|
'force_playwright_browser': opts.force_playwright_browser,
|
||||||
'external_downloader': opts.external_downloader,
|
'external_downloader': opts.external_downloader,
|
||||||
'list_thumbnails': opts.list_thumbnails,
|
'list_thumbnails': opts.list_thumbnails,
|
||||||
'playlist_items': opts.playlist_items,
|
'playlist_items': opts.playlist_items,
|
||||||
|
@ -438,6 +445,8 @@ def _real_main(argv=None):
|
||||||
'geo_bypass': opts.geo_bypass,
|
'geo_bypass': opts.geo_bypass,
|
||||||
'geo_bypass_country': opts.geo_bypass_country,
|
'geo_bypass_country': opts.geo_bypass_country,
|
||||||
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
'geo_bypass_ip_block': opts.geo_bypass_ip_block,
|
||||||
|
'allow_p2p': opts.allow_p2p if not opts.prefer_p2p else True,
|
||||||
|
'prefer_p2p': opts.prefer_p2p,
|
||||||
# just for deprecation check
|
# just for deprecation check
|
||||||
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
'autonumber': opts.autonumber if opts.autonumber is True else None,
|
||||||
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
'usetitle': opts.usetitle if opts.usetitle is True else None,
|
||||||
|
|
2995
haruhi_dl/compat.py
2995
haruhi_dl/compat.py
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,18 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ..utils import (
|
||||||
|
determine_protocol,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_real_downloader(info_dict, protocol=None, *args, **kwargs):
|
||||||
|
info_copy = info_dict.copy()
|
||||||
|
if protocol:
|
||||||
|
info_copy['protocol'] = protocol
|
||||||
|
return get_suitable_downloader(info_copy, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
# Some of these require _get_real_downloader
|
||||||
from .common import FileDownloader
|
from .common import FileDownloader
|
||||||
from .f4m import F4mFD
|
from .f4m import F4mFD
|
||||||
from .hls import HlsFD
|
from .hls import HlsFD
|
||||||
|
@ -8,15 +21,13 @@ from .rtmp import RtmpFD
|
||||||
from .dash import DashSegmentsFD
|
from .dash import DashSegmentsFD
|
||||||
from .rtsp import RtspFD
|
from .rtsp import RtspFD
|
||||||
from .ism import IsmFD
|
from .ism import IsmFD
|
||||||
|
from .niconico import NiconicoDmcFD
|
||||||
from .external import (
|
from .external import (
|
||||||
get_external_downloader,
|
get_external_downloader,
|
||||||
|
Aria2cFD,
|
||||||
FFmpegFD,
|
FFmpegFD,
|
||||||
)
|
)
|
||||||
|
|
||||||
from ..utils import (
|
|
||||||
determine_protocol,
|
|
||||||
)
|
|
||||||
|
|
||||||
PROTOCOL_MAP = {
|
PROTOCOL_MAP = {
|
||||||
'rtmp': RtmpFD,
|
'rtmp': RtmpFD,
|
||||||
'm3u8_native': HlsFD,
|
'm3u8_native': HlsFD,
|
||||||
|
@ -26,6 +37,8 @@ PROTOCOL_MAP = {
|
||||||
'f4m': F4mFD,
|
'f4m': F4mFD,
|
||||||
'http_dash_segments': DashSegmentsFD,
|
'http_dash_segments': DashSegmentsFD,
|
||||||
'ism': IsmFD,
|
'ism': IsmFD,
|
||||||
|
'bittorrent': Aria2cFD,
|
||||||
|
'niconico_dmc': NiconicoDmcFD,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -182,15 +182,16 @@ class Aria2cFD(ExternalFD):
|
||||||
AVAILABLE_OPT = '-v'
|
AVAILABLE_OPT = '-v'
|
||||||
|
|
||||||
def _make_cmd(self, tmpfilename, info_dict):
|
def _make_cmd(self, tmpfilename, info_dict):
|
||||||
cmd = [self.exe, '-c']
|
cmd = [self.exe or 'aria2c', '-c']
|
||||||
cmd += self._configuration_args([
|
cmd += self._configuration_args([
|
||||||
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
'--min-split-size', '1M', '--max-connection-per-server', '4'])
|
||||||
dn = os.path.dirname(tmpfilename)
|
dn = os.path.dirname(tmpfilename)
|
||||||
if dn:
|
if dn:
|
||||||
cmd += ['--dir', dn]
|
cmd += ['--dir', dn]
|
||||||
cmd += ['--out', os.path.basename(tmpfilename)]
|
cmd += ['--out', os.path.basename(tmpfilename)]
|
||||||
for key, val in info_dict['http_headers'].items():
|
if info_dict['protocol'] != 'bittorrent':
|
||||||
cmd += ['--header', '%s: %s' % (key, val)]
|
for key, val in info_dict['http_headers'].items():
|
||||||
|
cmd += ['--header', '%s: %s' % (key, val)]
|
||||||
cmd += self._option('--interface', 'source_address')
|
cmd += self._option('--interface', 'source_address')
|
||||||
cmd += self._option('--all-proxy', 'proxy')
|
cmd += self._option('--all-proxy', 'proxy')
|
||||||
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
|
||||||
|
@ -240,7 +241,7 @@ class FFmpegFD(ExternalFD):
|
||||||
# setting -seekable prevents ffmpeg from guessing if the server
|
# setting -seekable prevents ffmpeg from guessing if the server
|
||||||
# supports seeking(by adding the header `Range: bytes=0-`), which
|
# supports seeking(by adding the header `Range: bytes=0-`), which
|
||||||
# can cause problems in some cases
|
# can cause problems in some cases
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/11800#issuecomment-275037127
|
# https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
|
||||||
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
# http://trac.ffmpeg.org/ticket/6125#comment:10
|
||||||
args += ['-seekable', '1' if seekable else '0']
|
args += ['-seekable', '1' if seekable else '0']
|
||||||
|
|
||||||
|
@ -317,7 +318,9 @@ class FFmpegFD(ExternalFD):
|
||||||
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
|
||||||
|
|
||||||
if protocol in ('m3u8', 'm3u8_native'):
|
if protocol in ('m3u8', 'm3u8_native'):
|
||||||
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
if info_dict['ext'] == 'vtt':
|
||||||
|
args += ['-f', 'webvtt']
|
||||||
|
elif self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
|
||||||
args += ['-f', 'mpegts']
|
args += ['-f', 'mpegts']
|
||||||
else:
|
else:
|
||||||
args += ['-f', 'mp4']
|
args += ['-f', 'mp4']
|
||||||
|
@ -341,7 +344,7 @@ class FFmpegFD(ExternalFD):
|
||||||
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
# mp4 file couldn't be played, but if we ask ffmpeg to quit it
|
||||||
# produces a file that is playable (this is mostly useful for live
|
# produces a file that is playable (this is mostly useful for live
|
||||||
# streams). Note that Windows is not affected and produces playable
|
# streams). Note that Windows is not affected and produces playable
|
||||||
# files (see https://github.com/ytdl-org/haruhi-dl/issues/8300).
|
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
|
||||||
if sys.platform != 'win32':
|
if sys.platform != 'win32':
|
||||||
proc.communicate(b'q')
|
proc.communicate(b'q')
|
||||||
raise
|
raise
|
||||||
|
|
|
@ -324,8 +324,8 @@ class F4mFD(FragmentFD):
|
||||||
urlh = self.hdl.urlopen(self._prepare_url(info_dict, man_url))
|
urlh = self.hdl.urlopen(self._prepare_url(info_dict, man_url))
|
||||||
man_url = urlh.geturl()
|
man_url = urlh.geturl()
|
||||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||||
# (see https://github.com/ytdl-org/haruhi-dl/issues/6215#issuecomment-121704244
|
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244
|
||||||
# and https://github.com/ytdl-org/haruhi-dl/issues/7823)
|
# and https://github.com/ytdl-org/youtube-dl/issues/7823)
|
||||||
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
|
manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()
|
||||||
|
|
||||||
doc = compat_etree_fromstring(manifest)
|
doc = compat_etree_fromstring(manifest)
|
||||||
|
@ -409,7 +409,7 @@ class F4mFD(FragmentFD):
|
||||||
# In tests, segments may be truncated, and thus
|
# In tests, segments may be truncated, and thus
|
||||||
# FlvReader may not be able to parse the whole
|
# FlvReader may not be able to parse the whole
|
||||||
# chunk. If so, write the segment as is
|
# chunk. If so, write the segment as is
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/9214
|
# See https://github.com/ytdl-org/youtube-dl/issues/9214
|
||||||
dest_stream.write(down_data)
|
dest_stream.write(down_data)
|
||||||
break
|
break
|
||||||
raise
|
raise
|
||||||
|
|
|
@ -154,8 +154,8 @@ class HlsFD(FragmentFD):
|
||||||
except compat_urllib_error.HTTPError as err:
|
except compat_urllib_error.HTTPError as err:
|
||||||
# Unavailable (possibly temporary) fragments may be served.
|
# Unavailable (possibly temporary) fragments may be served.
|
||||||
# First we try to retry then either skip or abort.
|
# First we try to retry then either skip or abort.
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/10165,
|
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/10448).
|
# https://github.com/ytdl-org/youtube-dl/issues/10448).
|
||||||
count += 1
|
count += 1
|
||||||
if count <= fragment_retries:
|
if count <= fragment_retries:
|
||||||
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
self.report_retry_fragment(err, frag_index, count, fragment_retries)
|
||||||
|
@ -173,7 +173,7 @@ class HlsFD(FragmentFD):
|
||||||
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.hdl.urlopen(
|
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.hdl.urlopen(
|
||||||
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
|
||||||
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
|
||||||
# size (see https://github.com/hdl-org/haruhi-dl/pull/27660). Tests only care that the correct data downloaded,
|
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
|
||||||
# not what it decrypts to.
|
# not what it decrypts to.
|
||||||
if not test:
|
if not test:
|
||||||
frag_content = AES.new(
|
frag_content = AES.new(
|
||||||
|
|
|
@ -118,7 +118,7 @@ class HttpFD(FileDownloader):
|
||||||
# to match the value of requested Range HTTP header. This is due to a webservers
|
# to match the value of requested Range HTTP header. This is due to a webservers
|
||||||
# that don't support resuming and serve a whole file with no Content-Range
|
# that don't support resuming and serve a whole file with no Content-Range
|
||||||
# set in response despite of requested Range (see
|
# set in response despite of requested Range (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/6057#issuecomment-126129799)
|
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
|
||||||
if has_range:
|
if has_range:
|
||||||
content_range = ctx.data.headers.get('Content-Range')
|
content_range = ctx.data.headers.get('Content-Range')
|
||||||
if content_range:
|
if content_range:
|
||||||
|
|
55
haruhi_dl/downloader/niconico.py
Normal file
55
haruhi_dl/downloader/niconico.py
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import threading
|
||||||
|
|
||||||
|
from .common import FileDownloader
|
||||||
|
from ..downloader import _get_real_downloader
|
||||||
|
from ..extractor.niconico import NiconicoIE
|
||||||
|
from ..compat import compat_urllib_request
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoDmcFD(FileDownloader):
|
||||||
|
""" Downloading niconico douga from DMC with heartbeat """
|
||||||
|
|
||||||
|
FD_NAME = 'niconico_dmc'
|
||||||
|
|
||||||
|
def real_download(self, filename, info_dict):
|
||||||
|
self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
|
||||||
|
|
||||||
|
ie = NiconicoIE(self.hdl)
|
||||||
|
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
|
||||||
|
|
||||||
|
fd = _get_real_downloader(info_dict, params=self.params)(self.hdl, self.params)
|
||||||
|
|
||||||
|
success = download_complete = False
|
||||||
|
timer = [None]
|
||||||
|
heartbeat_lock = threading.Lock()
|
||||||
|
heartbeat_url = heartbeat_info_dict['url']
|
||||||
|
heartbeat_data = heartbeat_info_dict['data'].encode()
|
||||||
|
heartbeat_interval = heartbeat_info_dict.get('interval', 30)
|
||||||
|
|
||||||
|
def heartbeat():
|
||||||
|
try:
|
||||||
|
compat_urllib_request.urlopen(url=heartbeat_url, data=heartbeat_data)
|
||||||
|
except Exception:
|
||||||
|
self.to_screen('[%s] Heartbeat failed' % self.FD_NAME)
|
||||||
|
|
||||||
|
with heartbeat_lock:
|
||||||
|
if not download_complete:
|
||||||
|
timer[0] = threading.Timer(heartbeat_interval, heartbeat)
|
||||||
|
timer[0].start()
|
||||||
|
|
||||||
|
heartbeat_info_dict['ping']()
|
||||||
|
self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval))
|
||||||
|
try:
|
||||||
|
heartbeat()
|
||||||
|
if type(fd).__name__ == 'HlsFD':
|
||||||
|
info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0])
|
||||||
|
success = fd.real_download(filename, info_dict)
|
||||||
|
finally:
|
||||||
|
if heartbeat_lock:
|
||||||
|
with heartbeat_lock:
|
||||||
|
timer[0].cancel()
|
||||||
|
download_complete = True
|
||||||
|
return success
|
|
@ -7,8 +7,12 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
clean_podcast_url,
|
clean_podcast_url,
|
||||||
|
float_or_none,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
urljoin,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -124,3 +128,76 @@ class ACastChannelIE(ACastBaseIE):
|
||||||
entries.append(self._extract_episode(episode, show_info))
|
entries.append(self._extract_episode(episode, show_info))
|
||||||
return self.playlist_result(
|
return self.playlist_result(
|
||||||
entries, show.get('id'), show.get('title'), show.get('description'))
|
entries, show.get('id'), show.get('title'), show.get('description'))
|
||||||
|
|
||||||
|
|
||||||
|
class ACastPlayerIE(InfoExtractor):
|
||||||
|
IE_NAME = 'acast:player'
|
||||||
|
_VALID_URL = r'https?://player\.acast\.com/(?:[^/]+/episodes/)?(?P<id>[^/?#]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://player.acast.com/600595844cac453f8579eca0/episodes/maciej-konieczny-podatek-medialny-to-mechanizm-kontroli?theme=default&latest=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '601dc897fb37095537d48e6f',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Maciej Konieczny: "Podatek medialny to bardziej mechanizm kontroli niż podatkowy”',
|
||||||
|
'upload_date': '20210208',
|
||||||
|
'timestamp': 1612764000,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://player.acast.com/5d09057251a90dcf7fa8e985?theme=default&latest=1',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '5d09057251a90dcf7fa8e985',
|
||||||
|
'title': 'DGPtalk: Obiektywnie o biznesie',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage, **kw):
|
||||||
|
return [mobj.group('url')
|
||||||
|
for mobj in re.finditer(
|
||||||
|
r'(?x)<iframe\b[^>]+\bsrc=(["\'])(?P<url>%s(?:\?[^#]+)?(?:\#.+?)?)\1' % ACastPlayerIE._VALID_URL,
|
||||||
|
webpage)]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
|
||||||
|
data = self._parse_json(
|
||||||
|
js_to_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)var _global\s*=\s*({.+?});',
|
||||||
|
webpage, 'podcast data')), display_id)
|
||||||
|
|
||||||
|
show = data['show']
|
||||||
|
|
||||||
|
players = [{
|
||||||
|
'id': player['_id'],
|
||||||
|
'title': player['title'],
|
||||||
|
'url': player['audio'],
|
||||||
|
'duration': float_or_none(player.get('duration')),
|
||||||
|
'timestamp': parse_iso8601(player.get('publishDate')),
|
||||||
|
'thumbnail': urljoin('https://player.acast.com/', player.get('cover')),
|
||||||
|
'series': show['title'],
|
||||||
|
'episode': player['title'],
|
||||||
|
} for player in data['player']]
|
||||||
|
|
||||||
|
if len(players) > 1:
|
||||||
|
info_dict = {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': players,
|
||||||
|
'id': show['_id'],
|
||||||
|
'title': show['title'],
|
||||||
|
'series': show['title'],
|
||||||
|
}
|
||||||
|
if show.get('cover'):
|
||||||
|
info_dict['thumbnails'] = [{
|
||||||
|
'url': urljoin('https://player.acast.com/', show['cover']['url']),
|
||||||
|
'filesize': int_or_none(show['cover'].get('size')),
|
||||||
|
}]
|
||||||
|
return info_dict
|
||||||
|
|
||||||
|
if len(players) == 1:
|
||||||
|
return players[0]
|
||||||
|
|
||||||
|
raise ExtractorError('No podcast episodes found')
|
||||||
|
|
|
@ -9,10 +9,10 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class AppleConnectIE(InfoExtractor):
|
class AppleConnectIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
|
_VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
'md5': 'e7c38568a01ea45402570e6029206723',
|
'md5': 'c1d41f72c8bcaf222e089434619316e4',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
|
||||||
'ext': 'm4v',
|
'ext': 'm4v',
|
||||||
|
@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
|
||||||
'upload_date': '20150710',
|
'upload_date': '20150710',
|
||||||
'timestamp': 1436545535,
|
'timestamp': 1436545535,
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
|
||||||
|
|
||||||
video_data = self._parse_json(video_json, video_id)
|
video_data = self._parse_json(video_json, video_id)
|
||||||
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
|
||||||
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
|
like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
|
|
@ -42,6 +42,7 @@ class ApplePodcastsIE(InfoExtractor):
|
||||||
ember_data = self._parse_json(self._search_regex(
|
ember_data = self._parse_json(self._search_regex(
|
||||||
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
|
||||||
webpage, 'ember data'), episode_id)
|
webpage, 'ember data'), episode_id)
|
||||||
|
ember_data = ember_data.get(episode_id) or ember_data
|
||||||
episode = ember_data['data']['attributes']
|
episode = ember_data['data']['attributes']
|
||||||
description = episode.get('description') or {}
|
description = episode.get('description') or {}
|
||||||
|
|
||||||
|
|
|
@ -249,14 +249,14 @@ class ARDMediathekIE(ARDMediathekBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class ARDIE(InfoExtractor):
|
class ARDIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?:video-?)?(?P<id>[0-9]+))\.html'
|
_VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# available till 7.01.2022
|
# available till 7.01.2022
|
||||||
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-die-woche-video100.html',
|
||||||
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
'md5': '867d8aa39eeaf6d76407c5ad1bb0d4c1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'display_id': 'maischberger-die-woche',
|
'id': 'maischberger-die-woche-video100',
|
||||||
'id': '100',
|
'display_id': 'maischberger-die-woche-video100',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'duration': 3687.0,
|
'duration': 3687.0,
|
||||||
'title': 'maischberger. die woche vom 7. Januar 2021',
|
'title': 'maischberger. die woche vom 7. Januar 2021',
|
||||||
|
@ -264,16 +264,25 @@ class ARDIE(InfoExtractor):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
|
'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
display_id = mobj.group('display_id')
|
display_id = mobj.group('id')
|
||||||
|
|
||||||
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
player_url = mobj.group('mainurl') + '~playerXml.xml'
|
||||||
doc = self._download_xml(player_url, display_id)
|
doc = self._download_xml(player_url, display_id)
|
||||||
|
@ -324,7 +333,7 @@ class ARDIE(InfoExtractor):
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': mobj.group('id'),
|
'id': xpath_text(video_node, './videoId', default=display_id),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'title': video_node.find('./title').text,
|
'title': video_node.find('./title').text,
|
||||||
|
@ -335,7 +344,7 @@ class ARDIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
|
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
|
||||||
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
|
||||||
|
@ -365,22 +374,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('video_id')
|
|
||||||
display_id = mobj.group('display_id')
|
|
||||||
if display_id:
|
|
||||||
display_id = display_id.rstrip('/')
|
|
||||||
if not display_id:
|
|
||||||
display_id = video_id
|
|
||||||
|
|
||||||
player_page = self._download_json(
|
player_page = self._download_json(
|
||||||
'https://api.ardmediathek.de/public-gateway',
|
'https://api.ardmediathek.de/public-gateway',
|
||||||
display_id, data=json.dumps({
|
video_id, data=json.dumps({
|
||||||
'query': '''{
|
'query': '''{
|
||||||
playerPage(client:"%s", clipId: "%s") {
|
playerPage(client: "ard", clipId: "%s") {
|
||||||
blockedByFsk
|
blockedByFsk
|
||||||
broadcastedOn
|
broadcastedOn
|
||||||
maturityContentRating
|
maturityContentRating
|
||||||
|
@ -410,7 +419,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}''' % (mobj.group('client'), video_id),
|
}''' % video_id,
|
||||||
}).encode(), headers={
|
}).encode(), headers={
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
})['data']['playerPage']
|
})['data']['playerPage']
|
||||||
|
@ -435,7 +444,6 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
|
||||||
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
|
||||||
info.update({
|
info.update({
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'display_id': display_id,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': description,
|
'description': description,
|
||||||
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
|
||||||
|
|
|
@ -103,7 +103,7 @@ class ArkenaIE(InfoExtractor):
|
||||||
f_url, video_id, mpd_id=kind, fatal=False))
|
f_url, video_id, mpd_id=kind, fatal=False))
|
||||||
elif kind == 'silverlight':
|
elif kind == 'silverlight':
|
||||||
# TODO: process when ism is supported (see
|
# TODO: process when ism is supported (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/8118)
|
# https://github.com/ytdl-org/youtube-dl/issues/8118)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
tbr = float_or_none(f.get('Bitrate'), 1000)
|
tbr = float_or_none(f.get('Bitrate'), 1000)
|
||||||
|
|
101
haruhi_dl/extractor/arnes.py
Normal file
101
haruhi_dl/extractor/arnes.py
Normal file
|
@ -0,0 +1,101 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
)
|
||||||
|
from ..utils import (
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
|
remove_start,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ArnesIE(InfoExtractor):
|
||||||
|
IE_NAME = 'video.arnes.si'
|
||||||
|
IE_DESC = 'Arnes Video'
|
||||||
|
_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
|
||||||
|
'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'a1qrWTOQfVoU',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Linearna neodvisnost, definicija',
|
||||||
|
'description': 'Linearna neodvisnost, definicija',
|
||||||
|
'license': 'PRIVATE',
|
||||||
|
'creator': 'Polona Oblak',
|
||||||
|
'timestamp': 1585063725,
|
||||||
|
'upload_date': '20200324',
|
||||||
|
'channel': 'Polona Oblak',
|
||||||
|
'channel_id': 'q6pc04hw24cj',
|
||||||
|
'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
|
||||||
|
'duration': 596.75,
|
||||||
|
'view_count': int,
|
||||||
|
'tags': ['linearna_algebra'],
|
||||||
|
'start_time': 10,
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
_BASE_URL = 'https://video.arnes.si'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
video = self._download_json(
|
||||||
|
self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
|
||||||
|
title = video['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
for media in (video.get('media') or []):
|
||||||
|
media_url = media.get('url')
|
||||||
|
if not media_url:
|
||||||
|
continue
|
||||||
|
formats.append({
|
||||||
|
'url': self._BASE_URL + media_url,
|
||||||
|
'format_id': remove_start(media.get('format'), 'FORMAT_'),
|
||||||
|
'format_note': media.get('formatTranslation'),
|
||||||
|
'width': int_or_none(media.get('width')),
|
||||||
|
'height': int_or_none(media.get('height')),
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
channel = video.get('channel') or {}
|
||||||
|
channel_id = channel.get('url')
|
||||||
|
thumbnail = video.get('thumbnailUrl')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': self._BASE_URL + thumbnail,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'license': video.get('license'),
|
||||||
|
'creator': video.get('author'),
|
||||||
|
'timestamp': parse_iso8601(video.get('creationTime')),
|
||||||
|
'channel': channel.get('name'),
|
||||||
|
'channel_id': channel_id,
|
||||||
|
'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
|
||||||
|
'duration': float_or_none(video.get('duration'), 1000),
|
||||||
|
'view_count': int_or_none(video.get('views')),
|
||||||
|
'tags': video.get('hashtags'),
|
||||||
|
'start_time': int_or_none(compat_parse_qs(
|
||||||
|
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
|
||||||
|
}
|
37
haruhi_dl/extractor/bandaichannel.py
Normal file
37
haruhi_dl/extractor/bandaichannel.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .brightcove import BrightcoveNewIE
|
||||||
|
from ..utils import extract_attributes
|
||||||
|
|
||||||
|
|
||||||
|
class BandaiChannelIE(BrightcoveNewIE):
|
||||||
|
IE_NAME = 'bandaichannel'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?b-ch\.com/titles/(?P<id>\d+/\d+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.b-ch.com/titles/514/001',
|
||||||
|
'md5': 'a0f2d787baa5729bed71108257f613a4',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6128044564001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'メタルファイターMIKU 第1話',
|
||||||
|
'timestamp': 1580354056,
|
||||||
|
'uploader_id': '5797077852001',
|
||||||
|
'upload_date': '20200130',
|
||||||
|
'duration': 1387.733,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
attrs = extract_attributes(self._search_regex(
|
||||||
|
r'(<video-js[^>]+\bid="bcplayer"[^>]*>)', webpage, 'player'))
|
||||||
|
bc = self._download_json(
|
||||||
|
'https://pbifcd.b-ch.com/v1/playbackinfo/ST/70/' + attrs['data-info'],
|
||||||
|
video_id, headers={'X-API-KEY': attrs['data-auth'].strip()})['bc']
|
||||||
|
return self._parse_brightcove_metadata(bc, bc['id'])
|
|
@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
|
||||||
'uploader': 'Ben Prunty',
|
'uploader': 'Ben Prunty',
|
||||||
'timestamp': 1396508491,
|
'timestamp': 1396508491,
|
||||||
'upload_date': '20140403',
|
'upload_date': '20140403',
|
||||||
|
'release_timestamp': 1396483200,
|
||||||
'release_date': '20140403',
|
'release_date': '20140403',
|
||||||
'duration': 260.877,
|
'duration': 260.877,
|
||||||
'track': 'Lanius (Battle)',
|
'track': 'Lanius (Battle)',
|
||||||
|
@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
|
||||||
'uploader': 'Mastodon',
|
'uploader': 'Mastodon',
|
||||||
'timestamp': 1322005399,
|
'timestamp': 1322005399,
|
||||||
'upload_date': '20111122',
|
'upload_date': '20111122',
|
||||||
|
'release_timestamp': 1076112000,
|
||||||
'release_date': '20040207',
|
'release_date': '20040207',
|
||||||
'duration': 120.79,
|
'duration': 120.79,
|
||||||
'track': 'Hail to Fire',
|
'track': 'Hail to Fire',
|
||||||
|
@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'uploader': artist,
|
'uploader': artist,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'release_date': unified_strdate(tralbum.get('album_release_date')),
|
'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
'track': track,
|
'track': track,
|
||||||
'track_number': track_number,
|
'track_number': track_number,
|
||||||
|
|
|
@ -1,31 +1,39 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import functools
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..compat import (
|
||||||
|
compat_etree_Element,
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_parse_qs,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
compat_urlparse,
|
||||||
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
OnDemandPagedList,
|
||||||
clean_html,
|
clean_html,
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
|
||||||
float_or_none,
|
float_or_none,
|
||||||
get_element_by_class,
|
get_element_by_class,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
strip_or_none,
|
||||||
try_get,
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
urljoin,
|
urljoin,
|
||||||
)
|
)
|
||||||
from ..compat import (
|
|
||||||
compat_etree_Element,
|
|
||||||
compat_HTTPError,
|
|
||||||
compat_urlparse,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkIE(InfoExtractor):
|
class BBCCoUkIE(InfoExtractor):
|
||||||
|
@ -204,7 +212,7 @@ class BBCCoUkIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'skip': 'Now it\'s really geo-restricted',
|
'skip': 'Now it\'s really geo-restricted',
|
||||||
}, {
|
}, {
|
||||||
# compact player (https://github.com/ytdl-org/haruhi-dl/issues/8147)
|
# compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
|
||||||
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'p028bfkj',
|
'id': 'p028bfkj',
|
||||||
|
@ -756,8 +764,17 @@ class BBCIE(BBCCoUkIE):
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# custom redirection to www.bbc.com
|
# custom redirection to www.bbc.com
|
||||||
|
# also, video with window.__INITIAL_DATA__
|
||||||
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': 'p02xzws1',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': "Pluto may have 'nitrogen glaciers'",
|
||||||
|
'description': 'md5:6a95b593f528d7a5f2605221bc56912f',
|
||||||
|
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||||
|
'timestamp': 1437785037,
|
||||||
|
'upload_date': '20150725',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
# single video article embedded with data-media-vpid
|
# single video article embedded with data-media-vpid
|
||||||
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
|
||||||
|
@ -793,11 +810,25 @@ class BBCIE(BBCCoUkIE):
|
||||||
'description': 'Learn English words and phrases from this story',
|
'description': 'Learn English words and phrases from this story',
|
||||||
},
|
},
|
||||||
'add_ie': [BBCCoUkIE.ie_key()],
|
'add_ie': [BBCCoUkIE.ie_key()],
|
||||||
|
}, {
|
||||||
|
# BBC Reel
|
||||||
|
'url': 'https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p07c6sb9',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'How positive thinking is harming your happiness',
|
||||||
|
'alt_title': 'The downsides of positive thinking',
|
||||||
|
'description': 'md5:fad74b31da60d83b8265954ee42d85b4',
|
||||||
|
'duration': 235,
|
||||||
|
'thumbnail': r're:https?://.+/p07c9dsr.jpg',
|
||||||
|
'upload_date': '20190604',
|
||||||
|
'categories': ['Psychology'],
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def suitable(cls, url):
|
def suitable(cls, url):
|
||||||
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
|
EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE)
|
||||||
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
|
||||||
else super(BBCIE, cls).suitable(url))
|
else super(BBCIE, cls).suitable(url))
|
||||||
|
|
||||||
|
@ -929,7 +960,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
else:
|
else:
|
||||||
entry['title'] = info['title']
|
entry['title'] = info['title']
|
||||||
entry['formats'].extend(info['formats'])
|
entry['formats'].extend(info['formats'])
|
||||||
except Exception as e:
|
except ExtractorError as e:
|
||||||
# Some playlist URL may fail with 500, at the same time
|
# Some playlist URL may fail with 500, at the same time
|
||||||
# the other one may work fine (e.g.
|
# the other one may work fine (e.g.
|
||||||
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
# http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
|
||||||
|
@ -980,6 +1011,37 @@ class BBCIE(BBCCoUkIE):
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# bbc reel (e.g. https://www.bbc.com/reel/video/p07c6sb6/how-positive-thinking-is-harming-your-happiness)
|
||||||
|
initial_data = self._parse_json(self._html_search_regex(
|
||||||
|
r'<script[^>]+id=(["\'])initial-data\1[^>]+data-json=(["\'])(?P<json>(?:(?!\2).)+)',
|
||||||
|
webpage, 'initial data', default='{}', group='json'), playlist_id, fatal=False)
|
||||||
|
if initial_data:
|
||||||
|
init_data = try_get(
|
||||||
|
initial_data, lambda x: x['initData']['items'][0], dict) or {}
|
||||||
|
smp_data = init_data.get('smpData') or {}
|
||||||
|
clip_data = try_get(smp_data, lambda x: x['items'][0], dict) or {}
|
||||||
|
version_id = clip_data.get('versionID')
|
||||||
|
if version_id:
|
||||||
|
title = smp_data['title']
|
||||||
|
formats, subtitles = self._download_media_selector(version_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
image_url = smp_data.get('holdingImageURL')
|
||||||
|
display_date = init_data.get('displayDate')
|
||||||
|
topic_title = init_data.get('topicTitle')
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': version_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'alt_title': init_data.get('shortTitle'),
|
||||||
|
'thumbnail': image_url.replace('$recipe', 'raw') if image_url else None,
|
||||||
|
'description': smp_data.get('summary') or init_data.get('shortSummary'),
|
||||||
|
'upload_date': display_date.replace('-', '') if display_date else None,
|
||||||
|
'subtitles': subtitles,
|
||||||
|
'duration': int_or_none(clip_data.get('duration')),
|
||||||
|
'categories': [topic_title] if topic_title else None,
|
||||||
|
}
|
||||||
|
|
||||||
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
# Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
|
||||||
# There are several setPayload calls may be present but the video
|
# There are several setPayload calls may be present but the video
|
||||||
# seems to be always related to the first one
|
# seems to be always related to the first one
|
||||||
|
@ -1041,7 +1103,7 @@ class BBCIE(BBCCoUkIE):
|
||||||
thumbnail = None
|
thumbnail = None
|
||||||
image_url = current_programme.get('image_url')
|
image_url = current_programme.get('image_url')
|
||||||
if image_url:
|
if image_url:
|
||||||
thumbnail = image_url.replace('{recipe}', '1920x1920')
|
thumbnail = image_url.replace('{recipe}', 'raw')
|
||||||
return {
|
return {
|
||||||
'id': programme_id,
|
'id': programme_id,
|
||||||
'title': title,
|
'title': title,
|
||||||
|
@ -1114,12 +1176,29 @@ class BBCIE(BBCCoUkIE):
|
||||||
continue
|
continue
|
||||||
formats, subtitles = self._download_media_selector(item_id)
|
formats, subtitles = self._download_media_selector(item_id)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
item_desc = None
|
||||||
|
blocks = try_get(media, lambda x: x['summary']['blocks'], list)
|
||||||
|
if blocks:
|
||||||
|
summary = []
|
||||||
|
for block in blocks:
|
||||||
|
text = try_get(block, lambda x: x['model']['text'], compat_str)
|
||||||
|
if text:
|
||||||
|
summary.append(text)
|
||||||
|
if summary:
|
||||||
|
item_desc = '\n\n'.join(summary)
|
||||||
|
item_time = None
|
||||||
|
for meta in try_get(media, lambda x: x['metadata']['items'], list) or []:
|
||||||
|
if try_get(meta, lambda x: x['label']) == 'Published':
|
||||||
|
item_time = unified_timestamp(meta.get('timestamp'))
|
||||||
|
break
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': item_id,
|
'id': item_id,
|
||||||
'title': item_title,
|
'title': item_title,
|
||||||
'thumbnail': item.get('holdingImageUrl'),
|
'thumbnail': item.get('holdingImageUrl'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
|
'timestamp': item_time,
|
||||||
|
'description': strip_or_none(item_desc),
|
||||||
})
|
})
|
||||||
for resp in (initial_data.get('data') or {}).values():
|
for resp in (initial_data.get('data') or {}).values():
|
||||||
name = resp.get('name')
|
name = resp.get('name')
|
||||||
|
@ -1293,21 +1372,149 @@ class BBCCoUkPlaylistBaseIE(InfoExtractor):
|
||||||
playlist_id, title, description)
|
playlist_id, title, description)
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
class BBCCoUkIPlayerPlaylistBaseIE(InfoExtractor):
|
||||||
IE_NAME = 'bbc.co.uk:iplayer:playlist'
|
_VALID_URL_TMPL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/%%s/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
||||||
_VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
|
|
||||||
_URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
|
@staticmethod
|
||||||
_VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
|
def _get_default(episode, key, default_key='default'):
|
||||||
|
return try_get(episode, lambda x: x[key][default_key])
|
||||||
|
|
||||||
|
def _get_description(self, data):
|
||||||
|
synopsis = data.get(self._DESCRIPTION_KEY) or {}
|
||||||
|
return dict_get(synopsis, ('large', 'medium', 'small'))
|
||||||
|
|
||||||
|
def _fetch_page(self, programme_id, per_page, series_id, page):
|
||||||
|
elements = self._get_elements(self._call_api(
|
||||||
|
programme_id, per_page, page + 1, series_id))
|
||||||
|
for element in elements:
|
||||||
|
episode = self._get_episode(element)
|
||||||
|
episode_id = episode.get('id')
|
||||||
|
if not episode_id:
|
||||||
|
continue
|
||||||
|
thumbnail = None
|
||||||
|
image = self._get_episode_image(episode)
|
||||||
|
if image:
|
||||||
|
thumbnail = image.replace('{recipe}', 'raw')
|
||||||
|
category = self._get_default(episode, 'labels', 'category')
|
||||||
|
yield {
|
||||||
|
'_type': 'url',
|
||||||
|
'id': episode_id,
|
||||||
|
'title': self._get_episode_field(episode, 'subtitle'),
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/episode/' + episode_id,
|
||||||
|
'thumbnail': thumbnail,
|
||||||
|
'description': self._get_description(episode),
|
||||||
|
'categories': [category] if category else None,
|
||||||
|
'series': self._get_episode_field(episode, 'title'),
|
||||||
|
'ie_key': BBCCoUkIE.ie_key(),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
pid = self._match_id(url)
|
||||||
|
qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
||||||
|
series_id = qs.get('seriesId', [None])[0]
|
||||||
|
page = qs.get('page', [None])[0]
|
||||||
|
per_page = 36 if page else self._PAGE_SIZE
|
||||||
|
fetch_page = functools.partial(self._fetch_page, pid, per_page, series_id)
|
||||||
|
entries = fetch_page(int(page) - 1) if page else OnDemandPagedList(fetch_page, self._PAGE_SIZE)
|
||||||
|
playlist_data = self._get_playlist_data(self._call_api(pid, 1))
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, pid, self._get_playlist_title(playlist_data),
|
||||||
|
self._get_description(playlist_data))
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIPlayerEpisodesIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||||
|
IE_NAME = 'bbc.co.uk:iplayer:episodes'
|
||||||
|
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'episodes'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'b05rcz9v',
|
'id': 'b05rcz9v',
|
||||||
'title': 'The Disappearance',
|
'title': 'The Disappearance',
|
||||||
'description': 'French thriller serial about a missing teenager.',
|
'description': 'md5:58eb101aee3116bad4da05f91179c0cb',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 6,
|
'playlist_mincount': 8,
|
||||||
'skip': 'This programme is not currently available on BBC iPlayer',
|
|
||||||
}, {
|
}, {
|
||||||
|
# all seasons
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b094m5t9',
|
||||||
|
'title': 'Doctor Foster',
|
||||||
|
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 10,
|
||||||
|
}, {
|
||||||
|
# explicit season
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/episodes/b094m5t9/doctor-foster?seriesId=b094m6nv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'b094m5t9',
|
||||||
|
'title': 'Doctor Foster',
|
||||||
|
'description': 'md5:5aa9195fad900e8e14b52acd765a9fd6',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 5,
|
||||||
|
}, {
|
||||||
|
# all pages
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'm0004c4v',
|
||||||
|
'title': 'Beechgrove',
|
||||||
|
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 37,
|
||||||
|
}, {
|
||||||
|
# explicit page
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/episodes/m0004c4v/beechgrove?page=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'm0004c4v',
|
||||||
|
'title': 'Beechgrove',
|
||||||
|
'description': 'Gardening show that celebrates Scottish horticulture and growing conditions.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1,
|
||||||
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
_DESCRIPTION_KEY = 'synopsis'
|
||||||
|
|
||||||
|
def _get_episode_image(self, episode):
|
||||||
|
return self._get_default(episode, 'image')
|
||||||
|
|
||||||
|
def _get_episode_field(self, episode, field):
|
||||||
|
return self._get_default(episode, field)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_elements(data):
|
||||||
|
return data['entities']['results']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_episode(element):
|
||||||
|
return element.get('episode') or {}
|
||||||
|
|
||||||
|
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||||
|
variables = {
|
||||||
|
'id': pid,
|
||||||
|
'page': page,
|
||||||
|
'perPage': per_page,
|
||||||
|
}
|
||||||
|
if series_id:
|
||||||
|
variables['sliceId'] = series_id
|
||||||
|
return self._download_json(
|
||||||
|
'https://graph.ibl.api.bbc.co.uk/', pid, headers={
|
||||||
|
'Content-Type': 'application/json'
|
||||||
|
}, data=json.dumps({
|
||||||
|
'id': '5692d93d5aac8d796a0305e895e61551',
|
||||||
|
'variables': variables,
|
||||||
|
}).encode('utf-8'))['data']['programme']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_playlist_data(data):
|
||||||
|
return data
|
||||||
|
|
||||||
|
def _get_playlist_title(self, data):
|
||||||
|
return self._get_default(data, 'title')
|
||||||
|
|
||||||
|
|
||||||
|
class BBCCoUkIPlayerGroupIE(BBCCoUkIPlayerPlaylistBaseIE):
|
||||||
|
IE_NAME = 'bbc.co.uk:iplayer:group'
|
||||||
|
_VALID_URL = BBCCoUkIPlayerPlaylistBaseIE._VALID_URL_TMPL % 'group'
|
||||||
|
_TESTS = [{
|
||||||
# Available for over a year unlike 30 days for most other programmes
|
# Available for over a year unlike 30 days for most other programmes
|
||||||
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -1316,14 +1523,56 @@ class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||||
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 10,
|
'playlist_mincount': 10,
|
||||||
|
}, {
|
||||||
|
# all pages
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p081d7j7',
|
||||||
|
'title': 'Music in Scotland',
|
||||||
|
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 47,
|
||||||
|
}, {
|
||||||
|
# explicit page
|
||||||
|
'url': 'https://www.bbc.co.uk/iplayer/group/p081d7j7?page=2',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'p081d7j7',
|
||||||
|
'title': 'Music in Scotland',
|
||||||
|
'description': 'Perfomances in Scotland and programmes featuring Scottish acts.',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 11,
|
||||||
}]
|
}]
|
||||||
|
_PAGE_SIZE = 200
|
||||||
|
_DESCRIPTION_KEY = 'synopses'
|
||||||
|
|
||||||
def _extract_title_and_description(self, webpage):
|
def _get_episode_image(self, episode):
|
||||||
title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
|
return self._get_default(episode, 'images', 'standard')
|
||||||
description = self._search_regex(
|
|
||||||
r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
|
def _get_episode_field(self, episode, field):
|
||||||
webpage, 'description', fatal=False, group='value')
|
return episode.get(field)
|
||||||
return title, description
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_elements(data):
|
||||||
|
return data['elements']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_episode(element):
|
||||||
|
return element
|
||||||
|
|
||||||
|
def _call_api(self, pid, per_page, page=1, series_id=None):
|
||||||
|
return self._download_json(
|
||||||
|
'http://ibl.api.bbc.co.uk/ibl/v1/groups/%s/episodes' % pid,
|
||||||
|
pid, query={
|
||||||
|
'page': page,
|
||||||
|
'per_page': per_page,
|
||||||
|
})['group_episodes']
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_playlist_data(data):
|
||||||
|
return data['group']
|
||||||
|
|
||||||
|
def _get_playlist_title(self, data):
|
||||||
|
return data.get('title')
|
||||||
|
|
||||||
|
|
||||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
|
||||||
|
|
|
@ -156,6 +156,7 @@ class BiliBiliIE(InfoExtractor):
|
||||||
cid = js['result']['cid']
|
cid = js['result']['cid']
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
|
'Accept': 'application/json',
|
||||||
'Referer': url
|
'Referer': url
|
||||||
}
|
}
|
||||||
headers.update(self.geo_verification_headers())
|
headers.update(self.geo_verification_headers())
|
||||||
|
@ -232,7 +233,7 @@ class BiliBiliIE(InfoExtractor):
|
||||||
webpage)
|
webpage)
|
||||||
if uploader_mobj:
|
if uploader_mobj:
|
||||||
info.update({
|
info.update({
|
||||||
'uploader': uploader_mobj.group('name'),
|
'uploader': uploader_mobj.group('name').strip(),
|
||||||
'uploader_id': uploader_mobj.group('id'),
|
'uploader_id': uploader_mobj.group('id'),
|
||||||
})
|
})
|
||||||
if not info.get('uploader'):
|
if not info.get('uploader'):
|
||||||
|
|
|
@ -1,86 +0,0 @@
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import json
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
remove_start,
|
|
||||||
int_or_none,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class BlinkxIE(InfoExtractor):
|
|
||||||
_VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)'
|
|
||||||
IE_NAME = 'blinkx'
|
|
||||||
|
|
||||||
_TEST = {
|
|
||||||
'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ',
|
|
||||||
'md5': '337cf7a344663ec79bf93a526a2e06c7',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'Da0Gw3xc',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News',
|
|
||||||
'uploader': 'IGN News',
|
|
||||||
'upload_date': '20150217',
|
|
||||||
'timestamp': 1424215740,
|
|
||||||
'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.',
|
|
||||||
'duration': 47.743333,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
display_id = video_id[:8]
|
|
||||||
|
|
||||||
api_url = ('https://apib4.blinkx.com/api.php?action=play_video&'
|
|
||||||
+ 'video=%s' % video_id)
|
|
||||||
data_json = self._download_webpage(api_url, display_id)
|
|
||||||
data = json.loads(data_json)['api']['results'][0]
|
|
||||||
duration = None
|
|
||||||
thumbnails = []
|
|
||||||
formats = []
|
|
||||||
for m in data['media']:
|
|
||||||
if m['type'] == 'jpg':
|
|
||||||
thumbnails.append({
|
|
||||||
'url': m['link'],
|
|
||||||
'width': int(m['w']),
|
|
||||||
'height': int(m['h']),
|
|
||||||
})
|
|
||||||
elif m['type'] == 'original':
|
|
||||||
duration = float(m['d'])
|
|
||||||
elif m['type'] == 'youtube':
|
|
||||||
yt_id = m['link']
|
|
||||||
self.to_screen('Youtube video detected: %s' % yt_id)
|
|
||||||
return self.url_result(yt_id, 'Youtube', video_id=yt_id)
|
|
||||||
elif m['type'] in ('flv', 'mp4'):
|
|
||||||
vcodec = remove_start(m['vcodec'], 'ff')
|
|
||||||
acodec = remove_start(m['acodec'], 'ff')
|
|
||||||
vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)
|
|
||||||
abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)
|
|
||||||
tbr = vbr + abr if vbr and abr else None
|
|
||||||
format_id = '%s-%sk-%s' % (vcodec, tbr, m['w'])
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': m['link'],
|
|
||||||
'vcodec': vcodec,
|
|
||||||
'acodec': acodec,
|
|
||||||
'abr': abr,
|
|
||||||
'vbr': vbr,
|
|
||||||
'tbr': tbr,
|
|
||||||
'width': int_or_none(m.get('w')),
|
|
||||||
'height': int_or_none(m.get('h')),
|
|
||||||
})
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': display_id,
|
|
||||||
'fullid': video_id,
|
|
||||||
'title': data['title'],
|
|
||||||
'formats': formats,
|
|
||||||
'uploader': data['channel_name'],
|
|
||||||
'timestamp': data['pubdate_epoch'],
|
|
||||||
'description': data.get('description'),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'duration': duration,
|
|
||||||
}
|
|
|
@ -130,7 +130,7 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
'skip': 'Unsupported URL',
|
'skip': 'Unsupported URL',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# playlist with 'playlistTab' (https://github.com/ytdl-org/haruhi-dl/issues/9965)
|
# playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
|
||||||
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1522758701001',
|
'id': '1522758701001',
|
||||||
|
@ -154,10 +154,10 @@ class BrightcoveLegacyIE(InfoExtractor):
|
||||||
<object class="BrightcoveExperience">{params}</object>
|
<object class="BrightcoveExperience">{params}</object>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Fix up some stupid HTML, see https://github.com/ytdl-org/haruhi-dl/issues/1553
|
# Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
|
||||||
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
|
object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
|
||||||
lambda m: m.group(1) + '/>', object_str)
|
lambda m: m.group(1) + '/>', object_str)
|
||||||
# Fix up some stupid XML, see https://github.com/ytdl-org/haruhi-dl/issues/1608
|
# Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
|
||||||
object_str = object_str.replace('<--', '<!--')
|
object_str = object_str.replace('<--', '<!--')
|
||||||
# remove namespace to simplify extraction
|
# remove namespace to simplify extraction
|
||||||
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
|
object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
|
||||||
|
|
91
haruhi_dl/extractor/castos.py
Normal file
91
haruhi_dl/extractor/castos.py
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
parse_duration,
|
||||||
|
)
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class CastosHostedIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://[^/.]+\.castos\.com/(?:player|episodes)/(?P<id>[\da-zA-Z-]+)'
|
||||||
|
IE_NAME = 'castos:hosted'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://audience.castos.com/player/408278',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '408278',
|
||||||
|
'ext': 'mp3',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://audience.castos.com/episodes/improve-your-podcast-production',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_urls(webpage, **kw):
|
||||||
|
return [mobj.group(1) for mobj
|
||||||
|
in re.finditer(
|
||||||
|
r'<iframe\b[^>]+(?<!-)src="(https?://[^/.]+\.castos\.com/player/\d+)',
|
||||||
|
webpage)]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
series = self._html_search_regex(
|
||||||
|
r'<div class="show">\s+<strong>([^<]+)</strong>', webpage, 'series name')
|
||||||
|
title = self._html_search_regex(
|
||||||
|
r'<div class="episode-title">([^<]+)</div>', webpage, 'episode title')
|
||||||
|
|
||||||
|
audio_url = self._html_search_regex(
|
||||||
|
r'<audio class="clip">\s+<source\b[^>]+src="(https?://[^"]+)"', webpage, 'audio url')
|
||||||
|
duration = parse_duration(self._search_regex(
|
||||||
|
r'<time id="duration">(\d\d(?::\d\d)+)</time>', webpage, 'duration'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': audio_url,
|
||||||
|
'duration': duration,
|
||||||
|
'series': series,
|
||||||
|
'episode': title,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CastosSSPIE(InfoExtractor):
|
||||||
|
@classmethod
|
||||||
|
def _extract_entries(self, webpage, **kw):
|
||||||
|
entries = []
|
||||||
|
for found in re.finditer(
|
||||||
|
r'(?s)<div class="castos-player[^"]*"[^>]*data-episode="(\d+)-[a-z\d]+">(.+?</nav>)\s*</div>',
|
||||||
|
webpage):
|
||||||
|
video_id, entry = found.group(1, 2)
|
||||||
|
|
||||||
|
def search_entry(regex):
|
||||||
|
res = re.search(regex, entry)
|
||||||
|
if res:
|
||||||
|
return res.group(1)
|
||||||
|
|
||||||
|
series = search_entry(r'<div class="show">\s+<strong>([^<]+)</strong>')
|
||||||
|
title = search_entry(r'<div class="episode-title">([^<]+)</div>')
|
||||||
|
|
||||||
|
audio_url = search_entry(
|
||||||
|
r'<audio class="clip[^"]*">\s+<source\b[^>]+src="(https?://[^"]+)"')
|
||||||
|
duration = parse_duration(
|
||||||
|
search_entry(r'<time id="duration[^"]*">(\d\d(?::\d\d)+)</time>'))
|
||||||
|
|
||||||
|
if not title or not audio_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'url': audio_url,
|
||||||
|
'duration': duration,
|
||||||
|
'series': series,
|
||||||
|
'episode': title,
|
||||||
|
})
|
||||||
|
return entries
|
|
@ -27,7 +27,7 @@ class CBSBaseIE(ThePlatformFeedIE):
|
||||||
|
|
||||||
|
|
||||||
class CBSIE(CBSBaseIE):
|
class CBSIE(CBSBaseIE):
|
||||||
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
_VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
|
||||||
|
@ -52,6 +52,9 @@ class CBSIE(CBSBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
|
||||||
|
|
|
@ -26,7 +26,7 @@ class CBSNewsEmbedIE(CBSIE):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
item = self._parse_json(zlib.decompress(compat_b64decode(
|
item = self._parse_json(zlib.decompress(compat_b64decode(
|
||||||
compat_urllib_parse_unquote(self._match_id(url))),
|
compat_urllib_parse_unquote(self._match_id(url))),
|
||||||
-zlib.MAX_WBITS), None)['video']['items'][0]
|
-zlib.MAX_WBITS).decode('utf-8'), None)['video']['items'][0]
|
||||||
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
return self._extract_video_info(item['mpxRefId'], 'cbsnews')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,38 +1,113 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .cbs import CBSBaseIE
|
import re
|
||||||
|
|
||||||
|
# from .cbs import CBSBaseIE
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CBSSportsIE(CBSBaseIE):
|
# class CBSSportsEmbedIE(CBSBaseIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/(?:video|news)/(?P<id>[^/?#&]+)'
|
class CBSSportsEmbedIE(InfoExtractor):
|
||||||
|
IE_NAME = 'cbssports:embed'
|
||||||
|
_VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
|
||||||
|
(?:
|
||||||
|
ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
|
||||||
|
pcid%3D(?P<pcid>\d+)
|
||||||
|
)'''
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.cbssports.com/nba/video/donovan-mitchell-flashes-star-potential-in-game-2-victory-over-thunder/',
|
'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
|
||||||
'info_dict': {
|
'only_matching': True,
|
||||||
'id': '1214315075735',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Donovan Mitchell flashes star potential in Game 2 victory over Thunder',
|
|
||||||
'description': 'md5:df6f48622612c2d6bd2e295ddef58def',
|
|
||||||
'timestamp': 1524111457,
|
|
||||||
'upload_date': '20180419',
|
|
||||||
'uploader': 'CBSI-NEW',
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
# m3u8 download
|
|
||||||
'skip_download': True,
|
|
||||||
}
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.cbssports.com/nba/news/nba-playoffs-2018-watch-76ers-vs-heat-game-3-series-schedule-tv-channel-online-stream/',
|
'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _extract_video_info(self, filter_query, video_id):
|
# def _extract_video_info(self, filter_query, video_id):
|
||||||
return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
# return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
uuid, pcid = re.match(self._VALID_URL, url).groups()
|
||||||
|
query = {'id': uuid} if uuid else {'pcid': pcid}
|
||||||
|
video = self._download_json(
|
||||||
|
'https://www.cbssports.com/api/content/video/',
|
||||||
|
uuid or pcid, query=query)[0]
|
||||||
|
video_id = video['id']
|
||||||
|
title = video['title']
|
||||||
|
metadata = video.get('metaData') or {}
|
||||||
|
# return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
|
||||||
|
# return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(
|
||||||
|
metadata['files'][0]['url'], video_id, 'mp4',
|
||||||
|
'm3u8_native', m3u8_id='hls', fatal=False)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
image = video.get('image')
|
||||||
|
thumbnails = None
|
||||||
|
if image:
|
||||||
|
image_path = image.get('path')
|
||||||
|
if image_path:
|
||||||
|
thumbnails = [{
|
||||||
|
'url': image_path,
|
||||||
|
'width': int_or_none(image.get('width')),
|
||||||
|
'height': int_or_none(image.get('height')),
|
||||||
|
'filesize': int_or_none(image.get('size')),
|
||||||
|
}]
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': video_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'description': video.get('description'),
|
||||||
|
'timestamp': int_or_none(try_get(video, lambda x: x['dateCreated']['epoch'])),
|
||||||
|
'duration': int_or_none(metadata.get('duration')),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsBaseIE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_webpage(url, display_id)
|
||||||
video_id = self._search_regex(
|
iframe_url = self._search_regex(
|
||||||
[r'(?:=|%26)pcid%3D(\d+)', r'embedVideo(?:Container)?_(\d+)'],
|
r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"',
|
||||||
webpage, 'video id')
|
webpage, 'embed url')
|
||||||
return self._extract_video_info('byId=%s' % video_id, video_id)
|
return self.url_result(iframe_url, CBSSportsEmbedIE.ie_key())
|
||||||
|
|
||||||
|
|
||||||
|
class CBSSportsIE(CBSSportsBaseIE):
    # Only the URL pattern and the test case live here; this class defines
    # no methods of its own, so extraction comes from CBSSportsBaseIE.
    IE_NAME = 'cbssports'
    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
        'info_dict': {
            'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
            'ext': 'mp4',
            'title': 'Cover 3: Stanford Spring Gleaning',
            'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
            'timestamp': 1617218398,
            'upload_date': '20210331',
            'duration': 502,
        },
    }]
|
||||||
|
|
||||||
|
|
||||||
|
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
    # Only the URL pattern and the test case live here; this class defines
    # no methods of its own, so extraction comes from CBSSportsBaseIE.
    IE_NAME = '247sports'
    # The id is the trailing number; an optional "<slug>-" may precede it.
    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
        'info_dict': {
            'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
            'ext': 'mp4',
            'title': '2021 QB Jake Garcia senior highlights through five games',
            'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
            'timestamp': 1607114223,
            'upload_date': '20201204',
            'duration': 208,
        },
    }]
|
||||||
|
|
|
@ -126,9 +126,6 @@ class CDAIE(CDABaseExtractor):
|
||||||
metadata = self._download_json(
|
metadata = self._download_json(
|
||||||
self._BASE_URL + '/video/' + video_id, video_id, headers=headers)['video']
|
self._BASE_URL + '/video/' + video_id, video_id, headers=headers)['video']
|
||||||
|
|
||||||
if metadata.get('premium') is True and metadata.get('premium_free') is not True:
|
|
||||||
raise ExtractorError('This video is only available for premium users.', expected=True)
|
|
||||||
|
|
||||||
uploader = try_get(metadata, lambda x: x['author']['login'])
|
uploader = try_get(metadata, lambda x: x['author']['login'])
|
||||||
# anonymous uploader
|
# anonymous uploader
|
||||||
if uploader == 'anonim':
|
if uploader == 'anonim':
|
||||||
|
@ -136,6 +133,8 @@ class CDAIE(CDABaseExtractor):
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for quality in metadata['qualities']:
|
for quality in metadata['qualities']:
|
||||||
|
if not quality['file']:
|
||||||
|
continue
|
||||||
formats.append({
|
formats.append({
|
||||||
'url': quality['file'],
|
'url': quality['file'],
|
||||||
'format': quality['title'],
|
'format': quality['title'],
|
||||||
|
@ -144,6 +143,13 @@ class CDAIE(CDABaseExtractor):
|
||||||
'filesize': quality.get('length'),
|
'filesize': quality.get('length'),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
if not formats:
|
||||||
|
if metadata.get('premium') is True and metadata.get('premium_free') is not True:
|
||||||
|
raise ExtractorError('This video is only available for premium users.', expected=True)
|
||||||
|
raise ExtractorError('No video qualities found', video_id=video_id)
|
||||||
|
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': metadata['title'],
|
'title': metadata['title'],
|
||||||
|
|
|
@ -157,7 +157,7 @@ class CeskaTelevizeIE(InfoExtractor):
|
||||||
stream_formats = self._extract_mpd_formats(
|
stream_formats = self._extract_mpd_formats(
|
||||||
stream_url, playlist_id,
|
stream_url, playlist_id,
|
||||||
mpd_id='dash-%s' % format_id, fatal=False)
|
mpd_id='dash-%s' % format_id, fatal=False)
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/12119#issuecomment-280037031
|
# See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
|
||||||
if format_id == 'audioDescription':
|
if format_id == 'audioDescription':
|
||||||
for f in stream_formats:
|
for f in stream_formats:
|
||||||
f['source_preference'] = -10
|
f['source_preference'] = -10
|
||||||
|
|
|
@ -17,7 +17,7 @@ import math
|
||||||
|
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_cookiejar_Cookie,
|
compat_cookiejar_Cookie,
|
||||||
compat_cookies,
|
compat_cookies_SimpleCookie,
|
||||||
compat_etree_Element,
|
compat_etree_Element,
|
||||||
compat_etree_fromstring,
|
compat_etree_fromstring,
|
||||||
compat_getpass,
|
compat_getpass,
|
||||||
|
@ -70,6 +70,7 @@ from ..utils import (
|
||||||
str_or_none,
|
str_or_none,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
|
try_get,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -204,6 +205,14 @@ class InfoExtractor(object):
|
||||||
* downloader_options A dictionary of downloader options as
|
* downloader_options A dictionary of downloader options as
|
||||||
described in FileDownloader
|
described in FileDownloader
|
||||||
|
|
||||||
|
Internally, extractors can include subtitles in the format
|
||||||
|
list, in this format:
|
||||||
|
* _subtitle The subtitle object, in the same format
|
||||||
|
as in subtitles field
|
||||||
|
* _key The tag for the provided subtitle
|
||||||
|
This is never included in the output JSON, but moved
|
||||||
|
into the subtitles field.
|
||||||
|
|
||||||
url: Final video URL.
|
url: Final video URL.
|
||||||
ext: Video filename extension.
|
ext: Video filename extension.
|
||||||
format: The video format, defaults to ext (used for --get-format)
|
format: The video format, defaults to ext (used for --get-format)
|
||||||
|
@ -230,8 +239,10 @@ class InfoExtractor(object):
|
||||||
uploader: Full name of the video uploader.
|
uploader: Full name of the video uploader.
|
||||||
license: License name the video is licensed under.
|
license: License name the video is licensed under.
|
||||||
creator: The creator of the video.
|
creator: The creator of the video.
|
||||||
|
release_timestamp: UNIX timestamp of the moment the video was released.
|
||||||
release_date: The date (YYYYMMDD) when the video was released.
|
release_date: The date (YYYYMMDD) when the video was released.
|
||||||
timestamp: UNIX timestamp of the moment the video became available.
|
timestamp: UNIX timestamp of the moment the video became available
|
||||||
|
(uploaded).
|
||||||
upload_date: Video upload date (YYYYMMDD).
|
upload_date: Video upload date (YYYYMMDD).
|
||||||
If not explicitly set, calculated from timestamp.
|
If not explicitly set, calculated from timestamp.
|
||||||
uploader_id: Nickname or id of the video uploader.
|
uploader_id: Nickname or id of the video uploader.
|
||||||
|
@ -245,11 +256,15 @@ class InfoExtractor(object):
|
||||||
subtitles: The available subtitles as a dictionary in the format
|
subtitles: The available subtitles as a dictionary in the format
|
||||||
{tag: subformats}. "tag" is usually a language code, and
|
{tag: subformats}. "tag" is usually a language code, and
|
||||||
"subformats" is a list sorted from lower to higher
|
"subformats" is a list sorted from lower to higher
|
||||||
preference, each element is a dictionary with the "ext"
|
preference, each element is a dictionary,
|
||||||
entry and one of:
|
which must contain one of these values:
|
||||||
* "data": The subtitles file contents
|
* "data": The subtitles file contents
|
||||||
* "url": A URL pointing to the subtitles file
|
* "url": A URL pointing to the subtitles file
|
||||||
"ext" will be calculated from URL if missing
|
These values are guessed based on other data, if missing,
|
||||||
|
in a way analogous to the formats data:
|
||||||
|
* "ext" - subtitle extension name (vtt, srt, ...)
|
||||||
|
* "proto" - download protocol (https, http, m3u8, ...)
|
||||||
|
* "http_headers"
|
||||||
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
automatic_captions: Like 'subtitles', used by the YoutubeIE for
|
||||||
automatically generated captions
|
automatically generated captions
|
||||||
duration: Length of the video in seconds, as an integer or float.
|
duration: Length of the video in seconds, as an integer or float.
|
||||||
|
@ -1273,6 +1288,23 @@ class InfoExtractor(object):
|
||||||
continue
|
continue
|
||||||
info[count_key] = interaction_count
|
info[count_key] = interaction_count
|
||||||
|
|
||||||
|
def extract_author(e):
    """Copy uploader details from the ``author`` property of *e* into ``info``.

    *e* is a JSON-LD node (a dict) or a falsy value. Nothing is written
    when *e* is falsy or has no truthy ``author``.

    * A string author becomes ``info['uploader']``.
    * A dict author typed ``Person`` or ``Organization`` fills
      ``uploader``, ``uploader_id`` and ``uploader_url``.
    * Any other author shape is ignored.

    Always returns None; the result is the mutation of ``info``.
    """
    author = e.get('author') if e else None
    if not author:
        return None

    if isinstance(author, str):
        info['uploader'] = author
    elif isinstance(author, dict):
        if author.get('@type') in ('Person', 'Organization'):
            # ``url`` may be a plain string or a nested object carrying its
            # own ``url`` key. Only the nested form used to be read, so a
            # plain-string URL was silently dropped.
            author_url = author.get('url')
            if isinstance(author_url, dict):
                author_url = author_url.get('url')
            info.update({
                'uploader': author.get('name'),
                'uploader_id': author.get('identifier'),
                'uploader_url': author_url if isinstance(author_url, str) else None,
            })
|
||||||
|
|
||||||
media_object_types = ('MediaObject', 'VideoObject', 'AudioObject', 'MusicVideoObject')
|
media_object_types = ('MediaObject', 'VideoObject', 'AudioObject', 'MusicVideoObject')
|
||||||
|
|
||||||
def extract_media_object(e):
|
def extract_media_object(e):
|
||||||
|
@ -1290,7 +1322,6 @@ class InfoExtractor(object):
|
||||||
'thumbnails': thumbnails,
|
'thumbnails': thumbnails,
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
'uploader': str_or_none(e.get('author')),
|
|
||||||
'filesize': float_or_none(e.get('contentSize')),
|
'filesize': float_or_none(e.get('contentSize')),
|
||||||
'tbr': int_or_none(e.get('bitrate')),
|
'tbr': int_or_none(e.get('bitrate')),
|
||||||
'width': int_or_none(e.get('width')),
|
'width': int_or_none(e.get('width')),
|
||||||
|
@ -1298,6 +1329,7 @@ class InfoExtractor(object):
|
||||||
'view_count': int_or_none(e.get('interactionCount')),
|
'view_count': int_or_none(e.get('interactionCount')),
|
||||||
})
|
})
|
||||||
extract_interaction_statistic(e)
|
extract_interaction_statistic(e)
|
||||||
|
extract_author(e)
|
||||||
|
|
||||||
for e in json_ld:
|
for e in json_ld:
|
||||||
if '@context' in e:
|
if '@context' in e:
|
||||||
|
@ -1391,6 +1423,10 @@ class InfoExtractor(object):
|
||||||
f['tbr'] = f['abr'] + f['vbr']
|
f['tbr'] = f['abr'] + f['vbr']
|
||||||
|
|
||||||
def _formats_key(f):
|
def _formats_key(f):
|
||||||
|
# manifest subtitle workaround
|
||||||
|
if '_subtitle' in f:
|
||||||
|
return (-1,)
|
||||||
|
|
||||||
# TODO remove the following workaround
|
# TODO remove the following workaround
|
||||||
from ..utils import determine_ext
|
from ..utils import determine_ext
|
||||||
if not f.get('ext') and 'url' in f:
|
if not f.get('ext') and 'url' in f:
|
||||||
|
@ -1410,7 +1446,19 @@ class InfoExtractor(object):
|
||||||
preference -= 0.5
|
preference -= 0.5
|
||||||
|
|
||||||
protocol = f.get('protocol') or determine_protocol(f)
|
protocol = f.get('protocol') or determine_protocol(f)
|
||||||
proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
|
if protocol in ['http', 'https']:
|
||||||
|
proto_preference = 0
|
||||||
|
elif protocol == 'rtsp':
|
||||||
|
proto_preference = -0.5
|
||||||
|
elif protocol == 'bittorrent':
|
||||||
|
if self._downloader.params.get('prefer_p2p') is True:
|
||||||
|
proto_preference = 1
|
||||||
|
elif self._downloader.params.get('allow_p2p') is True:
|
||||||
|
proto_preference = -0.1
|
||||||
|
else:
|
||||||
|
proto_preference = -2
|
||||||
|
else:
|
||||||
|
proto_preference = -0.1
|
||||||
|
|
||||||
if f.get('vcodec') == 'none': # audio only
|
if f.get('vcodec') == 'none': # audio only
|
||||||
preference -= 50
|
preference -= 50
|
||||||
|
@ -1519,7 +1567,7 @@ class InfoExtractor(object):
|
||||||
manifest_url, video_id, 'Downloading f4m manifest',
|
manifest_url, video_id, 'Downloading f4m manifest',
|
||||||
'Unable to download f4m manifest',
|
'Unable to download f4m manifest',
|
||||||
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
# Some manifests may be malformed, e.g. prosiebensat1 generated manifests
|
||||||
# (see https://github.com/ytdl-org/haruhi-dl/issues/6215#issuecomment-121704244)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
|
||||||
transform_source=transform_source,
|
transform_source=transform_source,
|
||||||
fatal=fatal, data=data, headers=headers, query=query)
|
fatal=fatal, data=data, headers=headers, query=query)
|
||||||
|
|
||||||
|
@ -1550,7 +1598,7 @@ class InfoExtractor(object):
|
||||||
manifest_version = '2.0'
|
manifest_version = '2.0'
|
||||||
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
|
||||||
# Remove unsupported DRM protected media from final formats
|
# Remove unsupported DRM protected media from final formats
|
||||||
# rendition (see https://github.com/ytdl-org/haruhi-dl/issues/8573).
|
# rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
|
||||||
media_nodes = remove_encrypted_media(media_nodes)
|
media_nodes = remove_encrypted_media(media_nodes)
|
||||||
if not media_nodes:
|
if not media_nodes:
|
||||||
return formats
|
return formats
|
||||||
|
@ -1681,8 +1729,8 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
# References:
|
# References:
|
||||||
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
|
||||||
# 2. https://github.com/ytdl-org/haruhi-dl/issues/12211
|
# 2. https://github.com/ytdl-org/youtube-dl/issues/12211
|
||||||
# 3. https://github.com/ytdl-org/haruhi-dl/issues/18923
|
# 3. https://github.com/ytdl-org/youtube-dl/issues/18923
|
||||||
|
|
||||||
# We should try extracting formats only from master playlists [1, 4.3.4],
|
# We should try extracting formats only from master playlists [1, 4.3.4],
|
||||||
# i.e. playlists that describe available qualities. On the other hand
|
# i.e. playlists that describe available qualities. On the other hand
|
||||||
|
@ -1714,7 +1762,7 @@ class InfoExtractor(object):
|
||||||
if not (media_type and group_id and name):
|
if not (media_type and group_id and name):
|
||||||
return
|
return
|
||||||
groups.setdefault(group_id, []).append(media)
|
groups.setdefault(group_id, []).append(media)
|
||||||
if media_type not in ('VIDEO', 'AUDIO'):
|
if media_type not in ('VIDEO', 'AUDIO', 'SUBTITLES'):
|
||||||
return
|
return
|
||||||
media_url = media.get('URI')
|
media_url = media.get('URI')
|
||||||
if media_url:
|
if media_url:
|
||||||
|
@ -1722,17 +1770,27 @@ class InfoExtractor(object):
|
||||||
for v in (m3u8_id, group_id, name):
|
for v in (m3u8_id, group_id, name):
|
||||||
if v:
|
if v:
|
||||||
format_id.append(v)
|
format_id.append(v)
|
||||||
f = {
|
if media_type == 'SUBTITLES':
|
||||||
'format_id': '-'.join(format_id),
|
f = {
|
||||||
'url': format_url(media_url),
|
'_subtitle': {
|
||||||
'manifest_url': m3u8_url,
|
'url': format_url(media_url),
|
||||||
'language': media.get('LANGUAGE'),
|
'ext': 'vtt',
|
||||||
'ext': ext,
|
'protocol': entry_protocol,
|
||||||
'protocol': entry_protocol,
|
},
|
||||||
'preference': preference,
|
'_key': media.get('LANGUAGE'),
|
||||||
}
|
}
|
||||||
if media_type == 'AUDIO':
|
else:
|
||||||
f['vcodec'] = 'none'
|
f = {
|
||||||
|
'format_id': '-'.join(format_id),
|
||||||
|
'url': format_url(media_url),
|
||||||
|
'manifest_url': m3u8_url,
|
||||||
|
'language': media.get('LANGUAGE'),
|
||||||
|
'ext': ext,
|
||||||
|
'protocol': entry_protocol,
|
||||||
|
'preference': preference,
|
||||||
|
}
|
||||||
|
if media_type == 'AUDIO':
|
||||||
|
f['vcodec'] = 'none'
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
|
||||||
def build_stream_name():
|
def build_stream_name():
|
||||||
|
@ -2238,7 +2296,7 @@ class InfoExtractor(object):
|
||||||
# First off, % characters outside $...$ templates
|
# First off, % characters outside $...$ templates
|
||||||
# must be escaped by doubling for proper processing
|
# must be escaped by doubling for proper processing
|
||||||
# by % operator string formatting used further (see
|
# by % operator string formatting used further (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/16867).
|
# https://github.com/ytdl-org/youtube-dl/issues/16867).
|
||||||
t = ''
|
t = ''
|
||||||
in_template = False
|
in_template = False
|
||||||
for c in tmpl:
|
for c in tmpl:
|
||||||
|
@ -2257,7 +2315,7 @@ class InfoExtractor(object):
|
||||||
|
|
||||||
# @initialization is a regular template like @media one
|
# @initialization is a regular template like @media one
|
||||||
# so it should be handled just the same way (see
|
# so it should be handled just the same way (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/11605)
|
# https://github.com/ytdl-org/youtube-dl/issues/11605)
|
||||||
if 'initialization' in representation_ms_info:
|
if 'initialization' in representation_ms_info:
|
||||||
initialization_template = prepare_template(
|
initialization_template = prepare_template(
|
||||||
'initialization',
|
'initialization',
|
||||||
|
@ -2343,7 +2401,7 @@ class InfoExtractor(object):
|
||||||
elif 'segment_urls' in representation_ms_info:
|
elif 'segment_urls' in representation_ms_info:
|
||||||
# Segment URLs with no SegmentTimeline
|
# Segment URLs with no SegmentTimeline
|
||||||
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
# Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
|
||||||
# https://github.com/ytdl-org/haruhi-dl/pull/14844
|
# https://github.com/ytdl-org/youtube-dl/pull/14844
|
||||||
fragments = []
|
fragments = []
|
||||||
segment_duration = float_or_none(
|
segment_duration = float_or_none(
|
||||||
representation_ms_info['segment_duration'],
|
representation_ms_info['segment_duration'],
|
||||||
|
@ -2381,8 +2439,8 @@ class InfoExtractor(object):
|
||||||
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
# According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
|
||||||
# is not necessarily unique within a Period thus formats with
|
# is not necessarily unique within a Period thus formats with
|
||||||
# the same `format_id` are quite possible. There are numerous examples
|
# the same `format_id` are quite possible. There are numerous examples
|
||||||
# of such manifests (see https://github.com/ytdl-org/haruhi-dl/issues/15111,
|
# of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/13919)
|
# https://github.com/ytdl-org/youtube-dl/issues/13919)
|
||||||
full_info = formats_dict.get(representation_id, {}).copy()
|
full_info = formats_dict.get(representation_id, {}).copy()
|
||||||
full_info.update(f)
|
full_info.update(f)
|
||||||
formats.append(full_info)
|
formats.append(full_info)
|
||||||
|
@ -2545,7 +2603,7 @@ class InfoExtractor(object):
|
||||||
media_tags.extend(re.findall(
|
media_tags.extend(re.findall(
|
||||||
# We only allow video|audio followed by a whitespace or '>'.
|
# We only allow video|audio followed by a whitespace or '>'.
|
||||||
# Allowing more characters may end up in significant slow down (see
|
# Allowing more characters may end up in significant slow down (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/11979, example URL:
|
# https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
|
||||||
# http://www.porntrex.com/maps/videositemap.xml).
|
# http://www.porntrex.com/maps/videositemap.xml).
|
||||||
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
|
||||||
for media_tag, media_type, media_content in media_tags:
|
for media_tag, media_type, media_content in media_tags:
|
||||||
|
@ -2924,10 +2982,10 @@ class InfoExtractor(object):
|
||||||
self._downloader.cookiejar.set_cookie(cookie)
|
self._downloader.cookiejar.set_cookie(cookie)
|
||||||
|
|
||||||
def _get_cookies(self, url):
|
def _get_cookies(self, url):
|
||||||
""" Return a compat_cookies.SimpleCookie with the cookies for the url """
|
""" Return a compat_cookies_SimpleCookie with the cookies for the url """
|
||||||
req = sanitized_Request(url)
|
req = sanitized_Request(url)
|
||||||
self._downloader.cookiejar.add_cookie_header(req)
|
self._downloader.cookiejar.add_cookie_header(req)
|
||||||
return compat_cookies.SimpleCookie(req.get_header('Cookie'))
|
return compat_cookies_SimpleCookie(req.get_header('Cookie'))
|
||||||
|
|
||||||
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
def _apply_first_set_cookie_header(self, url_handle, cookie):
|
||||||
"""
|
"""
|
||||||
|
@ -2940,7 +2998,7 @@ class InfoExtractor(object):
|
||||||
We will workaround this issue by resetting the cookie to
|
We will workaround this issue by resetting the cookie to
|
||||||
the first one manually.
|
the first one manually.
|
||||||
1. https://new.vk.com/
|
1. https://new.vk.com/
|
||||||
2. https://github.com/ytdl-org/haruhi-dl/issues/9841#issuecomment-227871201
|
2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
|
||||||
3. https://learning.oreilly.com/
|
3. https://learning.oreilly.com/
|
||||||
"""
|
"""
|
||||||
for header, cookies in url_handle.headers.items():
|
for header, cookies in url_handle.headers.items():
|
||||||
|
|
|
@ -36,7 +36,7 @@ class UnicodeBOMIE(InfoExtractor):
|
||||||
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
_VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
|
||||||
|
|
||||||
# Disable test for python 3.2 since BOM is broken in re in this version
|
# Disable test for python 3.2 since BOM is broken in re in this version
|
||||||
# (see https://github.com/ytdl-org/haruhi-dl/issues/9751)
|
# (see https://github.com/ytdl-org/youtube-dl/issues/9751)
|
||||||
_TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
|
_TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
|
||||||
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
|
|
@ -1,9 +1,15 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from urllib.parse import parse_qs
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
compat_urlparse,
|
compat_urlparse,
|
||||||
)
|
)
|
||||||
|
from ..utils import (
|
||||||
|
try_get,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class RtmpIE(InfoExtractor):
|
class RtmpIE(InfoExtractor):
|
||||||
|
@ -58,3 +64,71 @@ class MmsIE(InfoExtractor):
|
||||||
'title': title,
|
'title': title,
|
||||||
'url': url,
|
'url': url,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class BitTorrentMagnetIE(InfoExtractor):
    # Turns a magnet link into an info dict: the magnet itself is one
    # format, and every web seed (ws), acceptable source (as) and exact
    # source (xs) URL listed in the link becomes an additional format.
    IE_DESC = False
    _VALID_URL = r'(?i)magnet:\?.+'

    _TESTS = [{
        'url': 'magnet:?xs=https%3A%2F%2Fvideo.internet-czas-dzialac.pl%2Fstatic%2Ftorrents%2F9085aa69-90c2-40c6-a707-3472b92cafc8-0.torrent&xt=urn:btih:0ae4cc8cb0e098a1a40b3224aa578bb4210a8cff&dn=Podcast+Internet.+Czas+dzia%C5%82a%C4%87!+-+Trailer&tr=wss%3A%2F%2Fvideo.internet-czas-dzialac.pl%3A443%2Ftracker%2Fsocket&tr=https%3A%2F%2Fvideo.internet-czas-dzialac.pl%2Ftracker%2Fannounce&ws=https%3A%2F%2Fvideo.internet-czas-dzialac.pl%2Fstatic%2Fwebseed%2F9085aa69-90c2-40c6-a707-3472b92cafc8-0.mp4',
        'info_dict': {
            'id': 'urn:btih:0ae4cc8cb0e098a1a40b3224aa578bb4210a8cff',
            'ext': 'torrent',
            'title': 'Podcast Internet. Czas działać! - Trailer',
        },
        'params': {
            'allow_p2p': True,
            'prefer_p2p': True,
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the magnet link *url*.

        Returns a dict with ``id`` (the full ``urn:btih:...`` topic),
        ``title`` (the display name, or the bare hash when there is none)
        and ``formats``.

        Raises ExtractorError when the link has no ``xt`` parameter or its
        first ``xt`` is not a ``urn:btih:`` topic.
        """
        qs = parse_qs(url[len('magnet:?'):])

        # eXact Topic. A link without any xt used to escape as a bare
        # KeyError; it is now reported like any other non-BitTorrent magnet.
        video_id = try_get(qs, lambda x: x['xt'][0], str)
        if not video_id or not video_id.startswith('urn:btih:'):
            raise ExtractorError('Not a BitTorrent magnet')
        # Display Name
        title = try_get(qs, lambda x: x['dn'][0], str) or video_id[len('urn:btih:'):]

        formats = [{
            'url': url,
            'protocol': 'bittorrent',
        }]
        # Web Seed
        for ws in qs.get('ws', []):
            formats.append({
                'url': ws,
            })
        # Acceptable Source
        for as_ in qs.get('as', []):
            formats.append({
                'url': as_,
                'preference': -2,
            })
        # eXact Source
        for xs in qs.get('xs', []):
            formats.append({
                'url': xs,
                'protocol': 'bittorrent',
            })

        self._sort_formats(formats)

        # eXact Length. The value comes straight from the link, so a
        # non-numeric one is treated as "size unknown" instead of aborting
        # the extraction with ValueError.
        filesize = None
        if qs.get('xl'):
            try:
                filesize = int(qs['xl'][0])
            except ValueError:
                filesize = None
        if filesize is not None:
            for fmt in formats:
                fmt['filesize'] = filesize

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }
|
||||||
|
|
|
@ -112,7 +112,7 @@ class CrunchyrollBaseIE(InfoExtractor):
|
||||||
# > This content may be inappropriate for some people.
|
# > This content may be inappropriate for some people.
|
||||||
# > Are you sure you want to continue?
|
# > Are you sure you want to continue?
|
||||||
# since it's not disabled by default in crunchyroll account's settings.
|
# since it's not disabled by default in crunchyroll account's settings.
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/7202.
|
# See https://github.com/ytdl-org/youtube-dl/issues/7202.
|
||||||
qs['skip_wall'] = ['1']
|
qs['skip_wall'] = ['1']
|
||||||
return compat_urlparse.urlunparse(
|
return compat_urlparse.urlunparse(
|
||||||
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
|
||||||
|
@ -267,7 +267,7 @@ class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
|
||||||
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
|
||||||
else sanitized_Request(url_or_request))
|
else sanitized_Request(url_or_request))
|
||||||
# Accept-Language must be set explicitly to accept any language to avoid issues
|
# Accept-Language must be set explicitly to accept any language to avoid issues
|
||||||
# similar to https://github.com/ytdl-org/haruhi-dl/issues/6797.
|
# similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
|
||||||
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
|
# Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
|
||||||
# should be imposed or not (from what I can see it just takes the first language
|
# should be imposed or not (from what I can see it just takes the first language
|
||||||
# ignoring the priority and requires it to correspond the IP). By the way this causes
|
# ignoring the priority and requires it to correspond the IP). By the way this causes
|
||||||
|
|
|
@ -25,12 +25,12 @@ class CuriosityStreamBaseIE(InfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
'%s said: %s' % (self.IE_NAME, error), expected=True)
|
||||||
|
|
||||||
def _call_api(self, path, video_id):
|
def _call_api(self, path, video_id, query=None):
|
||||||
headers = {}
|
headers = {}
|
||||||
if self._auth_token:
|
if self._auth_token:
|
||||||
headers['X-Auth-Token'] = self._auth_token
|
headers['X-Auth-Token'] = self._auth_token
|
||||||
result = self._download_json(
|
result = self._download_json(
|
||||||
self._API_BASE_URL + path, video_id, headers=headers)
|
self._API_BASE_URL + path, video_id, headers=headers, query=query)
|
||||||
self._handle_errors(result)
|
self._handle_errors(result)
|
||||||
return result['data']
|
return result['data']
|
||||||
|
|
||||||
|
@ -52,62 +52,75 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://app.curiositystream.com/video/2',
|
'url': 'https://app.curiositystream.com/video/2',
|
||||||
'md5': '262bb2f257ff301115f1973540de8983',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'How Did You Develop The Internet?',
|
'title': 'How Did You Develop The Internet?',
|
||||||
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
|
||||||
}
|
},
|
||||||
|
'params': {
|
||||||
|
'format': 'bestvideo',
|
||||||
|
# m3u8 download
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
media = self._call_api('media/' + video_id, video_id)
|
|
||||||
title = media['title']
|
|
||||||
|
|
||||||
formats = []
|
formats = []
|
||||||
for encoding in media.get('encodings', []):
|
for encoding_format in ('m3u8', 'mpd'):
|
||||||
m3u8_url = encoding.get('master_playlist_url')
|
media = self._call_api('media/' + video_id, video_id, query={
|
||||||
if m3u8_url:
|
'encodingsNew': 'true',
|
||||||
formats.extend(self._extract_m3u8_formats(
|
'encodingsFormat': encoding_format,
|
||||||
m3u8_url, video_id, 'mp4', 'm3u8_native',
|
})
|
||||||
m3u8_id='hls', fatal=False))
|
for encoding in media.get('encodings', []):
|
||||||
encoding_url = encoding.get('url')
|
playlist_url = encoding.get('master_playlist_url')
|
||||||
file_url = encoding.get('file_url')
|
if encoding_format == 'm3u8':
|
||||||
if not encoding_url and not file_url:
|
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
|
||||||
continue
|
formats.extend(self._extract_m3u8_formats(
|
||||||
f = {
|
playlist_url, video_id, 'mp4',
|
||||||
'width': int_or_none(encoding.get('width')),
|
m3u8_id='hls', fatal=False))
|
||||||
'height': int_or_none(encoding.get('height')),
|
elif encoding_format == 'mpd':
|
||||||
'vbr': int_or_none(encoding.get('video_bitrate')),
|
formats.extend(self._extract_mpd_formats(
|
||||||
'abr': int_or_none(encoding.get('audio_bitrate')),
|
playlist_url, video_id, mpd_id='dash', fatal=False))
|
||||||
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
encoding_url = encoding.get('url')
|
||||||
'vcodec': encoding.get('video_codec'),
|
file_url = encoding.get('file_url')
|
||||||
'acodec': encoding.get('audio_codec'),
|
if not encoding_url and not file_url:
|
||||||
'container': encoding.get('container_type'),
|
|
||||||
}
|
|
||||||
for f_url in (encoding_url, file_url):
|
|
||||||
if not f_url:
|
|
||||||
continue
|
continue
|
||||||
fmt = f.copy()
|
f = {
|
||||||
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
'width': int_or_none(encoding.get('width')),
|
||||||
if rtmp:
|
'height': int_or_none(encoding.get('height')),
|
||||||
fmt.update({
|
'vbr': int_or_none(encoding.get('video_bitrate')),
|
||||||
'url': rtmp.group('url'),
|
'abr': int_or_none(encoding.get('audio_bitrate')),
|
||||||
'play_path': rtmp.group('playpath'),
|
'filesize': int_or_none(encoding.get('size_in_bytes')),
|
||||||
'app': rtmp.group('app'),
|
'vcodec': encoding.get('video_codec'),
|
||||||
'ext': 'flv',
|
'acodec': encoding.get('audio_codec'),
|
||||||
'format_id': 'rtmp',
|
'container': encoding.get('container_type'),
|
||||||
})
|
}
|
||||||
else:
|
for f_url in (encoding_url, file_url):
|
||||||
fmt.update({
|
if not f_url:
|
||||||
'url': f_url,
|
continue
|
||||||
'format_id': 'http',
|
fmt = f.copy()
|
||||||
})
|
rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
|
||||||
formats.append(fmt)
|
if rtmp:
|
||||||
|
fmt.update({
|
||||||
|
'url': rtmp.group('url'),
|
||||||
|
'play_path': rtmp.group('playpath'),
|
||||||
|
'app': rtmp.group('app'),
|
||||||
|
'ext': 'flv',
|
||||||
|
'format_id': 'rtmp',
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
fmt.update({
|
||||||
|
'url': f_url,
|
||||||
|
'format_id': 'http',
|
||||||
|
})
|
||||||
|
formats.append(fmt)
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
title = media['title']
|
||||||
|
|
||||||
subtitles = {}
|
subtitles = {}
|
||||||
for closed_caption in media.get('closed_captions', []):
|
for closed_caption in media.get('closed_captions', []):
|
||||||
sub_url = closed_caption.get('file')
|
sub_url = closed_caption.get('file')
|
||||||
|
@ -132,7 +145,7 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
|
||||||
|
|
||||||
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
IE_NAME = 'curiositystream:collection'
|
IE_NAME = 'curiositystream:collection'
|
||||||
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collection|series)/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://app.curiositystream.com/collection/2',
|
'url': 'https://app.curiositystream.com/collection/2',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -140,10 +153,13 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
|
||||||
'title': 'Curious Minds: The Internet',
|
'title': 'Curious Minds: The Internet',
|
||||||
'description': 'How is the internet shaping our lives in the 21st Century?',
|
'description': 'How is the internet shaping our lives in the 21st Century?',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 17,
|
'playlist_mincount': 16,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://curiositystream.com/series/2',
|
'url': 'https://curiositystream.com/series/2',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://curiositystream.com/collections/36',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -32,6 +32,18 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||||
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
# From http://www.gdcvault.com/play/1013700/Advanced-Material
|
||||||
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# From https://gdcvault.com/play/1016624, empty speakerVideo
|
||||||
|
'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '201210-822101_1349794556671DDDD',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Pre-launch - Preparing to Take the Plunge',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
|
||||||
|
'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _parse_mp4(self, metadata):
|
def _parse_mp4(self, metadata):
|
||||||
|
@ -84,26 +96,20 @@ class DigitallySpeakingIE(InfoExtractor):
|
||||||
'vcodec': 'none',
|
'vcodec': 'none',
|
||||||
'format_id': audio.get('code'),
|
'format_id': audio.get('code'),
|
||||||
})
|
})
|
||||||
slide_video_path = xpath_text(metadata, './slideVideo', fatal=True)
|
for video_key, format_id, preference in (
|
||||||
formats.append({
|
('slide', 'slides', -2), ('speaker', 'speaker', -1)):
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
video_path = xpath_text(metadata, './%sVideo' % video_key)
|
||||||
'play_path': remove_end(slide_video_path, '.flv'),
|
if not video_path:
|
||||||
'ext': 'flv',
|
continue
|
||||||
'format_note': 'slide deck video',
|
formats.append({
|
||||||
'quality': -2,
|
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
||||||
'preference': -2,
|
'play_path': remove_end(video_path, '.flv'),
|
||||||
'format_id': 'slides',
|
'ext': 'flv',
|
||||||
})
|
'format_note': '%s video' % video_key,
|
||||||
speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True)
|
'quality': preference,
|
||||||
formats.append({
|
'preference': preference,
|
||||||
'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
|
'format_id': format_id,
|
||||||
'play_path': remove_end(speaker_video_path, '.flv'),
|
})
|
||||||
'ext': 'flv',
|
|
||||||
'format_note': 'speaker video',
|
|
||||||
'quality': -1,
|
|
||||||
'preference': -1,
|
|
||||||
'format_id': 'speaker',
|
|
||||||
})
|
|
||||||
return formats
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -1,193 +1,43 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
from .zdf import ZDFIE
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
int_or_none,
|
|
||||||
unified_strdate,
|
|
||||||
xpath_text,
|
|
||||||
determine_ext,
|
|
||||||
float_or_none,
|
|
||||||
ExtractorError,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class DreiSatIE(InfoExtractor):
|
class DreiSatIE(ZDFIE):
|
||||||
IE_NAME = '3sat'
|
IE_NAME = '3sat'
|
||||||
_GEO_COUNTRIES = ['DE']
|
_VALID_URL = r'https?://(?:www\.)?3sat\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)\.html'
|
||||||
_VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
|
_TESTS = [{
|
||||||
_TESTS = [
|
# Same as https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html
|
||||||
{
|
'url': 'https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html',
|
||||||
'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
|
'md5': '0aff3e7bc72c8813f5e0fae333316a1d',
|
||||||
'md5': 'be37228896d30a88f315b638900a026e',
|
'info_dict': {
|
||||||
'info_dict': {
|
'id': '141007_ab18_10wochensommer_film',
|
||||||
'id': '45918',
|
'ext': 'mp4',
|
||||||
'ext': 'mp4',
|
'title': 'Ab 18! - 10 Wochen Sommer',
|
||||||
'title': 'Waidmannsheil',
|
'description': 'md5:8253f41dc99ce2c3ff892dac2d65fe26',
|
||||||
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
'duration': 2660,
|
||||||
'uploader': 'SCHWEIZWEIT',
|
'timestamp': 1608604200,
|
||||||
'uploader_id': '100000210',
|
'upload_date': '20201222',
|
||||||
'upload_date': '20140913'
|
|
||||||
},
|
|
||||||
'params': {
|
|
||||||
'skip_download': True, # m3u8 downloads
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
|
'url': 'https://www.3sat.de/gesellschaft/schweizweit/waidmannsheil-100.html',
|
||||||
'only_matching': True,
|
'info_dict': {
|
||||||
|
'id': '140913_sendung_schweizweit',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Waidmannsheil',
|
||||||
|
'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
|
||||||
|
'timestamp': 1410623100,
|
||||||
|
'upload_date': '20140913'
|
||||||
},
|
},
|
||||||
]
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
|
|
||||||
param_groups = {}
|
|
||||||
for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
|
|
||||||
group_id = param_group.get(self._xpath_ns(
|
|
||||||
'id', 'http://www.w3.org/XML/1998/namespace'))
|
|
||||||
params = {}
|
|
||||||
for param in param_group:
|
|
||||||
params[param.get('name')] = param.get('value')
|
|
||||||
param_groups[group_id] = params
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
for video in smil.findall(self._xpath_ns('.//video', namespace)):
|
|
||||||
src = video.get('src')
|
|
||||||
if not src:
|
|
||||||
continue
|
|
||||||
bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
|
|
||||||
group_id = video.get('paramGroup')
|
|
||||||
param_group = param_groups[group_id]
|
|
||||||
for proto in param_group['protocols'].split(','):
|
|
||||||
formats.append({
|
|
||||||
'url': '%s://%s' % (proto, param_group['host']),
|
|
||||||
'app': param_group['app'],
|
|
||||||
'play_path': src,
|
|
||||||
'ext': 'flv',
|
|
||||||
'format_id': '%s-%d' % (proto, bitrate),
|
|
||||||
'tbr': bitrate,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def extract_from_xml_url(self, video_id, xml_url):
|
|
||||||
doc = self._download_xml(
|
|
||||||
xml_url, video_id,
|
|
||||||
note='Downloading video info',
|
|
||||||
errnote='Failed to download video info')
|
|
||||||
|
|
||||||
status_code = xpath_text(doc, './status/statuscode')
|
|
||||||
if status_code and status_code != 'ok':
|
|
||||||
if status_code == 'notVisibleAnymore':
|
|
||||||
message = 'Video %s is not available' % video_id
|
|
||||||
else:
|
|
||||||
message = '%s returned error: %s' % (self.IE_NAME, status_code)
|
|
||||||
raise ExtractorError(message, expected=True)
|
|
||||||
|
|
||||||
title = xpath_text(doc, './/information/title', 'title', True)
|
|
||||||
|
|
||||||
urls = []
|
|
||||||
formats = []
|
|
||||||
for fnode in doc.findall('.//formitaeten/formitaet'):
|
|
||||||
video_url = xpath_text(fnode, 'url')
|
|
||||||
if not video_url or video_url in urls:
|
|
||||||
continue
|
|
||||||
urls.append(video_url)
|
|
||||||
|
|
||||||
is_available = 'http://www.metafilegenerator' not in video_url
|
|
||||||
geoloced = 'static_geoloced_online' in video_url
|
|
||||||
if not is_available or geoloced:
|
|
||||||
continue
|
|
||||||
|
|
||||||
format_id = fnode.attrib['basetype']
|
|
||||||
format_m = re.match(r'''(?x)
|
|
||||||
(?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
|
|
||||||
(?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
|
|
||||||
''', format_id)
|
|
||||||
|
|
||||||
ext = determine_ext(video_url, None) or format_m.group('container')
|
|
||||||
|
|
||||||
if ext == 'meta':
|
|
||||||
continue
|
|
||||||
elif ext == 'smil':
|
|
||||||
formats.extend(self._extract_smil_formats(
|
|
||||||
video_url, video_id, fatal=False))
|
|
||||||
elif ext == 'm3u8':
|
|
||||||
# the certificates are misconfigured (see
|
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/8665)
|
|
||||||
if video_url.startswith('https://'):
|
|
||||||
continue
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, 'mp4', 'm3u8_native',
|
|
||||||
m3u8_id=format_id, fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
video_url, video_id, f4m_id=format_id, fatal=False))
|
|
||||||
else:
|
|
||||||
quality = xpath_text(fnode, './quality')
|
|
||||||
if quality:
|
|
||||||
format_id += '-' + quality
|
|
||||||
|
|
||||||
abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
|
|
||||||
vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
|
|
||||||
|
|
||||||
tbr = int_or_none(self._search_regex(
|
|
||||||
r'_(\d+)k', video_url, 'bitrate', None))
|
|
||||||
if tbr and vbr and not abr:
|
|
||||||
abr = tbr - vbr
|
|
||||||
|
|
||||||
formats.append({
|
|
||||||
'format_id': format_id,
|
|
||||||
'url': video_url,
|
|
||||||
'ext': ext,
|
|
||||||
'acodec': format_m.group('acodec'),
|
|
||||||
'vcodec': format_m.group('vcodec'),
|
|
||||||
'abr': abr,
|
|
||||||
'vbr': vbr,
|
|
||||||
'tbr': tbr,
|
|
||||||
'width': int_or_none(xpath_text(fnode, './width')),
|
|
||||||
'height': int_or_none(xpath_text(fnode, './height')),
|
|
||||||
'filesize': int_or_none(xpath_text(fnode, './filesize')),
|
|
||||||
'protocol': format_m.group('proto').lower(),
|
|
||||||
})
|
|
||||||
|
|
||||||
geolocation = xpath_text(doc, './/details/geolocation')
|
|
||||||
if not formats and geolocation and geolocation != 'none':
|
|
||||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
thumbnails = []
|
|
||||||
for node in doc.findall('.//teaserimages/teaserimage'):
|
|
||||||
thumbnail_url = node.text
|
|
||||||
if not thumbnail_url:
|
|
||||||
continue
|
|
||||||
thumbnail = {
|
|
||||||
'url': thumbnail_url,
|
|
||||||
}
|
|
||||||
thumbnail_key = node.get('key')
|
|
||||||
if thumbnail_key:
|
|
||||||
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
|
||||||
if m:
|
|
||||||
thumbnail['width'] = int(m.group(1))
|
|
||||||
thumbnail['height'] = int(m.group(2))
|
|
||||||
thumbnails.append(thumbnail)
|
|
||||||
|
|
||||||
upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': title,
|
|
||||||
'description': xpath_text(doc, './/information/detail'),
|
|
||||||
'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
|
|
||||||
'thumbnails': thumbnails,
|
|
||||||
'uploader': xpath_text(doc, './/details/originChannelTitle'),
|
|
||||||
'uploader_id': xpath_text(doc, './/details/originChannelId'),
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
def _real_extract(self, url):
|
# Same as https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html
|
||||||
video_id = self._match_id(url)
|
'url': 'https://www.3sat.de/film/spielfilm/der-hauptmann-100.html',
|
||||||
details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
|
'only_matching': True,
|
||||||
return self.extract_from_xml_url(video_id, details_url)
|
}, {
|
||||||
|
# Same as https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
|
||||||
|
'url': 'https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
|
@ -1,15 +1,57 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
smuggle_url,
|
||||||
unified_strdate,
|
unified_strdate,
|
||||||
|
unsmuggle_url,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
from ..compat import compat_urlparse
|
|
||||||
|
|
||||||
|
|
||||||
class DWIE(InfoExtractor):
|
class DWVideoIE(InfoExtractor):
|
||||||
|
IE_NAME = 'dw:video'
|
||||||
|
_VALID_URL = r'dw:(?P<id>\d+)'
|
||||||
|
|
||||||
|
def _get_dw_formats(self, media_id, hidden_inputs):
|
||||||
|
if hidden_inputs.get('player_type') == 'video':
|
||||||
|
# https://www.dw.com/smil/v-{video_id} returns more formats,
|
||||||
|
# but they are all RTMP. ytdl used to do this:
|
||||||
|
# url.replace('rtmp://tv-od.dw.de/flash/', 'http://tv-download.dw.de/dwtv_video/flv/')
|
||||||
|
# this returns formats, but it's completely random if they work or not.
|
||||||
|
formats = [{
|
||||||
|
'url': fmt['file'],
|
||||||
|
'format_code': fmt['label'],
|
||||||
|
'height': int_or_none(fmt['label']),
|
||||||
|
} for fmt in self._download_json(
|
||||||
|
'https://www.dw.com/playersources/v-%s' % media_id,
|
||||||
|
media_id, 'Downloading JSON formats')]
|
||||||
|
self._sort_formats(formats)
|
||||||
|
else:
|
||||||
|
formats = [{'url': hidden_inputs['file_name']}]
|
||||||
|
return {
|
||||||
|
'id': media_id,
|
||||||
|
'title': hidden_inputs['media_title'],
|
||||||
|
'formats': formats,
|
||||||
|
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
||||||
|
'upload_date': hidden_inputs.get('display_date'),
|
||||||
|
'thumbnail': hidden_inputs.get('preview_image'),
|
||||||
|
'is_live': hidden_inputs.get('isLiveVideo'),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
media_id = self._match_id(url)
|
||||||
|
_, hidden_inputs = unsmuggle_url(url)
|
||||||
|
if not hidden_inputs:
|
||||||
|
return self.url_result('https://www.dw.com/en/av-%s' % media_id, 'DW', media_id)
|
||||||
|
return self._get_dw_formats(media_id, hidden_inputs)
|
||||||
|
|
||||||
|
|
||||||
|
class DWIE(DWVideoIE):
|
||||||
IE_NAME = 'dw'
|
IE_NAME = 'dw'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+(?:av|e)-(?P<id>\d+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
|
@ -21,7 +63,7 @@ class DWIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Intelligent light',
|
'title': 'Intelligent light',
|
||||||
'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
|
'description': 'md5:90e00d5881719f2a6a5827cb74985af1',
|
||||||
'upload_date': '20160311',
|
'upload_date': '20160605',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# audio
|
# audio
|
||||||
|
@ -52,57 +94,57 @@ class DWIE(InfoExtractor):
|
||||||
media_id = self._match_id(url)
|
media_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, media_id)
|
webpage = self._download_webpage(url, media_id)
|
||||||
hidden_inputs = self._hidden_inputs(webpage)
|
hidden_inputs = self._hidden_inputs(webpage)
|
||||||
title = hidden_inputs['media_title']
|
|
||||||
media_id = hidden_inputs.get('media_id') or media_id
|
media_id = hidden_inputs.get('media_id') or media_id
|
||||||
|
|
||||||
if hidden_inputs.get('player_type') == 'video' and hidden_inputs.get('stream_file') == '1':
|
info_dict = {
|
||||||
formats = self._extract_smil_formats(
|
'description': self._og_search_description(webpage),
|
||||||
'http://www.dw.com/smil/v-%s' % media_id, media_id,
|
}
|
||||||
transform_source=lambda s: s.replace(
|
info_dict.update(self._get_dw_formats(media_id, hidden_inputs))
|
||||||
'rtmp://tv-od.dw.de/flash/',
|
|
||||||
'http://tv-download.dw.de/dwtv_video/flv/'))
|
|
||||||
self._sort_formats(formats)
|
|
||||||
else:
|
|
||||||
formats = [{'url': hidden_inputs['file_name']}]
|
|
||||||
|
|
||||||
upload_date = hidden_inputs.get('display_date')
|
if info_dict.get('upload_date') is None:
|
||||||
if not upload_date:
|
|
||||||
upload_date = self._html_search_regex(
|
upload_date = self._html_search_regex(
|
||||||
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
r'<span[^>]+class="date">([0-9.]+)\s*\|', webpage,
|
||||||
'upload date', default=None)
|
'upload date', default=None)
|
||||||
upload_date = unified_strdate(upload_date)
|
info_dict['upload_date'] = unified_strdate(upload_date)
|
||||||
|
|
||||||
return {
|
return info_dict
|
||||||
'id': media_id,
|
|
||||||
'title': title,
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': hidden_inputs.get('preview_image'),
|
|
||||||
'duration': int_or_none(hidden_inputs.get('file_duration')),
|
|
||||||
'upload_date': upload_date,
|
|
||||||
'formats': formats,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DWArticleIE(InfoExtractor):
|
class DWArticleIE(DWVideoIE):
|
||||||
IE_NAME = 'dw:article'
|
IE_NAME = 'dw:article'
|
||||||
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?dw\.com/(?:[^/]+/)+a-(?P<id>\d+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'http://www.dw.com/en/no-hope-limited-options-for-refugees-in-idomeni/a-19111009',
|
'url': 'https://www.dw.com/pl/zalecenie-ema-szczepmy-si%C4%99-astrazenec%C4%85/a-56919770',
|
||||||
'md5': '8ca657f9d068bbef74d6fc38b97fc869',
|
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '19105868',
|
'id': '56911196',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'The harsh life of refugees in Idomeni',
|
'title': 'Czy AstraZeneca jest bezpieczna?',
|
||||||
'description': 'md5:196015cc7e48ebf474db9399420043c7',
|
'upload_date': '20210318',
|
||||||
'upload_date': '20160310',
|
},
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
article_id = self._match_id(url)
|
article_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, article_id)
|
webpage = self._download_webpage(url, article_id)
|
||||||
hidden_inputs = self._hidden_inputs(webpage)
|
videos = re.finditer(
|
||||||
media_id = hidden_inputs['media_id']
|
r'<div class="mediaItem" data-media-id="(?P<id>\d+)">(?P<hidden_inputs>.+?)<div',
|
||||||
media_path = self._search_regex(r'href="([^"]+av-%s)"\s+class="overlayLink"' % media_id, webpage, 'media url')
|
webpage)
|
||||||
media_url = compat_urlparse.urljoin(url, media_path)
|
if not videos:
|
||||||
return self.url_result(media_url, 'DW', media_id)
|
raise ExtractorError('No videos found')
|
||||||
|
entries = []
|
||||||
|
for video in videos:
|
||||||
|
video_id, hidden_inputs = video.group('id', 'hidden_inputs')
|
||||||
|
hidden_inputs = self._hidden_inputs(hidden_inputs)
|
||||||
|
entries.append({
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'title': hidden_inputs['media_title'],
|
||||||
|
'url': smuggle_url('dw:%s' % video_id, hidden_inputs),
|
||||||
|
'ie_key': 'DWVideo',
|
||||||
|
})
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'id': article_id,
|
||||||
|
'title': self._html_search_regex(r'<h1>([^>]+)</h1>', webpage, 'article title'),
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
}
|
||||||
|
|
|
@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
|
||||||
class EggheadCourseIE(EggheadBaseIE):
|
class EggheadCourseIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io course'
|
IE_DESC = 'egghead.io course'
|
||||||
IE_NAME = 'egghead:course'
|
IE_NAME = 'egghead:course'
|
||||||
_VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
|
||||||
'playlist_count': 29,
|
'playlist_count': 29,
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '72',
|
'id': '432655',
|
||||||
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
|
||||||
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
|
||||||
},
|
},
|
||||||
}
|
}, {
|
||||||
|
'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
playlist_id = self._match_id(url)
|
playlist_id = self._match_id(url)
|
||||||
|
@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
|
||||||
class EggheadLessonIE(EggheadBaseIE):
|
class EggheadLessonIE(EggheadBaseIE):
|
||||||
IE_DESC = 'egghead.io lesson'
|
IE_DESC = 'egghead.io lesson'
|
||||||
IE_NAME = 'egghead:lesson'
|
IE_NAME = 'egghead:lesson'
|
||||||
_VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
|
@ -6,7 +6,7 @@ from .common import InfoExtractor
|
||||||
from ..compat import compat_urllib_parse_urlencode
|
from ..compat import compat_urllib_parse_urlencode
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
unescapeHTML
|
merge_dicts,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,7 +24,8 @@ class EroProfileIE(InfoExtractor):
|
||||||
'title': 'sexy babe softcore',
|
'title': 'sexy babe softcore',
|
||||||
'thumbnail': r're:https?://.*\.jpg',
|
'thumbnail': r're:https?://.*\.jpg',
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
},
|
||||||
|
'skip': 'Video not found',
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
'url': 'http://www.eroprofile.com/m/videos/view/Try-It-On-Pee_cut_2-wmv-4shared-com-file-sharing-download-movie-file',
|
||||||
'md5': '1baa9602ede46ce904c431f5418d8916',
|
'md5': '1baa9602ede46ce904c431f5418d8916',
|
||||||
|
@ -77,19 +78,15 @@ class EroProfileIE(InfoExtractor):
|
||||||
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
[r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
|
||||||
webpage, 'video id', default=None)
|
webpage, 'video id', default=None)
|
||||||
|
|
||||||
video_url = unescapeHTML(self._search_regex(
|
|
||||||
r'<source src="([^"]+)', webpage, 'video url'))
|
|
||||||
title = self._html_search_regex(
|
title = self._html_search_regex(
|
||||||
r'Title:</th><td>([^<]+)</td>', webpage, 'title')
|
(r'Title:</th><td>([^<]+)</td>', r'<h1[^>]*>(.+?)</h1>'),
|
||||||
thumbnail = self._search_regex(
|
webpage, 'title')
|
||||||
r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
|
|
||||||
webpage, 'thumbnail', fatal=False)
|
|
||||||
|
|
||||||
return {
|
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
|
||||||
|
|
||||||
|
return merge_dicts(info, {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'display_id': display_id,
|
'display_id': display_id,
|
||||||
'url': video_url,
|
|
||||||
'title': title,
|
'title': title,
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'age_limit': 18,
|
'age_limit': 18,
|
||||||
}
|
})
|
||||||
|
|
|
@ -17,6 +17,7 @@ from .academicearth import AcademicEarthCourseIE
|
||||||
from .acast import (
|
from .acast import (
|
||||||
ACastIE,
|
ACastIE,
|
||||||
ACastChannelIE,
|
ACastChannelIE,
|
||||||
|
ACastPlayerIE,
|
||||||
)
|
)
|
||||||
from .adn import ADNIE
|
from .adn import ADNIE
|
||||||
from .adobeconnect import AdobeConnectIE
|
from .adobeconnect import AdobeConnectIE
|
||||||
|
@ -82,6 +83,7 @@ from .arte import (
|
||||||
ArteTVEmbedIE,
|
ArteTVEmbedIE,
|
||||||
ArteTVPlaylistIE,
|
ArteTVPlaylistIE,
|
||||||
)
|
)
|
||||||
|
from .arnes import ArnesIE
|
||||||
from .asiancrush import (
|
from .asiancrush import (
|
||||||
AsianCrushIE,
|
AsianCrushIE,
|
||||||
AsianCrushPlaylistIE,
|
AsianCrushPlaylistIE,
|
||||||
|
@ -100,11 +102,13 @@ from .awaan import (
|
||||||
)
|
)
|
||||||
from .azmedien import AZMedienIE
|
from .azmedien import AZMedienIE
|
||||||
from .baidu import BaiduVideoIE
|
from .baidu import BaiduVideoIE
|
||||||
|
from .bandaichannel import BandaiChannelIE
|
||||||
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
|
||||||
from .bbc import (
|
from .bbc import (
|
||||||
BBCCoUkIE,
|
BBCCoUkIE,
|
||||||
BBCCoUkArticleIE,
|
BBCCoUkArticleIE,
|
||||||
BBCCoUkIPlayerPlaylistIE,
|
BBCCoUkIPlayerEpisodesIE,
|
||||||
|
BBCCoUkIPlayerGroupIE,
|
||||||
BBCCoUkPlaylistIE,
|
BBCCoUkPlaylistIE,
|
||||||
BBCIE,
|
BBCIE,
|
||||||
)
|
)
|
||||||
|
@ -139,7 +143,6 @@ from .bleacherreport import (
|
||||||
BleacherReportIE,
|
BleacherReportIE,
|
||||||
BleacherReportCMSIE,
|
BleacherReportCMSIE,
|
||||||
)
|
)
|
||||||
from .blinkx import BlinkxIE
|
|
||||||
from .bloomberg import BloombergIE
|
from .bloomberg import BloombergIE
|
||||||
from .bokecc import BokeCCIE
|
from .bokecc import BokeCCIE
|
||||||
from .bongacams import BongaCamsIE
|
from .bongacams import BongaCamsIE
|
||||||
|
@ -180,6 +183,7 @@ from .carambatv import (
|
||||||
CarambaTVPageIE,
|
CarambaTVPageIE,
|
||||||
)
|
)
|
||||||
from .cartoonnetwork import CartoonNetworkIE
|
from .cartoonnetwork import CartoonNetworkIE
|
||||||
|
from .castos import CastosHostedIE
|
||||||
from .cbc import (
|
from .cbc import (
|
||||||
CBCIE,
|
CBCIE,
|
||||||
CBCPlayerIE,
|
CBCPlayerIE,
|
||||||
|
@ -198,7 +202,11 @@ from .cbsnews import (
|
||||||
CBSNewsIE,
|
CBSNewsIE,
|
||||||
CBSNewsLiveVideoIE,
|
CBSNewsLiveVideoIE,
|
||||||
)
|
)
|
||||||
from .cbssports import CBSSportsIE
|
from .cbssports import (
|
||||||
|
CBSSportsEmbedIE,
|
||||||
|
CBSSportsIE,
|
||||||
|
TwentyFourSevenSportsIE,
|
||||||
|
)
|
||||||
from .ccc import (
|
from .ccc import (
|
||||||
CCCIE,
|
CCCIE,
|
||||||
CCCPlaylistIE,
|
CCCPlaylistIE,
|
||||||
|
@ -251,6 +259,7 @@ from .comedycentral import (
|
||||||
)
|
)
|
||||||
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
|
||||||
from .commonprotocols import (
|
from .commonprotocols import (
|
||||||
|
BitTorrentMagnetIE,
|
||||||
MmsIE,
|
MmsIE,
|
||||||
RtmpIE,
|
RtmpIE,
|
||||||
)
|
)
|
||||||
|
@ -327,6 +336,7 @@ from .dropbox import DropboxIE
|
||||||
from .dw import (
|
from .dw import (
|
||||||
DWIE,
|
DWIE,
|
||||||
DWArticleIE,
|
DWArticleIE,
|
||||||
|
DWVideoIE,
|
||||||
)
|
)
|
||||||
from .eagleplatform import EaglePlatformIE
|
from .eagleplatform import EaglePlatformIE
|
||||||
from .ebaumsworld import EbaumsWorldIE
|
from .ebaumsworld import EbaumsWorldIE
|
||||||
|
@ -620,7 +630,11 @@ from .limelight import (
|
||||||
LimelightChannelIE,
|
LimelightChannelIE,
|
||||||
LimelightChannelListIE,
|
LimelightChannelListIE,
|
||||||
)
|
)
|
||||||
from .line import LineTVIE
|
from .line import (
|
||||||
|
LineTVIE,
|
||||||
|
LineLiveIE,
|
||||||
|
LineLiveChannelIE,
|
||||||
|
)
|
||||||
from .linkedin import (
|
from .linkedin import (
|
||||||
LinkedInPostIE,
|
LinkedInPostIE,
|
||||||
LinkedInLearningIE,
|
LinkedInLearningIE,
|
||||||
|
@ -629,10 +643,6 @@ from .linkedin import (
|
||||||
from .linuxacademy import LinuxAcademyIE
|
from .linuxacademy import LinuxAcademyIE
|
||||||
from .litv import LiTVIE
|
from .litv import LiTVIE
|
||||||
from .livejournal import LiveJournalIE
|
from .livejournal import LiveJournalIE
|
||||||
from .liveleak import (
|
|
||||||
LiveLeakIE,
|
|
||||||
LiveLeakEmbedIE,
|
|
||||||
)
|
|
||||||
from .livestream import (
|
from .livestream import (
|
||||||
LivestreamIE,
|
LivestreamIE,
|
||||||
LivestreamOriginalIE,
|
LivestreamOriginalIE,
|
||||||
|
@ -648,6 +658,7 @@ from .lynda import (
|
||||||
LyndaCourseIE
|
LyndaCourseIE
|
||||||
)
|
)
|
||||||
from .m6 import M6IE
|
from .m6 import M6IE
|
||||||
|
from .magentamusik360 import MagentaMusik360IE
|
||||||
from .mailru import (
|
from .mailru import (
|
||||||
MailRuIE,
|
MailRuIE,
|
||||||
MailRuMusicIE,
|
MailRuMusicIE,
|
||||||
|
@ -659,6 +670,7 @@ from .mangomolo import (
|
||||||
MangomoloLiveIE,
|
MangomoloLiveIE,
|
||||||
)
|
)
|
||||||
from .manyvids import ManyVidsIE
|
from .manyvids import ManyVidsIE
|
||||||
|
from .maoritv import MaoriTVIE
|
||||||
from .markiza import (
|
from .markiza import (
|
||||||
MarkizaIE,
|
MarkizaIE,
|
||||||
MarkizaPageIE,
|
MarkizaPageIE,
|
||||||
|
@ -696,6 +708,7 @@ from .minds import (
|
||||||
from .ministrygrid import MinistryGridIE
|
from .ministrygrid import MinistryGridIE
|
||||||
from .minoto import MinotoIE
|
from .minoto import MinotoIE
|
||||||
from .miomio import MioMioIE
|
from .miomio import MioMioIE
|
||||||
|
from .misskey import MisskeySHIE
|
||||||
from .mit import TechTVMITIE, OCWMITIE
|
from .mit import TechTVMITIE, OCWMITIE
|
||||||
from .mitele import MiTeleIE
|
from .mitele import MiTeleIE
|
||||||
from .mixcloud import (
|
from .mixcloud import (
|
||||||
|
@ -703,7 +716,10 @@ from .mixcloud import (
|
||||||
MixcloudUserIE,
|
MixcloudUserIE,
|
||||||
MixcloudPlaylistIE,
|
MixcloudPlaylistIE,
|
||||||
)
|
)
|
||||||
from .mlb import MLBIE
|
from .mlb import (
|
||||||
|
MLBIE,
|
||||||
|
MLBVideoIE,
|
||||||
|
)
|
||||||
from .mnet import MnetIE
|
from .mnet import MnetIE
|
||||||
from .moevideo import MoeVideoIE
|
from .moevideo import MoeVideoIE
|
||||||
from .mofosex import (
|
from .mofosex import (
|
||||||
|
@ -811,7 +827,6 @@ from .nick import (
|
||||||
NickRuIE,
|
NickRuIE,
|
||||||
)
|
)
|
||||||
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
from .niconico import NiconicoIE, NiconicoPlaylistIE
|
||||||
from .ninateka import NinatekaIE
|
|
||||||
from .ninecninemedia import NineCNineMediaIE
|
from .ninecninemedia import NineCNineMediaIE
|
||||||
from .ninegag import NineGagIE
|
from .ninegag import NineGagIE
|
||||||
from .ninenow import NineNowIE
|
from .ninenow import NineNowIE
|
||||||
|
@ -908,9 +923,15 @@ from .packtpub import (
|
||||||
PacktPubIE,
|
PacktPubIE,
|
||||||
PacktPubCourseIE,
|
PacktPubCourseIE,
|
||||||
)
|
)
|
||||||
|
from .palcomp3 import (
|
||||||
|
PalcoMP3IE,
|
||||||
|
PalcoMP3ArtistIE,
|
||||||
|
PalcoMP3VideoIE,
|
||||||
|
)
|
||||||
from .pandoratv import PandoraTVIE
|
from .pandoratv import PandoraTVIE
|
||||||
from .parliamentliveuk import ParliamentLiveUKIE
|
from .parliamentliveuk import ParliamentLiveUKIE
|
||||||
from .patreon import PatreonIE
|
from .patreon import PatreonIE
|
||||||
|
from .patronite import PatroniteAudioIE
|
||||||
from .pbs import PBSIE
|
from .pbs import PBSIE
|
||||||
from .pearvideo import PearVideoIE
|
from .pearvideo import PearVideoIE
|
||||||
from .peertube import (
|
from .peertube import (
|
||||||
|
@ -946,6 +967,7 @@ from .platzi import (
|
||||||
from .playfm import PlayFMIE
|
from .playfm import PlayFMIE
|
||||||
from .playplustv import PlayPlusTVIE
|
from .playplustv import PlayPlusTVIE
|
||||||
from .plays import PlaysTVIE
|
from .plays import PlaysTVIE
|
||||||
|
from .playstuff import PlayStuffIE
|
||||||
from .playtvak import PlaytvakIE
|
from .playtvak import PlaytvakIE
|
||||||
from .playvid import PlayvidIE
|
from .playvid import PlayvidIE
|
||||||
from .playwire import PlaywireIE
|
from .playwire import PlaywireIE
|
||||||
|
@ -960,6 +982,9 @@ from .polskieradio import (
|
||||||
PolskieRadioIE,
|
PolskieRadioIE,
|
||||||
PolskieRadioCategoryIE,
|
PolskieRadioCategoryIE,
|
||||||
PolskieRadioPlayerIE,
|
PolskieRadioPlayerIE,
|
||||||
|
PolskieRadioPodcastIE,
|
||||||
|
PolskieRadioPodcastListIE,
|
||||||
|
PolskieRadioRadioKierowcowIE,
|
||||||
)
|
)
|
||||||
from .popcorntimes import PopcorntimesIE
|
from .popcorntimes import PopcorntimesIE
|
||||||
from .popcorntv import PopcornTVIE
|
from .popcorntv import PopcornTVIE
|
||||||
|
@ -1006,6 +1031,10 @@ from .radiode import RadioDeIE
|
||||||
from .radiojavan import RadioJavanIE
|
from .radiojavan import RadioJavanIE
|
||||||
from .radiobremen import RadioBremenIE
|
from .radiobremen import RadioBremenIE
|
||||||
from .radiofrance import RadioFranceIE
|
from .radiofrance import RadioFranceIE
|
||||||
|
from .radiokapital import (
|
||||||
|
RadioKapitalIE,
|
||||||
|
RadioKapitalShowIE,
|
||||||
|
)
|
||||||
from .rai import (
|
from .rai import (
|
||||||
RaiPlayIE,
|
RaiPlayIE,
|
||||||
RaiPlayLiveIE,
|
RaiPlayLiveIE,
|
||||||
|
@ -1096,7 +1125,12 @@ from .scte import (
|
||||||
SCTECourseIE,
|
SCTECourseIE,
|
||||||
)
|
)
|
||||||
from .seeker import SeekerIE
|
from .seeker import SeekerIE
|
||||||
|
from .sejmpl import (
|
||||||
|
SejmPlIE,
|
||||||
|
SejmPlVideoIE,
|
||||||
|
)
|
||||||
from .senateisvp import SenateISVPIE
|
from .senateisvp import SenateISVPIE
|
||||||
|
from .senatpl import SenatPlIE
|
||||||
from .sendtonews import SendtoNewsIE
|
from .sendtonews import SendtoNewsIE
|
||||||
from .servus import ServusIE
|
from .servus import ServusIE
|
||||||
from .sevenplus import SevenPlusIE
|
from .sevenplus import SevenPlusIE
|
||||||
|
@ -1196,6 +1230,10 @@ from .spreaker import (
|
||||||
)
|
)
|
||||||
from .springboardplatform import SpringboardPlatformIE
|
from .springboardplatform import SpringboardPlatformIE
|
||||||
from .sprout import SproutIE
|
from .sprout import SproutIE
|
||||||
|
from .spryciarze import (
|
||||||
|
SpryciarzeIE,
|
||||||
|
SpryciarzePageIE,
|
||||||
|
)
|
||||||
from .srgssr import (
|
from .srgssr import (
|
||||||
SRGSSRIE,
|
SRGSSRIE,
|
||||||
SRGSSRPlayIE,
|
SRGSSRPlayIE,
|
||||||
|
@ -1284,6 +1322,8 @@ from .threeqsdn import ThreeQSDNIE
|
||||||
from .tiktok import (
|
from .tiktok import (
|
||||||
TikTokIE,
|
TikTokIE,
|
||||||
TikTokUserIE,
|
TikTokUserIE,
|
||||||
|
TikTokHashtagIE,
|
||||||
|
TikTokMusicIE,
|
||||||
)
|
)
|
||||||
from .tinypic import TinyPicIE
|
from .tinypic import TinyPicIE
|
||||||
from .tmz import (
|
from .tmz import (
|
||||||
|
@ -1364,10 +1404,9 @@ from .tvc import (
|
||||||
from .tver import TVerIE
|
from .tver import TVerIE
|
||||||
from .tvigle import TvigleIE
|
from .tvigle import TvigleIE
|
||||||
from .tvland import TVLandIE
|
from .tvland import TVLandIE
|
||||||
from .tvn24 import TVN24IE
|
from .tvn24 import (
|
||||||
from .tvnplayer import (
|
TVN24IE,
|
||||||
TVNPlayerIE,
|
TVN24NuviIE,
|
||||||
TVNPlayerSeriesIE,
|
|
||||||
)
|
)
|
||||||
from .tvnet import TVNetIE
|
from .tvnet import TVNetIE
|
||||||
from .tvnoe import TVNoeIE
|
from .tvnoe import TVNoeIE
|
||||||
|
@ -1473,6 +1512,8 @@ from .videomore import (
|
||||||
VideomoreSeasonIE,
|
VideomoreSeasonIE,
|
||||||
)
|
)
|
||||||
from .videopress import VideoPressIE
|
from .videopress import VideoPressIE
|
||||||
|
from .videotarget import VideoTargetIE
|
||||||
|
from .vider import ViderIE
|
||||||
from .vidio import VidioIE
|
from .vidio import VidioIE
|
||||||
from .vidlii import VidLiiIE
|
from .vidlii import VidLiiIE
|
||||||
from .vidme import (
|
from .vidme import (
|
||||||
|
@ -1521,7 +1562,7 @@ from .vk import (
|
||||||
from .vlive import (
|
from .vlive import (
|
||||||
VLiveIE,
|
VLiveIE,
|
||||||
VLiveChannelIE,
|
VLiveChannelIE,
|
||||||
VLivePlaylistIE
|
VLivePostIE,
|
||||||
)
|
)
|
||||||
from .vodlocker import VodlockerIE
|
from .vodlocker import VodlockerIE
|
||||||
from .vodpl import VODPlIE
|
from .vodpl import VODPlIE
|
||||||
|
@ -1579,6 +1620,10 @@ from .weibo import (
|
||||||
from .weiqitv import WeiqiTVIE
|
from .weiqitv import WeiqiTVIE
|
||||||
from .wistia import WistiaIE
|
from .wistia import WistiaIE
|
||||||
from .worldstarhiphop import WorldStarHipHopIE
|
from .worldstarhiphop import WorldStarHipHopIE
|
||||||
|
from .wppilot import (
|
||||||
|
WPPilotIE,
|
||||||
|
WPPilotChannelsIE,
|
||||||
|
)
|
||||||
from .wppl import WpPlIE
|
from .wppl import WpPlIE
|
||||||
from .wsj import (
|
from .wsj import (
|
||||||
WSJIE,
|
WSJIE,
|
||||||
|
@ -1624,6 +1669,8 @@ from .yandexmusic import (
|
||||||
YandexMusicTrackIE,
|
YandexMusicTrackIE,
|
||||||
YandexMusicAlbumIE,
|
YandexMusicAlbumIE,
|
||||||
YandexMusicPlaylistIE,
|
YandexMusicPlaylistIE,
|
||||||
|
YandexMusicArtistTracksIE,
|
||||||
|
YandexMusicArtistAlbumsIE,
|
||||||
)
|
)
|
||||||
from .yandexvideo import YandexVideoIE
|
from .yandexvideo import YandexVideoIE
|
||||||
from .yapfiles import YapFilesIE
|
from .yapfiles import YapFilesIE
|
||||||
|
@ -1679,5 +1726,9 @@ from .zattoo import (
|
||||||
)
|
)
|
||||||
from .zdf import ZDFIE, ZDFChannelIE
|
from .zdf import ZDFIE, ZDFChannelIE
|
||||||
from .zhihu import ZhihuIE
|
from .zhihu import ZhihuIE
|
||||||
from .zingmp3 import ZingMp3IE
|
from .zingmp3 import (
|
||||||
|
ZingMp3IE,
|
||||||
|
ZingMp3AlbumIE,
|
||||||
|
)
|
||||||
|
from .zoom import ZoomIE
|
||||||
from .zype import ZypeIE
|
from .zype import ZypeIE
|
||||||
|
|
|
@ -521,7 +521,10 @@ class FacebookIE(InfoExtractor):
|
||||||
raise ExtractorError(
|
raise ExtractorError(
|
||||||
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
'The video is not available, Facebook said: "%s"' % m_msg.group(1),
|
||||||
expected=True)
|
expected=True)
|
||||||
elif '>You must log in to continue' in webpage:
|
elif any(p in webpage for p in (
|
||||||
|
'>You must log in to continue',
|
||||||
|
'id="login_form"',
|
||||||
|
'id="loginbutton"')):
|
||||||
self.raise_login_required()
|
self.raise_login_required()
|
||||||
|
|
||||||
if not video_data and '/watchparty/' in url:
|
if not video_data and '/watchparty/' in url:
|
||||||
|
|
|
@ -5,29 +5,23 @@ from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
class Formula1IE(InfoExtractor):
|
class Formula1IE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?formula1\.com/(?:content/fom-website/)?en/video/\d{4}/\d{1,2}/(?P<id>.+?)\.html'
|
_VALID_URL = r'https?://(?:www\.)?formula1\.com/en/latest/video\.[^.]+\.(?P<id>\d+)\.html'
|
||||||
_TESTS = [{
|
_TEST = {
|
||||||
'url': 'http://www.formula1.com/content/fom-website/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
'url': 'https://www.formula1.com/en/latest/video.race-highlights-spain-2016.6060988138001.html',
|
||||||
'md5': '8c79e54be72078b26b89e0e111c0502b',
|
'md5': 'be7d3a8c2f804eb2ab2aa5d941c359f8',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'JvYXJpMzE6pArfHWm5ARp5AiUmD-gibV',
|
'id': '6060988138001',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Race highlights - Spain 2016',
|
'title': 'Race highlights - Spain 2016',
|
||||||
|
'timestamp': 1463332814,
|
||||||
|
'upload_date': '20160515',
|
||||||
|
'uploader_id': '6057949432001',
|
||||||
},
|
},
|
||||||
'params': {
|
'add_ie': ['BrightcoveNew'],
|
||||||
# m3u8 download
|
}
|
||||||
'skip_download': True,
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/6057949432001/S1WMrhjlh_default/index.html?videoId=%s'
|
||||||
},
|
|
||||||
'add_ie': ['Ooyala'],
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.formula1.com/en/video/2016/5/Race_highlights_-_Spain_2016.html',
|
|
||||||
'only_matching': True,
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
bc_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
|
||||||
ooyala_embed_code = self._search_regex(
|
|
||||||
r'data-videoid="([^"]+)"', webpage, 'ooyala embed code')
|
|
||||||
return self.url_result(
|
return self.url_result(
|
||||||
'ooyala:%s' % ooyala_embed_code, 'Ooyala', ooyala_embed_code)
|
self.BRIGHTCOVE_URL_TEMPLATE % bc_id, 'BrightcoveNew', bc_id)
|
||||||
|
|
|
@ -164,7 +164,7 @@ class FranceTVIE(InfoExtractor):
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if ext == 'f4m':
|
if ext == 'f4m':
|
||||||
if georestricted:
|
if georestricted:
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/issues/3963
|
# See https://github.com/ytdl-org/youtube-dl/issues/3963
|
||||||
# m3u8 urls work fine
|
# m3u8 urls work fine
|
||||||
continue
|
continue
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
|
@ -383,6 +383,10 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# "<figure id=" pattern (#28792)
|
||||||
|
'url': 'https://www.francetvinfo.fr/culture/patrimoine/incendie-de-notre-dame-de-paris/notre-dame-de-paris-de-l-incendie-de-la-cathedrale-a-sa-reconstruction_4372291.html',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -399,7 +403,8 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor):
|
||||||
video_id = self._search_regex(
|
video_id = self._search_regex(
|
||||||
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
(r'player\.load[^;]+src:\s*["\']([^"\']+)',
|
||||||
r'id-video=([^@]+@[^"]+)',
|
r'id-video=([^@]+@[^"]+)',
|
||||||
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
|
r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
|
||||||
|
r'(?:data-id|<figure[^<]+\bid)=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'),
|
||||||
webpage, 'video id')
|
webpage, 'video id')
|
||||||
|
|
||||||
return self._make_url_result(video_id)
|
return self._make_url_result(video_id)
|
||||||
|
|
|
@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
formats = self._extract_m3u8_formats(
|
formats = self._extract_m3u8_formats(
|
||||||
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
|
self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
|
||||||
for f in formats:
|
for f in formats:
|
||||||
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
wh = self._BITRATE_MAP.get(f.get('tbr'))
|
||||||
if wh:
|
if wh:
|
||||||
|
|
|
@ -16,7 +16,7 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class FunimationIE(InfoExtractor):
|
class FunimationIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/shows/[^/]+/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'https?://(?:www\.)?funimation(?:\.com|now\.uk)/(?:[^/]+/)?shows/[^/]+/(?P<id>[^/?#&]+)'
|
||||||
|
|
||||||
_NETRC_MACHINE = 'funimation'
|
_NETRC_MACHINE = 'funimation'
|
||||||
_TOKEN = None
|
_TOKEN = None
|
||||||
|
@ -51,6 +51,10 @@ class FunimationIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
'url': 'https://www.funimationnow.uk/shows/puzzle-dragons-x/drop-impact/simulcast/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# with lang code
|
||||||
|
'url': 'https://www.funimation.com/en/shows/hacksign/role-play/',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _login(self):
|
def _login(self):
|
||||||
|
|
|
@ -6,6 +6,7 @@ from .common import InfoExtractor
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
HEADRequest,
|
HEADRequest,
|
||||||
|
remove_start,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
smuggle_url,
|
smuggle_url,
|
||||||
urlencode_postdata,
|
urlencode_postdata,
|
||||||
|
@ -102,6 +103,26 @@ class GDCVaultIE(InfoExtractor):
|
||||||
'format': 'mp4-408',
|
'format': 'mp4-408',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Kaltura embed, whitespace between quote and embedded URL in iframe's src
|
||||||
|
'url': 'https://www.gdcvault.com/play/1025699',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '0_zagynv0a',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Tech Toolbox',
|
||||||
|
'upload_date': '20190408',
|
||||||
|
'uploader_id': 'joe@blazestreaming.com',
|
||||||
|
'timestamp': 1554764629,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': True,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# HTML5 video
|
||||||
|
'url': 'http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def _login(self, webpage_url, display_id):
|
def _login(self, webpage_url, display_id):
|
||||||
|
@ -175,7 +196,18 @@ class GDCVaultIE(InfoExtractor):
|
||||||
|
|
||||||
xml_name = self._html_search_regex(
|
xml_name = self._html_search_regex(
|
||||||
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
|
r'<iframe src=".*?\?xml(?:=|URL=xml/)(.+?\.xml).*?".*?</iframe>',
|
||||||
start_page, 'xml filename')
|
start_page, 'xml filename', default=None)
|
||||||
|
if not xml_name:
|
||||||
|
info = self._parse_html5_media_entries(url, start_page, video_id)[0]
|
||||||
|
info.update({
|
||||||
|
'title': remove_start(self._search_regex(
|
||||||
|
r'>Session Name:\s*<.*?>\s*<td>(.+?)</td>', start_page,
|
||||||
|
'title', default=None) or self._og_search_title(
|
||||||
|
start_page, default=None), 'GDC Vault - '),
|
||||||
|
'id': video_id,
|
||||||
|
'display_id': display_id,
|
||||||
|
})
|
||||||
|
return info
|
||||||
embed_url = '%s/xml/%s' % (xml_root, xml_name)
|
embed_url = '%s/xml/%s' % (xml_root, xml_name)
|
||||||
ie_key = 'DigitallySpeaking'
|
ie_key = 'DigitallySpeaking'
|
||||||
|
|
||||||
|
|
|
@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
|
||||||
from .digiteka import DigitekaIE
|
from .digiteka import DigitekaIE
|
||||||
from .arkena import ArkenaIE
|
from .arkena import ArkenaIE
|
||||||
from .instagram import InstagramIE
|
from .instagram import InstagramIE
|
||||||
from .liveleak import LiveLeakIE
|
|
||||||
from .threeqsdn import ThreeQSDNIE
|
from .threeqsdn import ThreeQSDNIE
|
||||||
from .theplatform import ThePlatformIE
|
from .theplatform import ThePlatformIE
|
||||||
from .kaltura import KalturaIE
|
from .kaltura import KalturaIE
|
||||||
|
@ -136,6 +135,12 @@ from .pulsembed import PulsEmbedIE
|
||||||
from .arcpublishing import ArcPublishingIE
|
from .arcpublishing import ArcPublishingIE
|
||||||
from .medialaan import MedialaanIE
|
from .medialaan import MedialaanIE
|
||||||
from .simplecast import SimplecastIE
|
from .simplecast import SimplecastIE
|
||||||
|
from .spreaker import SpreakerIE
|
||||||
|
from .castos import (
|
||||||
|
CastosHostedIE,
|
||||||
|
CastosSSPIE,
|
||||||
|
)
|
||||||
|
from .vk import VKIE
|
||||||
|
|
||||||
|
|
||||||
class GenericIE(InfoExtractor):
|
class GenericIE(InfoExtractor):
|
||||||
|
@ -487,7 +492,7 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/2253
|
# https://github.com/ytdl-org/youtube-dl/issues/2253
|
||||||
'url': 'http://bcove.me/i6nfkrc3',
|
'url': 'http://bcove.me/i6nfkrc3',
|
||||||
'md5': '0ba9446db037002366bab3b3eb30c88c',
|
'md5': '0ba9446db037002366bab3b3eb30c88c',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -512,7 +517,7 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/3541
|
# https://github.com/ytdl-org/youtube-dl/issues/3541
|
||||||
'add_ie': ['BrightcoveLegacy'],
|
'add_ie': ['BrightcoveLegacy'],
|
||||||
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
|
'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -976,7 +981,7 @@ class GenericIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
# Multiple brightcove videos
|
# Multiple brightcove videos
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/2283
|
# https://github.com/ytdl-org/youtube-dl/issues/2283
|
||||||
{
|
{
|
||||||
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
|
'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -1634,34 +1639,6 @@ class GenericIE(InfoExtractor):
|
||||||
'upload_date': '20160409',
|
'upload_date': '20160409',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
# LiveLeak embed
|
|
||||||
{
|
|
||||||
'url': 'http://www.wykop.pl/link/3088787/',
|
|
||||||
'md5': '7619da8c820e835bef21a1efa2a0fc71',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '874_1459135191',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Man shows poor quality of new apartment building',
|
|
||||||
'description': 'The wall is like a sand pile.',
|
|
||||||
'uploader': 'Lake8737',
|
|
||||||
},
|
|
||||||
'add_ie': [LiveLeakIE.ie_key()],
|
|
||||||
'params': {
|
|
||||||
'force_generic_extractor': True,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
# Another LiveLeak embed pattern (#13336)
|
|
||||||
{
|
|
||||||
'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '2eb_1496309988',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Thief robs place where everyone was armed',
|
|
||||||
'description': 'md5:694d73ee79e535953cf2488562288eee',
|
|
||||||
'uploader': 'brazilwtf',
|
|
||||||
},
|
|
||||||
'add_ie': [LiveLeakIE.ie_key()],
|
|
||||||
},
|
|
||||||
# Duplicated embedded video URLs
|
# Duplicated embedded video URLs
|
||||||
{
|
{
|
||||||
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
|
||||||
|
@ -2212,12 +2189,12 @@ class GenericIE(InfoExtractor):
|
||||||
# OnNetwork.tv embed
|
# OnNetwork.tv embed
|
||||||
'url': 'https://wiadomosci.gazeta.pl/wiadomosci/7,114883,26377890,panstwo-polskie-nie-uznaje-takich-rodzin-jak-nasza-i-krzywdzi.html',
|
'url': 'https://wiadomosci.gazeta.pl/wiadomosci/7,114883,26377890,panstwo-polskie-nie-uznaje-takich-rodzin-jak-nasza-i-krzywdzi.html',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '337382',
|
'id': '7,114883,26377890,panstwo-polskie-nie-uznaje-takich-rodzin-jak-nasza-i-krzywdzi',
|
||||||
'title': 'Rodzina+ odc. 1. Karolina i Ania',
|
'title': '"Państwo polskie nie uznaje takich rodzin jak nasza i krzywdzi w ten sposób dzieci" [RODZINA+]',
|
||||||
'ext': 'm3u8',
|
'uploader': 'wiadomosci.gazeta.pl',
|
||||||
'age_limit': 16,
|
|
||||||
'upload_date': '20200929',
|
|
||||||
},
|
},
|
||||||
|
# 1x onnetwork:script, which resolves to onnetwork:frame
|
||||||
|
'playlist_count': 1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# Embetty video embeds (youtube, vimeo, facebook)
|
# Embetty video embeds (youtube, vimeo, facebook)
|
||||||
|
@ -2301,6 +2278,43 @@ class GenericIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
'playlist_mincount': 52,
|
'playlist_mincount': 52,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
# Spreaker embed
|
||||||
|
'url': 'https://socjalizm.fm/jak-bedzie-w-socjalizmie/praca/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '44098221',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Jak będzie w socjalizmie? Praca.',
|
||||||
|
'uploader': 'Socjalizm FM',
|
||||||
|
'description': 'md5:d2833c41296a996153353890c329e1af',
|
||||||
|
'upload_date': '20210329',
|
||||||
|
'uploader_id': '13705223',
|
||||||
|
'timestamp': 1617024666,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Castos (hosted) player
|
||||||
|
'url': 'https://castos.com/enhanced-podcast-player/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '210448',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '4 Ways To Create A Video Podcast (And Why You Should Try It)',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Castos Super Simple Podcasting (WordPress plugin, selfhosted)
|
||||||
|
'url': 'https://pzbn.pl/4-heated-terf-moment/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '38',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': '#4: Heated TERF moment',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
# Sibnet embed (https://help.sibnet.ru/?sibnet_video_embed)
|
||||||
|
'url': 'https://phpbb3.x-tk.ru/bbcode-video-sibnet-t24.html',
|
||||||
|
'only_matching': True,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@ -2482,17 +2496,20 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
# Check for direct link to a video
|
# Check for direct link to a video
|
||||||
content_type = head_response.headers.get('Content-Type', '').lower()
|
content_type = head_response.headers.get('Content-Type', '').lower()
|
||||||
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
|
m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.(?:apple|mpeg\.dash)\.|x-)?(?:mpegurl|mpd|bittorrent))))/(?P<format_id>[^;\s]+)', content_type)
|
||||||
if m:
|
if m:
|
||||||
format_id = compat_str(m.group('format_id'))
|
format_id = compat_str(m.group('format_id'))
|
||||||
if format_id.endswith('mpegurl'):
|
if format_id.endswith('mpegurl'):
|
||||||
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
formats = self._extract_m3u8_formats(url, video_id, 'mp4')
|
||||||
elif format_id == 'f4m':
|
elif format_id == 'f4m':
|
||||||
formats = self._extract_f4m_formats(url, video_id)
|
formats = self._extract_f4m_formats(url, video_id)
|
||||||
|
elif format_id.endswith('mpd'):
|
||||||
|
formats = self._extract_mpd_formats(url, video_id)
|
||||||
else:
|
else:
|
||||||
formats = [{
|
formats = [{
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
'url': url,
|
'url': url,
|
||||||
|
'protocol': 'bittorrent' if format_id.endswith('bittorrent') else None,
|
||||||
'vcodec': 'none' if m.group('type') == 'audio' else None
|
'vcodec': 'none' if m.group('type') == 'audio' else None
|
||||||
}]
|
}]
|
||||||
info_dict['direct'] = True
|
info_dict['direct'] = True
|
||||||
|
@ -2526,6 +2543,20 @@ class GenericIE(InfoExtractor):
|
||||||
self._sort_formats(info_dict['formats'])
|
self._sort_formats(info_dict['formats'])
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
|
# Is it a BitTorrent manifest file?
|
||||||
|
if any(first_bytes.startswith(byt) for byt in (
|
||||||
|
b'd8:announce',
|
||||||
|
b'd13:announce-list',
|
||||||
|
b'd7:comment',
|
||||||
|
b'd4:info',
|
||||||
|
)):
|
||||||
|
info_dict['formats'] = [{
|
||||||
|
'url': url,
|
||||||
|
'protocol': 'bittorrent',
|
||||||
|
}]
|
||||||
|
# info_dict['direct'] = True
|
||||||
|
return info_dict
|
||||||
|
|
||||||
# Maybe it's a direct link to a video?
|
# Maybe it's a direct link to a video?
|
||||||
# Be careful not to download the whole thing!
|
# Be careful not to download the whole thing!
|
||||||
if not is_html(first_bytes):
|
if not is_html(first_bytes):
|
||||||
|
@ -2593,12 +2624,12 @@ class GenericIE(InfoExtractor):
|
||||||
return camtasia_res
|
return camtasia_res
|
||||||
|
|
||||||
# Sometimes embedded video player is hidden behind percent encoding
|
# Sometimes embedded video player is hidden behind percent encoding
|
||||||
# (e.g. https://github.com/ytdl-org/haruhi-dl/issues/2448)
|
# (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448)
|
||||||
# Unescaping the whole page allows to handle those cases in a generic way
|
# Unescaping the whole page allows to handle those cases in a generic way
|
||||||
webpage = compat_urllib_parse_unquote(webpage)
|
webpage = compat_urllib_parse_unquote(webpage)
|
||||||
|
|
||||||
# Unescape squarespace embeds to be detected by generic extractor,
|
# Unescape squarespace embeds to be detected by generic extractor,
|
||||||
# see https://github.com/ytdl-org/haruhi-dl/issues/21294
|
# see https://github.com/ytdl-org/youtube-dl/issues/21294
|
||||||
webpage = re.sub(
|
webpage = re.sub(
|
||||||
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
|
r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
|
||||||
lambda x: unescapeHTML(x.group(0)), webpage)
|
lambda x: unescapeHTML(x.group(0)), webpage)
|
||||||
|
@ -2684,7 +2715,6 @@ class GenericIE(InfoExtractor):
|
||||||
SoundcloudEmbedIE,
|
SoundcloudEmbedIE,
|
||||||
TuneInBaseIE,
|
TuneInBaseIE,
|
||||||
JWPlatformIE,
|
JWPlatformIE,
|
||||||
LiveLeakIE,
|
|
||||||
DBTVIE,
|
DBTVIE,
|
||||||
VideaIE,
|
VideaIE,
|
||||||
TwentyMinutenIE,
|
TwentyMinutenIE,
|
||||||
|
@ -2722,6 +2752,8 @@ class GenericIE(InfoExtractor):
|
||||||
ArcPublishingIE,
|
ArcPublishingIE,
|
||||||
MedialaanIE,
|
MedialaanIE,
|
||||||
SimplecastIE,
|
SimplecastIE,
|
||||||
|
SpreakerIE,
|
||||||
|
CastosHostedIE,
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
ie_key = embie.ie_key()
|
ie_key = embie.ie_key()
|
||||||
|
@ -2933,7 +2965,7 @@ class GenericIE(InfoExtractor):
|
||||||
webpage)
|
webpage)
|
||||||
if not mobj:
|
if not mobj:
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
|
r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
|
||||||
webpage)
|
webpage)
|
||||||
if mobj is not None:
|
if mobj is not None:
|
||||||
return self.url_result(mobj.group('url'), 'MLB')
|
return self.url_result(mobj.group('url'), 'MLB')
|
||||||
|
@ -3184,6 +3216,15 @@ class GenericIE(InfoExtractor):
|
||||||
if pulsembed_entries:
|
if pulsembed_entries:
|
||||||
return self.playlist_result(pulsembed_entries, video_id, video_title)
|
return self.playlist_result(pulsembed_entries, video_id, video_title)
|
||||||
|
|
||||||
|
castos_ssp_entries = CastosSSPIE._extract_entries(webpage)
|
||||||
|
if castos_ssp_entries:
|
||||||
|
return self.playlist_result(castos_ssp_entries, video_id, video_title)
|
||||||
|
|
||||||
|
# Look for sibnet embedded player
|
||||||
|
sibnet_urls = VKIE._extract_sibnet_urls(webpage)
|
||||||
|
if sibnet_urls:
|
||||||
|
return self.playlist_from_matches(sibnet_urls, video_id, video_title)
|
||||||
|
|
||||||
# Look for HTML5 media
|
# Look for HTML5 media
|
||||||
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
|
||||||
if entries:
|
if entries:
|
||||||
|
@ -3210,7 +3251,7 @@ class GenericIE(InfoExtractor):
|
||||||
jwplayer_data, video_id, require_title=False, base_url=url)
|
jwplayer_data, video_id, require_title=False, base_url=url)
|
||||||
return merge_dicts(info, info_dict)
|
return merge_dicts(info, info_dict)
|
||||||
except ExtractorError:
|
except ExtractorError:
|
||||||
# See https://github.com/ytdl-org/haruhi-dl/pull/16735
|
# See https://github.com/ytdl-org/youtube-dl/pull/16735
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Video.js embed
|
# Video.js embed
|
||||||
|
@ -3247,6 +3288,9 @@ class GenericIE(InfoExtractor):
|
||||||
'url': src,
|
'url': src,
|
||||||
'ext': (mimetype2ext(src_type)
|
'ext': (mimetype2ext(src_type)
|
||||||
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
|
||||||
|
'http_headers': {
|
||||||
|
'Referer': full_response.geturl(),
|
||||||
|
},
|
||||||
})
|
})
|
||||||
if formats:
|
if formats:
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
@ -3315,7 +3359,7 @@ class GenericIE(InfoExtractor):
|
||||||
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
|
||||||
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
# We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
|
||||||
if m_video_type is not None:
|
if m_video_type is not None:
|
||||||
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
|
found = filter_video(re.findall(r'<meta.*?property="og:(?:video|audio)".*?content="(.*?)"', webpage))
|
||||||
if not found:
|
if not found:
|
||||||
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
|
||||||
found = re.search(
|
found = re.search(
|
||||||
|
|
|
@ -155,7 +155,7 @@ class GoIE(AdobePassIE):
|
||||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||||
r'data-video-id=["\']*(VDKA\w+)',
|
r'data-video-id=["\']*(VDKA\w+)',
|
||||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
|
r'\bvideoIdCode["\']\s*:\s*["\'](vdka\w+)'
|
||||||
), webpage, 'video id', default=video_id)
|
), webpage, 'video id', default=video_id)
|
||||||
if not site_info:
|
if not site_info:
|
||||||
brand = self._search_regex(
|
brand = self._search_regex(
|
||||||
|
|
|
@ -36,7 +36,7 @@ class GoogleDriveIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# video can't be watched anonymously due to view count limit reached,
|
# video can't be watched anonymously due to view count limit reached,
|
||||||
# but can be downloaded (see https://github.com/ytdl-org/haruhi-dl/issues/14046)
|
# but can be downloaded (see https://github.com/ytdl-org/youtube-dl/issues/14046)
|
||||||
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
'url': 'https://drive.google.com/file/d/0B-vUyvmDLdWDcEt4WjBqcmI2XzQ/view',
|
||||||
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
'md5': 'bfbd670d03a470bb1e6d4a257adec12e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -12,6 +12,7 @@ from ..compat import (
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
|
float_or_none,
|
||||||
get_element_by_attribute,
|
get_element_by_attribute,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
lowercase_escape,
|
lowercase_escape,
|
||||||
|
@ -32,6 +33,7 @@ class InstagramIE(InfoExtractor):
|
||||||
'title': 'Video by naomipq',
|
'title': 'Video by naomipq',
|
||||||
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
'description': 'md5:1f17f0ab29bd6fe2bfad705f58de3cb8',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 0,
|
||||||
'timestamp': 1371748545,
|
'timestamp': 1371748545,
|
||||||
'upload_date': '20130620',
|
'upload_date': '20130620',
|
||||||
'uploader_id': 'naomipq',
|
'uploader_id': 'naomipq',
|
||||||
|
@ -48,6 +50,7 @@ class InstagramIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video by britneyspears',
|
'title': 'Video by britneyspears',
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 0,
|
||||||
'timestamp': 1453760977,
|
'timestamp': 1453760977,
|
||||||
'upload_date': '20160125',
|
'upload_date': '20160125',
|
||||||
'uploader_id': 'britneyspears',
|
'uploader_id': 'britneyspears',
|
||||||
|
@ -86,6 +89,24 @@ class InstagramIE(InfoExtractor):
|
||||||
'title': 'Post by instagram',
|
'title': 'Post by instagram',
|
||||||
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
'description': 'md5:0f9203fc6a2ce4d228da5754bcf54957',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# IGTV
|
||||||
|
'url': 'https://www.instagram.com/tv/BkfuX9UB-eK/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'BkfuX9UB-eK',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Fingerboarding Tricks with @cass.fb',
|
||||||
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
|
'duration': 53.83,
|
||||||
|
'timestamp': 1530032919,
|
||||||
|
'upload_date': '20180626',
|
||||||
|
'uploader_id': 'instagram',
|
||||||
|
'uploader': 'Instagram',
|
||||||
|
'like_count': int,
|
||||||
|
'comment_count': int,
|
||||||
|
'comments': list,
|
||||||
|
'description': 'Meet Cass Hirst (@cass.fb), a fingerboarding pro who can perform tiny ollies and kickflips while blindfolded.',
|
||||||
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
'url': 'https://instagram.com/p/-Cmh1cukG2/',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -141,7 +162,7 @@ class InstagramIE(InfoExtractor):
|
||||||
lambda x: x['entry_data']['PostPage'][0]['media']),
|
lambda x: x['entry_data']['PostPage'][0]['media']),
|
||||||
dict)
|
dict)
|
||||||
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
# _sharedData.entry_data.PostPage is empty when authenticated (see
|
||||||
# https://github.com/hdl-org/haruhi-dl/pull/22880)
|
# https://github.com/ytdl-org/youtube-dl/pull/22880)
|
||||||
if not media:
|
if not media:
|
||||||
additional_data = self._parse_json(
|
additional_data = self._parse_json(
|
||||||
self._search_regex(
|
self._search_regex(
|
||||||
|
@ -159,7 +180,9 @@ class InstagramIE(InfoExtractor):
|
||||||
description = try_get(
|
description = try_get(
|
||||||
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
|
||||||
compat_str) or media.get('caption')
|
compat_str) or media.get('caption')
|
||||||
|
title = media.get('title')
|
||||||
thumbnail = media.get('display_src') or media.get('display_url')
|
thumbnail = media.get('display_src') or media.get('display_url')
|
||||||
|
duration = float_or_none(media.get('video_duration'))
|
||||||
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
|
||||||
uploader = media.get('owner', {}).get('full_name')
|
uploader = media.get('owner', {}).get('full_name')
|
||||||
uploader_id = media.get('owner', {}).get('username')
|
uploader_id = media.get('owner', {}).get('username')
|
||||||
|
@ -200,9 +223,10 @@ class InstagramIE(InfoExtractor):
|
||||||
continue
|
continue
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': node.get('shortcode') or node['id'],
|
'id': node.get('shortcode') or node['id'],
|
||||||
'title': 'Video %d' % edge_num,
|
'title': node.get('title') or 'Video %d' % edge_num,
|
||||||
'url': node_video_url,
|
'url': node_video_url,
|
||||||
'thumbnail': node.get('display_url'),
|
'thumbnail': node.get('display_url'),
|
||||||
|
'duration': float_or_none(node.get('video_duration')),
|
||||||
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
|
||||||
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
|
||||||
'view_count': int_or_none(node.get('video_view_count')),
|
'view_count': int_or_none(node.get('video_view_count')),
|
||||||
|
@ -239,8 +263,9 @@ class InstagramIE(InfoExtractor):
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Video by %s' % uploader_id,
|
'title': title or 'Video by %s' % uploader_id,
|
||||||
'description': description,
|
'description': description,
|
||||||
|
'duration': duration,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
|
|
@ -8,6 +8,7 @@ from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,7 +80,11 @@ class IplaIE(InfoExtractor):
|
||||||
'Content-type': 'application/json'
|
'Content-type': 'application/json'
|
||||||
}
|
}
|
||||||
|
|
||||||
res = self._download_json('http://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/navigation/', media_id, data=req, headers=headers)
|
||||||
|
if not res.get('result'):
|
||||||
|
if res['error']['code'] == 13404:
|
||||||
|
raise ExtractorError('Video requires DRM protection', expected=True)
|
||||||
|
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||||
return res['result']['mediaItem']
|
return res['result']['mediaItem']
|
||||||
|
|
||||||
def get_url(self, media_id, source_id):
|
def get_url(self, media_id, source_id):
|
||||||
|
@ -93,4 +98,6 @@ class IplaIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
|
res = self._download_json('https://b2c-mobile.redefine.pl/rpc/drm/', media_id, data=req, headers=headers)
|
||||||
|
if not res.get('result'):
|
||||||
|
raise ExtractorError(f"Ipla said: {res['error']['message']} - {res['error']['data']['userMessage']}")
|
||||||
return res['result']['url']
|
return res['result']['url']
|
||||||
|
|
|
@ -29,34 +29,51 @@ class JamendoIE(InfoExtractor):
|
||||||
'id': '196219',
|
'id': '196219',
|
||||||
'display_id': 'stories-from-emona-i',
|
'display_id': 'stories-from-emona-i',
|
||||||
'ext': 'flac',
|
'ext': 'flac',
|
||||||
'title': 'Maya Filipič - Stories from Emona I',
|
# 'title': 'Maya Filipič - Stories from Emona I',
|
||||||
'artist': 'Maya Filipič',
|
'title': 'Stories from Emona I',
|
||||||
|
# 'artist': 'Maya Filipič',
|
||||||
'track': 'Stories from Emona I',
|
'track': 'Stories from Emona I',
|
||||||
'duration': 210,
|
'duration': 210,
|
||||||
'thumbnail': r're:^https?://.*\.jpg',
|
'thumbnail': r're:^https?://.*\.jpg',
|
||||||
'timestamp': 1217438117,
|
'timestamp': 1217438117,
|
||||||
'upload_date': '20080730',
|
'upload_date': '20080730',
|
||||||
|
'license': 'by-nc-nd',
|
||||||
|
'view_count': int,
|
||||||
|
'like_count': int,
|
||||||
|
'average_rating': int,
|
||||||
|
'tags': ['piano', 'peaceful', 'newage', 'strings', 'upbeat'],
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _call_api(self, resource, resource_id):
    """Fetch a single Jamendo API object.

    Requests ``/api/<resource>s`` for ``resource_id`` and returns the first
    element of the JSON list the endpoint answers with.  Every request is
    signed with an ``X-Jam-Call`` header built from the SHA-1 of the API
    path concatenated with a random nonce.
    """
    api_path = '/api/%ss' % resource
    nonce = compat_str(random.random())
    signature = hashlib.sha1((api_path + nonce).encode()).hexdigest()
    response = self._download_json(
        'https://www.jamendo.com' + api_path, resource_id,
        query={
            'id[]': resource_id,
        },
        headers={
            'X-Jam-Call': '$%s*%s~' % (signature, nonce)
        })
    return response[0]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
track_id, display_id = self._VALID_URL_RE.match(url).groups()
|
||||||
webpage = self._download_webpage(
|
# webpage = self._download_webpage(
|
||||||
'https://www.jamendo.com/track/' + track_id, track_id)
|
# 'https://www.jamendo.com/track/' + track_id, track_id)
|
||||||
models = self._parse_json(self._html_search_regex(
|
# models = self._parse_json(self._html_search_regex(
|
||||||
r"data-bundled-models='([^']+)",
|
# r"data-bundled-models='([^']+)",
|
||||||
webpage, 'bundled models'), track_id)
|
# webpage, 'bundled models'), track_id)
|
||||||
track = models['track']['models'][0]
|
# track = models['track']['models'][0]
|
||||||
|
track = self._call_api('track', track_id)
|
||||||
title = track_name = track['name']
|
title = track_name = track['name']
|
||||||
get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
# get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
|
||||||
artist = get_model('artist')
|
# artist = get_model('artist')
|
||||||
artist_name = artist.get('name')
|
# artist_name = artist.get('name')
|
||||||
if artist_name:
|
# if artist_name:
|
||||||
title = '%s - %s' % (artist_name, title)
|
# title = '%s - %s' % (artist_name, title)
|
||||||
album = get_model('album')
|
# album = get_model('album')
|
||||||
|
|
||||||
formats = [{
|
formats = [{
|
||||||
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
|
||||||
|
@ -74,7 +91,7 @@ class JamendoIE(InfoExtractor):
|
||||||
|
|
||||||
urls = []
|
urls = []
|
||||||
thumbnails = []
|
thumbnails = []
|
||||||
for _, covers in track.get('cover', {}).items():
|
for covers in (track.get('cover') or {}).values():
|
||||||
for cover_id, cover_url in covers.items():
|
for cover_id, cover_url in covers.items():
|
||||||
if not cover_url or cover_url in urls:
|
if not cover_url or cover_url in urls:
|
||||||
continue
|
continue
|
||||||
|
@ -88,13 +105,14 @@ class JamendoIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
for tag in track.get('tags', []):
|
for tag in (track.get('tags') or []):
|
||||||
tag_name = tag.get('name')
|
tag_name = tag.get('name')
|
||||||
if not tag_name:
|
if not tag_name:
|
||||||
continue
|
continue
|
||||||
tags.append(tag_name)
|
tags.append(tag_name)
|
||||||
|
|
||||||
stats = track.get('stats') or {}
|
stats = track.get('stats') or {}
|
||||||
|
license = track.get('licenseCC') or []
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': track_id,
|
'id': track_id,
|
||||||
|
@ -103,11 +121,11 @@ class JamendoIE(InfoExtractor):
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': track.get('description'),
|
'description': track.get('description'),
|
||||||
'duration': int_or_none(track.get('duration')),
|
'duration': int_or_none(track.get('duration')),
|
||||||
'artist': artist_name,
|
# 'artist': artist_name,
|
||||||
'track': track_name,
|
'track': track_name,
|
||||||
'album': album.get('name'),
|
# 'album': album.get('name'),
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'license': '-'.join(track.get('licenseCC', [])) or None,
|
'license': '-'.join(license) if license else None,
|
||||||
'timestamp': int_or_none(track.get('dateCreated')),
|
'timestamp': int_or_none(track.get('dateCreated')),
|
||||||
'view_count': int_or_none(stats.get('listenedAll')),
|
'view_count': int_or_none(stats.get('listenedAll')),
|
||||||
'like_count': int_or_none(stats.get('favorited')),
|
'like_count': int_or_none(stats.get('favorited')),
|
||||||
|
@ -116,9 +134,9 @@ class JamendoIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class JamendoAlbumIE(InfoExtractor):
|
class JamendoAlbumIE(JamendoIE):
|
||||||
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
|
||||||
_TEST = {
|
_TESTS = [{
|
||||||
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '121486',
|
'id': '121486',
|
||||||
|
@ -151,17 +169,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||||
'params': {
|
'params': {
|
||||||
'playlistend': 2
|
'playlistend': 2
|
||||||
}
|
}
|
||||||
}
|
}]
|
||||||
|
|
||||||
def _call_api(self, resource, resource_id):
    """Fetch a single Jamendo API object.

    Requests ``/api/<resource>s`` for ``resource_id`` and returns the first
    element of the JSON list the endpoint answers with.  Every request is
    signed with an ``X-Jam-Call`` header built from the SHA-1 of the API
    path concatenated with a random nonce.
    """
    api_path = '/api/%ss' % resource
    nonce = compat_str(random.random())
    signature = hashlib.sha1((api_path + nonce).encode()).hexdigest()
    response = self._download_json(
        'https://www.jamendo.com' + api_path, resource_id,
        query={
            'id[]': resource_id,
        },
        headers={
            'X-Jam-Call': '$%s*%s~' % (signature, nonce)
        })
    return response[0]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
album_id = self._match_id(url)
|
album_id = self._match_id(url)
|
||||||
|
@ -169,7 +177,7 @@ class JamendoAlbumIE(InfoExtractor):
|
||||||
album_name = album.get('name')
|
album_name = album.get('name')
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for track in album.get('tracks', []):
|
for track in (album.get('tracks') or []):
|
||||||
track_id = track.get('id')
|
track_id = track.get('id')
|
||||||
if not track_id:
|
if not track_id:
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -120,7 +120,7 @@ class KalturaIE(InfoExtractor):
|
||||||
def _extract_urls(webpage, url=None):
|
def _extract_urls(webpage, url=None):
|
||||||
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
# Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
|
||||||
finditer = (
|
finditer = (
|
||||||
re.finditer(
|
list(re.finditer(
|
||||||
r"""(?xs)
|
r"""(?xs)
|
||||||
kWidget\.(?:thumb)?[Ee]mbed\(
|
kWidget\.(?:thumb)?[Ee]mbed\(
|
||||||
\{.*?
|
\{.*?
|
||||||
|
@ -128,8 +128,8 @@ class KalturaIE(InfoExtractor):
|
||||||
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
(?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
|
||||||
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
(?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
|
||||||
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
(?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
|
||||||
""", webpage)
|
""", webpage))
|
||||||
or re.finditer(
|
or list(re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
(?P<q1>["'])
|
(?P<q1>["'])
|
||||||
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
|
||||||
|
@ -142,16 +142,16 @@ class KalturaIE(InfoExtractor):
|
||||||
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
\[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
|
||||||
)
|
)
|
||||||
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
(?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
|
||||||
''', webpage)
|
''', webpage))
|
||||||
or re.finditer(
|
or list(re.finditer(
|
||||||
r'''(?xs)
|
r'''(?xs)
|
||||||
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
|
<(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
|
||||||
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
(?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
|
||||||
(?:(?!(?P=q1)).)*
|
(?:(?!(?P=q1)).)*
|
||||||
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
[?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
|
||||||
(?:(?!(?P=q1)).)*
|
(?:(?!(?P=q1)).)*
|
||||||
(?P=q1)
|
(?P=q1)
|
||||||
''', webpage)
|
''', webpage))
|
||||||
)
|
)
|
||||||
urls = []
|
urls = []
|
||||||
for mobj in finditer:
|
for mobj in finditer:
|
||||||
|
|
|
@ -4,7 +4,13 @@ from __future__ import unicode_literals
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import js_to_json
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
ExtractorError,
|
||||||
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
|
str_or_none,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class LineTVIE(InfoExtractor):
|
class LineTVIE(InfoExtractor):
|
||||||
|
@ -88,3 +94,137 @@ class LineTVIE(InfoExtractor):
|
||||||
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
for thumbnail in video_info.get('thumbnails', {}).get('list', [])],
|
||||||
'view_count': video_info.get('meta', {}).get('count'),
|
'view_count': video_info.get('meta', {}).get('count'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveBaseIE(InfoExtractor):
    """Shared pieces of the LINE LIVE extractors."""

    # Every API endpoint used by the subclasses hangs off this prefix.
    _API_BASE_URL = 'https://live-api.line-apps.com/web/v4.0/channel/'

    def _parse_broadcast_item(self, item):
        """Convert one broadcast JSON object into an info dict.

        ``item`` must carry ``id`` and ``title``; every other field is
        optional.  The result holds metadata only - no formats.
        """
        broadcast_id = compat_str(item['id'])
        title = item['title']
        is_live = item.get('isBroadcastingNow')

        # Keep only the thumbnail variants that actually have a URL.
        thumbnails = [
            {
                'id': thumb_id,
                'url': thumb_url,
            }
            for thumb_id, thumb_url in (item.get('thumbnailURLs') or {}).items()
            if thumb_url
        ]

        channel = item.get('channel') or {}
        channel_id = str_or_none(channel.get('id'))
        if channel_id:
            channel_url = 'https://live.line.me/channels/' + channel_id
        else:
            channel_url = None

        if is_live:
            title = self._live_title(title)

        return {
            'id': broadcast_id,
            'title': title,
            'thumbnails': thumbnails,
            'timestamp': int_or_none(item.get('createdAt')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': channel_url,
            'duration': int_or_none(item.get('archiveDuration')),
            'view_count': int_or_none(item.get('viewerCount')),
            'comment_count': int_or_none(item.get('chatCount')),
            'is_live': is_live,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveIE(LineLiveBaseIE):
    """Extractor for a single LINE LIVE broadcast (live or archived)."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<channel_id>\d+)/broadcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.line.me/channels/4867368/broadcast/16331360',
        'md5': 'bc931f26bf1d4f971e3b0982b3fab4a3',
        'info_dict': {
            'id': '16331360',
            'title': '振りコピ講座😙😙😙',
            'ext': 'mp4',
            'timestamp': 1617095132,
            'upload_date': '20210330',
            'channel': '白川ゆめか',
            'channel_id': '4867368',
            'view_count': int,
            'comment_count': int,
            'is_live': False,
        }
    }, {
        # archiveStatus == 'DELETED'
        'url': 'https://live.line.me/channels/4778159/broadcast/16378488',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the broadcast JSON and build the info dict with formats.

        Raises ExtractorError (expected) when no stream URL is offered and
        the API reports an archive status other than ``ARCHIVED``.
        """
        channel_id, broadcast_id = re.match(self._VALID_URL, url).groups()
        broadcast = self._download_json(
            self._API_BASE_URL + '%s/broadcast/%s' % (channel_id, broadcast_id),
            broadcast_id)
        item = broadcast['item']
        info = self._parse_broadcast_item(item)
        is_live = info['is_live']
        protocol = 'm3u8' if is_live else 'm3u8_native'

        # Stream URLs live under 'liveHLSURLs' or 'archivedHLSURLs'.
        hls_urls = broadcast.get(('live' if is_live else 'archived') + 'HLSURLs') or {}
        formats = []
        for k, v in hls_urls.items():
            if not v:
                continue
            if k == 'abr':
                # The 'abr' entry is a playlist to be expanded into formats.
                formats.extend(self._extract_m3u8_formats(
                    v, broadcast_id, 'mp4', protocol,
                    m3u8_id='hls', fatal=False))
                continue
            f = {
                'ext': 'mp4',
                'format_id': 'hls-' + k,
                'protocol': protocol,
                'url': v,
            }
            # Non-numeric keys are flagged as carrying no video stream.
            if not k.isdigit():
                f['vcodec'] = 'none'
            formats.append(f)

        if not formats:
            # archiveStatus may be missing (or not a string); only build the
            # friendly message when there is something to report, otherwise
            # fall through to the generic error from _sort_formats.
            archive_status = str_or_none(item.get('archiveStatus'))
            if archive_status and archive_status != 'ARCHIVED':
                raise ExtractorError('this video has been ' + archive_status.lower(), expected=True)
        self._sort_formats(formats)
        info['formats'] = formats
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class LineLiveChannelIE(LineLiveBaseIE):
    """Extractor for a LINE LIVE channel: a playlist of its archived broadcasts."""

    _VALID_URL = r'https?://live\.line\.me/channels/(?P<id>\d+)(?!/broadcast/\d+)(?:[/?&#]|$)'
    _TEST = {
        'url': 'https://live.line.me/channels/5893542',
        'info_dict': {
            'id': '5893542',
            'title': 'いくらちゃん',
            'description': 'md5:c3a4af801f43b2fac0b02294976580be',
        },
        'playlist_mincount': 29
    }

    def _archived_broadcasts_entries(self, archived_broadcasts, channel_id):
        """Yield one ``url`` entry per archived broadcast, following pagination.

        ``archived_broadcasts`` is the first page (taken from the channel
        JSON); further pages are requested with the id of the last entry
        yielded from the current page as ``lastId``.
        """
        while True:
            # Cursor for the next page. Reset per page so that a page without
            # any usable row can neither hit an unbound name nor re-request
            # the same page forever.
            last_id = None
            for row in (archived_broadcasts.get('rows') or []):
                share_url = str_or_none(row.get('shareURL'))
                if not share_url:
                    continue
                info = self._parse_broadcast_item(row)
                info.update({
                    '_type': 'url',
                    'url': share_url,
                    'ie_key': LineLiveIE.ie_key(),
                })
                last_id = info['id']
                yield info
            if not archived_broadcasts.get('hasNextPage') or last_id is None:
                return
            archived_broadcasts = self._download_json(
                self._API_BASE_URL + channel_id + '/archived_broadcasts',
                channel_id, query={
                    'lastId': last_id,
                })

    def _real_extract(self, url):
        """Download the channel JSON and return its archived broadcasts as a playlist."""
        channel_id = self._match_id(url)
        channel = self._download_json(self._API_BASE_URL + channel_id, channel_id)
        return self.playlist_result(
            self._archived_broadcasts_entries(channel.get('archivedBroadcasts') or {}, channel_id),
            channel_id, channel.get('title'), channel.get('information'))
|
||||||
|
|
|
@ -1,191 +0,0 @@
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import int_or_none
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakIE(InfoExtractor):
    """Extractor for liveleak.com ``view`` pages (``i=`` / ``t=`` ids)."""

    _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'http://www.liveleak.com/view?i=757_1364311680',
        'md5': '0813c2430bea7a46bf13acf3406992f4',
        'info_dict': {
            'id': '757_1364311680',
            'ext': 'mp4',
            'description': 'extremely bad day for this guy..!',
            'uploader': 'ljfriel2',
            'title': 'Most unlucky car accident',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        'url': 'http://www.liveleak.com/view?i=f93_1390833151',
        'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
        'info_dict': {
            'id': 'f93_1390833151',
            'ext': 'mp4',
            'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
            'uploader': 'ARD_Stinkt',
            'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Prochan embed
        'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
        'md5': '42c6d97d54f1db107958760788c5f48f',
        'info_dict': {
            'id': '4f7_1392687779',
            'ext': 'mp4',
            'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
            'uploader': 'CapObveus',
            'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
            'age_limit': 18,
        },
        'skip': 'Video is dead',
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
        # Multiple resolutions
        'url': 'http://www.liveleak.com/view?i=801_1409392012',
        'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
        'info_dict': {
            'id': '801_1409392012',
            'ext': 'mp4',
            'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
            'uploader': 'bony333',
            'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
            'thumbnail': r're:^https?://.*\.jpg$'
        }
    }, {
        # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
        'url': 'http://m.liveleak.com/view?i=763_1473349649',
        'add_ie': ['Youtube'],
        'info_dict': {
            'id': '763_1473349649',
            'ext': 'mp4',
            'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
            'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
            'uploader': 'Ziz',
            'upload_date': '20160908',
            'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.liveleak.com/view?i=677_1439397581',
        'info_dict': {
            'id': '677_1439397581',
            'title': 'Fuel Depot in China Explosion caught on video',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
        'only_matching': True,
    }, {
        # No original video
        'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage, **kwargs):
        """Return the ``src`` of every LiveLeak ``ll_embed`` iframe found in *webpage*."""
        embed_iframe_re = r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"'
        return re.findall(embed_iframe_re, webpage)

    def _real_extract(self, url):
        """Extract the page's HTML5 media entries, or defer to an embedded player.

        Returns a playlist of the entries found on the page; when the page
        has none, returns a ``url_transparent`` result pointing at the
        Prochan/YouTube iframe instead.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Page-level metadata shared by every entry.
        og_title = self._og_search_title(webpage)
        video_title = og_title.replace('LiveLeak.com -', '').strip()
        video_description = self._og_search_description(webpage)
        video_uploader = self._html_search_regex(
            r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
        age_text = self._search_regex(
            r'you confirm that you are ([0-9]+) years and over.',
            webpage, 'age limit', default=None)
        age_limit = int_or_none(age_text)
        video_thumbnail = self._og_search_thumbnail(webpage)

        entries = self._parse_html5_media_entries(url, webpage, video_id)
        if not entries:
            # No HTML5 media on the page - maybe it is an embed?
            embed_url = self._search_regex(
                r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
                webpage, 'embed URL')
            return {
                '_type': 'url_transparent',
                'url': embed_url,
                'id': video_id,
                'title': video_title,
                'description': video_description,
                'uploader': video_uploader,
                'age_limit': age_limit,
            }

        several_entries = len(entries) > 1
        for position, entry in enumerate(entries, 1):
            formats = []
            for fmt in entry['formats']:
                if not fmt.get('height'):
                    height_label = self._search_regex(
                        r'([0-9]+)p\.mp4', fmt['url'], 'height label',
                        default=None)
                    fmt['height'] = int_or_none(height_label)
                formats.append(fmt)

                # Removing '.*.mp4' gives the raw video, which is essentially
                # the same video without the LiveLeak logo at the top (see
                # https://github.com/ytdl-org/youtube-dl/pull/4768)
                orig_url = re.sub(r'\.mp4\.[^.]+', '', fmt['url'])
                if fmt['url'] == orig_url:
                    continue
                base_id = fmt.get('format_id')
                if base_id:
                    original_id = 'original' + '-' + base_id
                else:
                    original_id = 'original'
                if self._is_valid_url(orig_url, video_id, original_id):
                    formats.append({
                        'format_id': original_id,
                        'url': orig_url,
                        'preference': 1,
                    })
            self._sort_formats(formats)
            entry['formats'] = formats

            # One-video pages keep the bare ID for backward compatibility;
            # only multi-video pages get a numbered suffix.
            entry['id'] = '%s_%s' % (video_id, position) if several_entries else video_id

            entry.update({
                'title': video_title,
                'description': video_description,
                'uploader': video_uploader,
                'age_limit': age_limit,
                'thumbnail': video_thumbnail,
            })

        return self.playlist_result(entries, video_id, video_title)
|
|
||||||
|
|
||||||
|
|
||||||
class LiveLeakEmbedIE(InfoExtractor):
    """Extractor for ``ll_embed`` URLs; resolves them to a ``view`` page URL."""

    _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'

    # See generic.py for actual test cases
    _TESTS = [{
        'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
        'only_matching': True,
    }, {
        'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the matching LiveLeak view URL over to LiveLeakIE."""
        mobj = re.match(self._VALID_URL, url)
        kind = mobj.group('kind')
        video_id = mobj.group('id')

        if kind != 'f':
            # 'i' and 't' ids map directly onto the view page query string.
            liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
        else:
            # For 'f' ids the view URL is searched for in the embed page.
            webpage = self._download_webpage(url, video_id)
            liveleak_url = self._search_regex(
                r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
                webpage, 'LiveLeak URL', group='url')

        return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
|
|
61
haruhi_dl/extractor/magentamusik360.py
Normal file
61
haruhi_dl/extractor/magentamusik360.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MagentaMusik360IE(InfoExtractor):
    """Extractor for videos hosted on magenta-musik-360.de.

    Two URL shapes are accepted: ``<slug>-<numeric id>`` pages, where the
    asset id is part of the URL, and ``festivals/<slug>`` pages, where the
    asset id has to be read from the page's ``data-asset-id`` attribute.
    """

    _VALID_URL = r'https?://(?:www\.)?magenta-musik-360\.de/([a-z0-9-]+-(?P<id>[0-9]+)|festivals/.+)'
    _TESTS = [{
        'url': 'https://www.magenta-musik-360.de/within-temptation-wacken-2019-1-9208205928595185932',
        'md5': '65b6f060b40d90276ec6fb9b992c1216',
        'info_dict': {
            'id': '9208205928595185932',
            'ext': 'm3u8',
            'title': 'WITHIN TEMPTATION',
            'description': 'Robert Westerholt und Sharon Janny den Adel gründeten die Symphonic Metal-Band. Privat sind die Niederländer ein Paar und haben zwei Kinder. Die Single Ice Queen brachte ihnen Platin und Gold und verhalf 2002 zum internationalen Durchbruch. Charakteristisch für die Band war Anfangs der hohe Gesang von Frontfrau Sharon. Stilistisch fing die Band im Gothic Metal an. Mit neuem Sound, schnellen Gitarrenriffs und Gitarrensoli, avancierte Within Temptation zur erfolgreichen Rockband. Auch dieses Jahr wird die Band ihre Fangemeinde wieder mitreißen.',
        }
    }, {
        'url': 'https://www.magenta-musik-360.de/festivals/wacken-world-wide-2020-body-count-feat-ice-t',
        'md5': '81010d27d7cab3f7da0b0f681b983b7e',
        'info_dict': {
            'id': '9208205928595231363',
            'ext': 'm3u8',
            'title': 'Body Count feat. Ice-T',
            'description': 'Body Count feat. Ice-T konnten bereits im vergangenen Jahr auf dem „Holy Ground“ in Wacken überzeugen. 2020 gehen die Crossover-Metaller aus einem Club in Los Angeles auf Sendung und bringen mit ihrer Mischung aus Metal und Hip-Hop Abwechslung und ordentlich Alarm zum WWW. Bereits seit 1990 stehen die beiden Gründer Ice-T (Gesang) und Ernie C (Gitarre) auf der Bühne. Sieben Studioalben hat die Gruppe bis jetzt veröffentlicht, darunter das Debüt „Body Count“ (1992) mit dem kontroversen Track „Cop Killer“.',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single magenta-musik-360 video.

        Downloads the player JSON for the asset, reads optional metadata
        (title, description, runtime, cover images) from it, then downloads
        the referenced XML document and takes the media URL from the ``src``
        attribute of its first nested element.
        """
        video_id = self._match_id(url)
        # _match_id casts to string, but since "None" is not a valid video_id for magenta
        # there is no risk for confusion
        if video_id == "None":
            webpage = self._download_webpage(url, video_id)
            video_id = self._html_search_regex(r'data-asset-id="([^"]+)"', webpage, 'video_id')

        # Named player_data / media_doc rather than json / xml so the locals
        # do not shadow the standard library module names.
        player_data = self._download_json(
            "https://wcps.t-online.de/cvss/magentamusic/vodplayer/v3/player/58935/%s/Main%%20Movie" % video_id,
            video_id)
        feature = player_data['content']['feature']
        xml_url = feature['representations'][0]['contentPackages'][0]['media']['href']

        # Metadata is optional; an empty dict leaves every derived field at None.
        metadata = feature.get('metadata') or {}
        title = metadata.get('title')
        description = metadata.get('fullDescription')
        duration = metadata.get('runtimeInSeconds')

        thumbnails = []
        for img_key in ('teaserImageWide', 'smallCoverImage'):
            # Only keep images that actually carry a link: a thumbnail entry
            # with a missing URL is useless to downstream consumers, and a
            # null image object must not raise.
            href = (metadata.get(img_key) or {}).get('href')
            if href:
                thumbnails.append({'url': href})

        media_doc = self._download_xml(xml_url, video_id)
        final_url = media_doc[0][0][0].attrib['src']

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'url': final_url,
            'duration': duration,
            'thumbnails': thumbnails
        }
|
31
haruhi_dl/extractor/maoritv.py
Normal file
31
haruhi_dl/extractor/maoritv.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MaoriTVIE(InfoExtractor):
    """Hand maoritelevision.com show pages over to the BrightcoveNew extractor."""

    _VALID_URL = r'https?://(?:www\.)?maoritelevision\.com/shows/(?:[^/]+/)+(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.maoritelevision.com/shows/korero-mai/S01E054/korero-mai-series-1-episode-54',
        'md5': '5ade8ef53851b6a132c051b1cd858899',
        'info_dict': {
            'id': '4774724855001',
            'ext': 'mp4',
            'title': 'Kōrero Mai, Series 1 Episode 54',
            'upload_date': '20160226',
            'timestamp': 1456455018,
            'description': 'md5:59bde32fd066d637a1a55794c56d8dcb',
            'uploader_id': '1614493167001',
        },
    }
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1614493167001/HJlhIQhQf_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Find the Brightcove video id on the page and return a url_result for it."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # The numeric id is exposed through a data attribute in the page markup.
        video_ref = self._search_regex(
            r'data-main-video-id=["\'](\d+)', page, 'brightcove id')
        player_url = self.BRIGHTCOVE_URL_TEMPLATE % video_ref
        return self.url_result(player_url, 'BrightcoveNew', video_ref)
|
|
@ -5,12 +5,25 @@ from .common import SelfhostedInfoExtractor
|
||||||
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
clean_html,
|
clean_html,
|
||||||
|
float_or_none,
|
||||||
|
int_or_none,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
unescapeHTML,
|
||||||
|
url_or_none,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from urllib.parse import (
|
||||||
|
parse_qs,
|
||||||
|
urlencode,
|
||||||
|
urlparse,
|
||||||
|
)
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
from .peertube import PeerTubeSHIE
|
||||||
|
|
||||||
|
|
||||||
class MastodonSHIE(SelfhostedInfoExtractor):
|
class MastodonSHIE(SelfhostedInfoExtractor):
|
||||||
"""
|
"""
|
||||||
|
@ -23,6 +36,7 @@ class MastodonSHIE(SelfhostedInfoExtractor):
|
||||||
"""
|
"""
|
||||||
IE_NAME = 'mastodon'
|
IE_NAME = 'mastodon'
|
||||||
_VALID_URL = r'mastodon:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'mastodon:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
|
_NETRC_MACHINE = 'mastodon'
|
||||||
_SH_VALID_URL = r'''(?x)
|
_SH_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?P<host>[^/\s]+)/
|
(?P<host>[^/\s]+)/
|
||||||
|
@ -45,6 +59,7 @@ class MastodonSHIE(SelfhostedInfoExtractor):
|
||||||
'<li><a href="https://docs.joinmastodon.org/">Documentation</a></li>',
|
'<li><a href="https://docs.joinmastodon.org/">Documentation</a></li>',
|
||||||
'<title>Pleroma</title>',
|
'<title>Pleroma</title>',
|
||||||
'<noscript>To use Pleroma, please enable JavaScript.</noscript>',
|
'<noscript>To use Pleroma, please enable JavaScript.</noscript>',
|
||||||
|
'<noscript>To use Soapbox, please enable JavaScript.</noscript>',
|
||||||
'Alternatively, try one of the <a href="https://apps.gab.com">native apps</a> for Gab Social for your platform.',
|
'Alternatively, try one of the <a href="https://apps.gab.com">native apps</a> for Gab Social for your platform.',
|
||||||
)
|
)
|
||||||
_SH_VALID_CONTENT_REGEXES = (
|
_SH_VALID_CONTENT_REGEXES = (
|
||||||
|
@ -96,39 +111,238 @@ class MastodonSHIE(SelfhostedInfoExtractor):
|
||||||
'title': 're:.+ - He shoots, he scores and the crowd went wild.... #Animal #Sports',
|
'title': 're:.+ - He shoots, he scores and the crowd went wild.... #Animal #Sports',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
# Soapbox, audio file
|
||||||
|
'url': 'https://gleasonator.com/notice/9zvJY6h7jJzwopKAIi',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '9zvJY6h7jJzwopKAIi',
|
||||||
|
'title': 're:.+ - #FEDIBLOCK',
|
||||||
|
'ext': 'oga',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
# mastodon, card to youtube
|
||||||
|
'url': 'https://mstdn.social/@polamatysiak/106183574509332910',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'RWDU0BjcYp0',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'polamatysiak - Moje wczorajsze wystąpienie w Sejmie, koniecznie zobaczcie do końca 🙂 \n#pracaposłanki\n\nhttps://youtu.be/RWDU0BjcYp0',
|
||||||
|
'description': 'md5:0c16fa11a698d5d1b171963fd6833297',
|
||||||
|
'uploader': 'Paulina Matysiak',
|
||||||
|
'uploader_id': 'UCLRAd9-Hw6kEI1aPBrSaF9A',
|
||||||
|
'upload_date': '20210505',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
|
def _determine_instance_software(self, host, webpage=None):
|
||||||
|
if webpage:
|
||||||
|
for i, string in enumerate(self._SH_VALID_CONTENT_STRINGS):
|
||||||
|
if string in webpage:
|
||||||
|
return ['mastodon', 'mastodon', 'pleroma', 'pleroma', 'pleroma', 'gab'][i]
|
||||||
|
if any(s in webpage for s in PeerTubeSHIE._SH_VALID_CONTENT_STRINGS):
|
||||||
|
return 'peertube'
|
||||||
|
|
||||||
|
nodeinfo_href = self._download_json(
|
||||||
|
f'https://{host}/.well-known/nodeinfo', host, 'Downloading instance nodeinfo link')
|
||||||
|
|
||||||
|
nodeinfo = self._download_json(
|
||||||
|
nodeinfo_href['links'][-1]['href'], host, 'Downloading instance nodeinfo')
|
||||||
|
|
||||||
|
return nodeinfo['software']['name']
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if not username:
|
||||||
|
return False
|
||||||
|
|
||||||
|
# very basic regex, but the instance domain (the one where user has an account)
|
||||||
|
# must be separated from the user login
|
||||||
|
mobj = re.match(r'^(?P<username>[^@]+(?:@[^@]+)?)@(?P<instance>.+)$', username)
|
||||||
|
if not mobj:
|
||||||
|
self.report_warning(
|
||||||
|
'Invalid login format - must be in format [username or email]@[instance]')
|
||||||
|
username, instance = mobj.group('username', 'instance')
|
||||||
|
|
||||||
|
app_info = self._downloader.cache.load('mastodon-apps', instance)
|
||||||
|
if not app_info:
|
||||||
|
app_info = self._download_json(
|
||||||
|
f'https://{instance}/api/v1/apps', None, 'Creating an app', headers={
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
}, data=bytes(json.dumps({
|
||||||
|
'client_name': 'haruhi-dl',
|
||||||
|
'redirect_uris': 'urn:ietf:wg:oauth:2.0:oob',
|
||||||
|
'scopes': 'read',
|
||||||
|
'website': 'https://haruhi.download',
|
||||||
|
}).encode('utf-8')))
|
||||||
|
self._downloader.cache.store('mastodon-apps', instance, app_info)
|
||||||
|
|
||||||
|
login_webpage = self._download_webpage(
|
||||||
|
f'https://{instance}/oauth/authorize', None, 'Downloading login page', query={
|
||||||
|
'client_id': app_info['client_id'],
|
||||||
|
'scope': 'read',
|
||||||
|
'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
|
||||||
|
'response_type': 'code',
|
||||||
|
})
|
||||||
|
oauth_token = None
|
||||||
|
# this needs to be codebase-specific, as the HTML page differs between codebases
|
||||||
|
if 'xlink:href="#mastodon-svg-logo-full"' in login_webpage:
|
||||||
|
# mastodon
|
||||||
|
if '@' not in username:
|
||||||
|
self.report_warning(
|
||||||
|
'Invalid login format - for Mastodon instances e-mail address is required')
|
||||||
|
login_form = self._hidden_inputs(login_webpage)
|
||||||
|
login_form['user[email]'] = username
|
||||||
|
login_form['user[password]'] = password
|
||||||
|
login_req, urlh = self._download_webpage_handle(
|
||||||
|
f'https://{instance}/auth/sign_in', None, 'Sending login details',
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
}, data=bytes(urlencode(login_form).encode('utf-8')))
|
||||||
|
# cached apps may already be authorized
|
||||||
|
if '/oauth/authorize/native' in urlh.url:
|
||||||
|
oauth_token = parse_qs(urlparse(urlh.url).query)['code'][0]
|
||||||
|
else:
|
||||||
|
auth_form = self._hidden_inputs(
|
||||||
|
self._search_regex(
|
||||||
|
r'(?s)(<form\b[^>]+>.+?>Authorize</.+?</form>)',
|
||||||
|
login_req, 'authorization form'))
|
||||||
|
_, urlh = self._download_webpage_handle(
|
||||||
|
f'https://{instance}/oauth/authorize', None, 'Confirming authorization',
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
}, data=bytes(urlencode(auth_form).encode('utf-8')))
|
||||||
|
oauth_token = parse_qs(urlparse(urlh.url).query)['code'][0]
|
||||||
|
elif 'content: "✔\\fe0e";' in login_webpage:
|
||||||
|
# pleroma
|
||||||
|
login_form = self._hidden_inputs(login_webpage)
|
||||||
|
login_form['authorization[scope][]'] = 'read'
|
||||||
|
login_form['authorization[name]'] = username
|
||||||
|
login_form['authorization[password]'] = password
|
||||||
|
login_req = self._download_webpage(
|
||||||
|
f'https://{instance}/oauth/authorize', None, 'Sending login details',
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
}, data=bytes(urlencode(login_form).encode('utf-8')))
|
||||||
|
# TODO: 2FA, error handling
|
||||||
|
oauth_token = self._search_regex(
|
||||||
|
r'<h2>\s*Token code is\s*<br>\s*([a-zA-Z\d_-]+)\s*</h2>',
|
||||||
|
login_req, 'oauth token')
|
||||||
|
else:
|
||||||
|
raise ExtractorError('Unknown instance type')
|
||||||
|
|
||||||
|
actual_token = self._download_json(
|
||||||
|
f'https://{instance}/oauth/token', None, 'Downloading the actual token',
|
||||||
|
headers={
|
||||||
|
'Content-Type': 'application/x-www-form-urlencoded',
|
||||||
|
}, data=bytes(urlencode({
|
||||||
|
'client_id': app_info['client_id'],
|
||||||
|
'client_secret': app_info['client_secret'],
|
||||||
|
'redirect_uri': 'urn:ietf:wg:oauth:2.0:oob',
|
||||||
|
'scope': 'read',
|
||||||
|
'code': oauth_token,
|
||||||
|
'grant_type': 'authorization_code',
|
||||||
|
}).encode('utf-8')))
|
||||||
|
return {
|
||||||
|
'instance': instance,
|
||||||
|
'authorization': f"{actual_token['token_type']} {actual_token['access_token']}",
|
||||||
|
}
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
|
ap_censorship_circuvement = False
|
||||||
if not mobj:
|
if not mobj:
|
||||||
mobj = re.match(self._SH_VALID_URL, url)
|
mobj = re.match(self._SH_VALID_URL, url)
|
||||||
|
if not mobj and self._downloader.params.get('force_use_mastodon'):
|
||||||
|
mobj = re.match(PeerTubeSHIE._VALID_URL, url)
|
||||||
|
if mobj:
|
||||||
|
ap_censorship_circuvement = 'peertube'
|
||||||
|
if not mobj and self._downloader.params.get('force_use_mastodon'):
|
||||||
|
mobj = re.match(PeerTubeSHIE._SH_VALID_URL, url)
|
||||||
|
if mobj:
|
||||||
|
ap_censorship_circuvement = 'peertube'
|
||||||
|
if not mobj:
|
||||||
|
raise ExtractorError('Unrecognized url type')
|
||||||
host, id = mobj.group('host', 'id')
|
host, id = mobj.group('host', 'id')
|
||||||
|
|
||||||
if any(frag in url for frag in ('/objects/', '/activities/')):
|
login_info = self._login()
|
||||||
if not webpage:
|
|
||||||
webpage = self._download_webpage(url, '%s:%s' % (host, id), expected_status=302)
|
|
||||||
real_url = self._og_search_property('url', webpage, default=None)
|
|
||||||
if real_url:
|
|
||||||
return self.url_result(real_url, ie='MastodonSH')
|
|
||||||
|
|
||||||
metadata = self._download_json('https://%s/api/v1/statuses/%s' % (host, id), '%s:%s' % (host, id))
|
if login_info and host != login_info['instance']:
|
||||||
|
wf_url = url
|
||||||
if not metadata['media_attachments']:
|
if not url.startswith('http'):
|
||||||
raise ExtractorError('No attached medias')
|
software = ap_censorship_circuvement
|
||||||
|
if not software:
|
||||||
|
software = self._determine_instance_software(host, webpage)
|
||||||
|
url_part = None
|
||||||
|
if software == 'pleroma':
|
||||||
|
if '-' in id: # UUID
|
||||||
|
url_part = 'objects'
|
||||||
|
else:
|
||||||
|
url_part = 'notice'
|
||||||
|
elif software == 'peertube':
|
||||||
|
url_part = 'videos/watch'
|
||||||
|
elif software in ('mastodon', 'gab'):
|
||||||
|
# mastodon and gab social require usernames in the url,
|
||||||
|
# but we can't determine the username without fetching the post,
|
||||||
|
# but we can't fetch the post without determining the username...
|
||||||
|
raise ExtractorError(f'Use the full url with --force-use-mastodon to download from {software}', expected=True)
|
||||||
|
else:
|
||||||
|
raise ExtractorError(f'Unknown software: {software}')
|
||||||
|
wf_url = f'https://{host}/{url_part}/{id}'
|
||||||
|
search = self._download_json(
|
||||||
|
f"https://{login_info['instance']}/api/v2/search", '%s:%s' % (host, id),
|
||||||
|
query={
|
||||||
|
'q': wf_url,
|
||||||
|
'type': 'statuses',
|
||||||
|
'resolve': True,
|
||||||
|
}, headers={
|
||||||
|
'Authorization': login_info['authorization'],
|
||||||
|
})
|
||||||
|
assert len(search['statuses']) == 1
|
||||||
|
metadata = search['statuses'][0]
|
||||||
|
else:
|
||||||
|
if not login_info and any(frag in url for frag in ('/objects/', '/activities/')):
|
||||||
|
if not webpage:
|
||||||
|
webpage = self._download_webpage(url, '%s:%s' % (host, id), expected_status=302)
|
||||||
|
real_url = self._og_search_property('url', webpage, default=None)
|
||||||
|
if real_url:
|
||||||
|
return self.url_result(real_url, ie='MastodonSH')
|
||||||
|
metadata = self._download_json(
|
||||||
|
'https://%s/api/v1/statuses/%s' % (host, id), '%s:%s' % (host, id),
|
||||||
|
headers={
|
||||||
|
'Authorization': login_info['authorization'],
|
||||||
|
} if login_info else {})
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
for media in metadata['media_attachments']:
|
for media in metadata['media_attachments'] or ():
|
||||||
if media['type'] == 'video':
|
if media['type'] in ('video', 'audio'):
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': media['id'],
|
'id': media['id'],
|
||||||
'title': str_or_none(media['description']),
|
'title': str_or_none(media['description']),
|
||||||
'url': str_or_none(media['url']),
|
'url': str_or_none(media['url']),
|
||||||
'thumbnail': str_or_none(media['preview_url']),
|
'thumbnail': str_or_none(media['preview_url']) if media['type'] == 'video' else None,
|
||||||
|
'vcodec': 'none' if media['type'] == 'audio' else None,
|
||||||
|
'duration': float_or_none(try_get(media, lambda x: x['meta']['original']['duration'])),
|
||||||
|
'width': int_or_none(try_get(media, lambda x: x['meta']['original']['width'])),
|
||||||
|
'height': int_or_none(try_get(media, lambda x: x['meta']['original']['height'])),
|
||||||
|
'tbr': int_or_none(try_get(media, lambda x: x['meta']['original']['bitrate'])),
|
||||||
})
|
})
|
||||||
if len(entries) == 0:
|
|
||||||
raise ExtractorError('No audio/video attachments')
|
|
||||||
|
|
||||||
title = '%s - %s' % (str_or_none(metadata['account'].get('display_name') or metadata['account']['acct']), clean_html(str_or_none(metadata['content'])))
|
title = '%s - %s' % (str_or_none(metadata['account'].get('display_name') or metadata['account']['acct']), clean_html(str_or_none(metadata['content'])))
|
||||||
|
if ap_censorship_circuvement == 'peertube':
|
||||||
|
title = unescapeHTML(
|
||||||
|
self._search_regex(
|
||||||
|
r'^<p><a href="[^"]+">(.+?)</a></p>',
|
||||||
|
metadata['content'], 'video title'))
|
||||||
|
|
||||||
|
if len(entries) == 0:
|
||||||
|
card = metadata.get('card')
|
||||||
|
if card:
|
||||||
|
return {
|
||||||
|
'_type': 'url_transparent',
|
||||||
|
'url': card['url'],
|
||||||
|
'title': title,
|
||||||
|
'thumbnail': url_or_none(card.get('image')),
|
||||||
|
}
|
||||||
|
raise ExtractorError('No audio/video attachments')
|
||||||
|
|
||||||
info_dict = {
|
info_dict = {
|
||||||
"id": id,
|
"id": id,
|
||||||
|
|
|
@ -15,33 +15,39 @@ from ..utils import (
|
||||||
|
|
||||||
|
|
||||||
class MedalTVIE(InfoExtractor):
|
class MedalTVIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
|
'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
|
||||||
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '34934644',
|
'id': '2mA60jWAGQCBH',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Quad Cold',
|
'title': 'Quad Cold',
|
||||||
'description': 'Medal,https://medal.tv/desktop/',
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
'uploader': 'MowgliSB',
|
'uploader': 'MowgliSB',
|
||||||
'timestamp': 1603165266,
|
'timestamp': 1603165266,
|
||||||
'upload_date': '20201020',
|
'upload_date': '20201020',
|
||||||
'uploader_id': 10619174,
|
'uploader_id': '10619174',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://medal.tv/clips/36787208',
|
'url': 'https://medal.tv/clips/2um24TWdty0NA',
|
||||||
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '36787208',
|
'id': '2um24TWdty0NA',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'u tk me i tk u bigger',
|
'title': 'u tk me i tk u bigger',
|
||||||
'description': 'Medal,https://medal.tv/desktop/',
|
'description': 'Medal,https://medal.tv/desktop/',
|
||||||
'uploader': 'Mimicc',
|
'uploader': 'Mimicc',
|
||||||
'timestamp': 1605580939,
|
'timestamp': 1605580939,
|
||||||
'upload_date': '20201117',
|
'upload_date': '20201117',
|
||||||
'uploader_id': 5156321,
|
'uploader_id': '5156321',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/37rMeFpryCC-9',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
|
74
haruhi_dl/extractor/misskey.py
Normal file
74
haruhi_dl/extractor/misskey.py
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from .common import SelfhostedInfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
mimetype2ext,
|
||||||
|
parse_iso8601,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
class MisskeySHIE(SelfhostedInfoExtractor):
    """Extractor for audio/video files attached to notes on Misskey instances."""

    IE_NAME = 'misskey'
    _VALID_URL = r'misskey:(?P<host>[^:]+):(?P<id>[\da-z]+)'
    _SH_VALID_URL = r'https?://(?P<host>[^/]+)/notes/(?P<id>[\da-z]+)'
    _SH_VALID_CONTENT_STRINGS = (
        '<meta name="application-name" content="Misskey"',
        '<meta name="misskey:',
        '<!-- If you are reading this message... how about joining the development of Misskey? -->',
    )

    _TESTS = [{
        'url': 'https://catgirl.life/notes/8lh52dlrii',
        'info_dict': {
            'id': '8lh52dlrii',
            'ext': 'mp4',
            'timestamp': 1604387877,
            'upload_date': '20201103',
            'title': '@graf@poa.st @Moon@shitposter.club \n*kickstarts your federation*',
        },
    }]

    def _selfhosted_extract(self, url, webpage=None):
        """Fetch a note through the instance API and return its media.

        A note with exactly one audio/video file yields that file's info
        dict; several files yield a playlist. In both cases the result's
        ``id`` and ``title`` are taken from the note itself.

        Raises:
            ExtractorError: if the note has no audio or video attachment.
        """
        host, video_id = self._match_id_and_host(url)

        post = self._download_json(
            f'https://{host}/api/notes/show', video_id,
            data=json.dumps({
                'noteId': video_id,
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })

        entries = []
        # A note without attachments must end in the "No media" error below,
        # not in a KeyError/TypeError on a missing or null file list.
        for file in post.get('files') or []:
            mime_type = file.get('type') or ''
            if not mime_type.startswith(('video/', 'audio/')):
                continue
            entries.append({
                'id': file['id'],
                'url': file['url'],
                'ext': mimetype2ext(mime_type),
                'title': file.get('name'),
                'thumbnail': file.get('thumbnailUrl'),
                'timestamp': parse_iso8601(file.get('createdAt')),
                # A size of 0 is reported as unknown; a missing size is
                # treated the same way instead of raising.
                'filesize': file.get('size') or None,
                'age_limit': 18 if file.get('isSensitive') else 0,
            })

        if not entries:
            raise ExtractorError('No media found in post')
        elif len(entries) == 1:
            info_dict = entries[0]
        else:
            info_dict = {
                '_type': 'playlist',
                'entries': entries,
            }

        # The note's own id and text override whatever the file carried.
        info_dict.update({
            'id': video_id,
            'title': post.get('text') or '_',
        })
        return info_dict
|
|
@ -1,15 +1,91 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .nhl import NHLBaseIE
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
int_or_none,
|
||||||
|
parse_duration,
|
||||||
|
parse_iso8601,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class MLBIE(NHLBaseIE):
|
class MLBBaseIE(InfoExtractor):
    """Shared extraction logic for the MLB extractors.

    Subclasses supply ``_download_video_data``, ``_get_feed``,
    ``_extract_mlb_subtitles`` and ``_TIMESTAMP_KEY``; this class turns the
    data they return into the final info dict.
    """

    def _real_extract(self, url):
        """Download the video data and build formats, thumbnails and metadata."""
        display_id = self._match_id(url)
        video = self._download_video_data(display_id)
        video_id = video['id']
        title = video['title']
        feed = self._get_feed(video)

        formats = []
        for playback in (feed.get('playbacks') or []):
            playback_url = playback.get('url')
            if not playback_url:
                continue
            name = playback.get('name')
            ext = determine_ext(playback_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    playback_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id=name, fatal=False))
            else:
                f = {
                    'format_id': name,
                    'url': playback_url,
                }
                # The playback name may carry bitrate and resolution; a
                # playback without a name must not make re.search raise.
                mobj = re.search(r'_(\d+)K_(\d+)X(\d+)', name or '')
                if mobj:
                    f.update({
                        'height': int(mobj.group(3)),
                        'tbr': int(mobj.group(1)),
                        'width': int(mobj.group(2)),
                    })
                # The URL may carry the same details plus the frame rate;
                # when both patterns match, the URL's values win.
                mobj = re.search(r'_(\d+)x(\d+)_(\d+)_(\d+)K\.mp4', playback_url)
                if mobj:
                    f.update({
                        'fps': int(mobj.group(3)),
                        'height': int(mobj.group(2)),
                        'tbr': int(mobj.group(4)),
                        'width': int(mobj.group(1)),
                    })
                formats.append(f)
        self._sort_formats(formats)

        thumbnails = []
        for cut in (try_get(feed, lambda x: x['image']['cuts'], list) or []):
            src = cut.get('src')
            if not src:
                continue
            thumbnails.append({
                'height': int_or_none(cut.get('height')),
                'url': src,
                'width': int_or_none(cut.get('width')),
            })

        # Subtitles are filed under the video's language, defaulting to 'en'.
        language = (video.get('language') or 'EN').lower()

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': video.get('description'),
            'duration': parse_duration(feed.get('duration')),
            'thumbnails': thumbnails,
            'timestamp': parse_iso8601(video.get(self._TIMESTAMP_KEY)),
            'subtitles': self._extract_mlb_subtitles(feed, language),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class MLBIE(MLBBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:[\da-z_-]+\.)*(?P<site>mlb)\.com/
|
(?:[\da-z_-]+\.)*mlb\.com/
|
||||||
(?:
|
(?:
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+/)*c-|
|
(?:[^/]+/)*video/[^/]+/c-|
|
||||||
(?:
|
(?:
|
||||||
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
shared/video/embed/(?:embed|m-internal-embed)\.html|
|
||||||
(?:[^/]+/)+(?:play|index)\.jsp|
|
(?:[^/]+/)+(?:play|index)\.jsp|
|
||||||
|
@ -18,7 +94,6 @@ class MLBIE(NHLBaseIE):
|
||||||
(?P<id>\d+)
|
(?P<id>\d+)
|
||||||
)
|
)
|
||||||
'''
|
'''
|
||||||
_CONTENT_DOMAIN = 'content.mlb.com'
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
'url': 'https://www.mlb.com/mariners/video/ackleys-spectacular-catch/c-34698933',
|
||||||
|
@ -76,18 +151,6 @@ class MLBIE(NHLBaseIE):
|
||||||
'thumbnail': r're:^https?://.*\.jpg$',
|
'thumbnail': r're:^https?://.*\.jpg$',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://www.mlb.com/news/blue-jays-kevin-pillar-goes-spidey-up-the-wall-to-rob-tim-beckham-of-a-homer/c-118550098',
|
|
||||||
'md5': 'e09e37b552351fddbf4d9e699c924d68',
|
|
||||||
'info_dict': {
|
|
||||||
'id': '75609783',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Must C: Pillar climbs for catch',
|
|
||||||
'description': '4/15/15: Blue Jays outfielder Kevin Pillar continues his defensive dominance by climbing the wall in left to rob Tim Beckham of a home run',
|
|
||||||
'timestamp': 1429139220,
|
|
||||||
'upload_date': '20150415',
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
|
'url': 'https://www.mlb.com/video/hargrove-homers-off-caldwell/c-1352023483?tid=67793694',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -113,8 +176,92 @@ class MLBIE(NHLBaseIE):
|
||||||
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
'url': 'http://mlb.mlb.com/shared/video/embed/m-internal-embed.html?content_id=75609783&property=mlb&autoplay=true&hashmode=false&siteSection=mlb/multimedia/article_118550098/article_embed&club=mlb',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
'url': 'https://www.mlb.com/cut4/carlos-gomez-borrowed-sunglasses-from-an-as-fan/c-278912842',
|
|
||||||
'only_matching': True,
|
|
||||||
}
|
|
||||||
]
|
]
|
||||||
|
_TIMESTAMP_KEY = 'date'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_feed(video):
|
||||||
|
return video
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_mlb_subtitles(feed, language):
|
||||||
|
subtitles = {}
|
||||||
|
for keyword in (feed.get('keywordsAll') or []):
|
||||||
|
keyword_type = keyword.get('type')
|
||||||
|
if keyword_type and keyword_type.startswith('closed_captions_location_'):
|
||||||
|
cc_location = keyword.get('value')
|
||||||
|
if cc_location:
|
||||||
|
subtitles.setdefault(language, []).append({
|
||||||
|
'url': cc_location,
|
||||||
|
})
|
||||||
|
return subtitles
|
||||||
|
|
||||||
|
def _download_video_data(self, display_id):
    """Download and return the JSON details document for *display_id*."""
    details_url = 'http://content.mlb.com/mlb/item/id/v1/%s/details/web-v1.json' % display_id
    return self._download_json(details_url, display_id)
|
||||||
|
|
||||||
|
|
||||||
|
class MLBVideoIE(MLBBaseIE):
    """Extractor for mlb.com ``/video/<slug>`` pages.

    URLs that MLBIE already accepts are left to MLBIE.
    """

    _VALID_URL = r'https?://(?:www\.)?mlb\.com/(?:[^/]+/)*video/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.mlb.com/mariners/video/ackley-s-spectacular-catch-c34698933',
        'md5': '632358dacfceec06bad823b83d21df2d',
        'info_dict': {
            'id': 'c04a8863-f569-42e6-9f87-992393657614',
            'ext': 'mp4',
            'title': "Ackley's spectacular catch",
            'description': 'md5:7f5a981eb4f3cbc8daf2aeffa2215bf0',
            'duration': 66,
            'timestamp': 1405995000,
            'upload_date': '20140722',
            'thumbnail': r're:^https?://.+',
        },
    }
    _TIMESTAMP_KEY = 'timestamp'

    @classmethod
    def suitable(cls, url):
        """Decline any URL that MLBIE matches; otherwise apply the normal check."""
        return False if MLBIE.suitable(url) else super(MLBVideoIE, cls).suitable(url)

    @staticmethod
    def _get_feed(video):
        """Return the first feed of the video data."""
        return video['feeds'][0]

    @staticmethod
    def _extract_mlb_subtitles(feed, language):
        """Map *language* to the feed's closed-caption URLs.

        Returns an empty dict when the feed lists no closed captions.
        """
        subtitles = {}
        for cc_location in (feed.get('closedCaptions') or []):
            subtitles.setdefault(language, []).append({
                'url': cc_location,
            })
        # The collected captions must be handed back; without this return the
        # caller would always receive None for the 'subtitles' field.
        return subtitles

    def _download_video_data(self, display_id):
        """Query the GraphQL gateway for the video identified by *display_id*."""
        # https://www.mlb.com/data-service/en/videos/[SLUG]
        return self._download_json(
            'https://fastball-gateway.mlb.com/graphql',
            display_id, query={
                'query': '''{
  mediaPlayback(ids: "%s") {
    description
    feeds(types: CMS) {
      closedCaptions
      duration
      image {
        cuts {
          width
          height
          src
        }
      }
      playbacks {
        name
        url
      }
    }
    id
    timestamp
    title
  }
}''' % display_id,
            })['data']['mediaPlayback'][0]
|
||||||
|
|
|
@ -111,7 +111,7 @@ class MSNIE(InfoExtractor):
|
||||||
continue
|
continue
|
||||||
if 'format=m3u8-aapl' in format_url:
|
if 'format=m3u8-aapl' in format_url:
|
||||||
# m3u8_native should not be used here until
|
# m3u8_native should not be used here until
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/9913 is fixed
|
# https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
|
||||||
formats.extend(self._extract_m3u8_formats(
|
formats.extend(self._extract_m3u8_formats(
|
||||||
format_url, display_id, 'mp4',
|
format_url, display_id, 'mp4',
|
||||||
m3u8_id='hls', fatal=False))
|
m3u8_id='hls', fatal=False))
|
||||||
|
|
|
@ -255,7 +255,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_child_with_type(parent, t):
|
def _extract_child_with_type(parent, t):
|
||||||
return next(c for c in parent['children'] if c.get('type') == t)
|
for c in parent['children']:
|
||||||
|
if c.get('type') == t:
|
||||||
|
return c
|
||||||
|
|
||||||
def _extract_mgid(self, webpage):
|
def _extract_mgid(self, webpage):
|
||||||
try:
|
try:
|
||||||
|
@ -286,7 +288,8 @@ class MTVServicesInfoExtractor(InfoExtractor):
|
||||||
data = self._parse_json(self._search_regex(
|
data = self._parse_json(self._search_regex(
|
||||||
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
|
||||||
main_container = self._extract_child_with_type(data, 'MainContainer')
|
main_container = self._extract_child_with_type(data, 'MainContainer')
|
||||||
video_player = self._extract_child_with_type(main_container, 'VideoPlayer')
|
ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
|
||||||
|
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
|
||||||
mgid = video_player['props']['media']['video']['config']['uri']
|
mgid = video_player['props']['media']['video']['config']['uri']
|
||||||
|
|
||||||
return mgid
|
return mgid
|
||||||
|
@ -320,7 +323,7 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_url(webpage):
|
def _extract_url(webpage):
|
||||||
mobj = re.search(
|
mobj = re.search(
|
||||||
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media.mtvnservices.com/embed/.+?)\1', webpage)
|
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
|
||||||
if mobj:
|
if mobj:
|
||||||
return mobj.group('url')
|
return mobj.group('url')
|
||||||
|
|
||||||
|
|
|
@ -108,7 +108,7 @@ class NHLIE(NHLBaseIE):
|
||||||
'timestamp': 1454544904,
|
'timestamp': 1454544904,
|
||||||
},
|
},
|
||||||
}, {
|
}, {
|
||||||
# Some m3u8 URLs are invalid (https://github.com/ytdl-org/haruhi-dl/issues/10713)
|
# Some m3u8 URLs are invalid (https://github.com/ytdl-org/youtube-dl/issues/10713)
|
||||||
'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
|
'url': 'https://www.nhl.com/predators/video/poile-laviolette-on-subban-trade/t-277437416/c-44315003',
|
||||||
'md5': '50b2bb47f405121484dda3ccbea25459',
|
'md5': '50b2bb47f405121484dda3ccbea25459',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -1,25 +1,28 @@
|
||||||
|
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import datetime
|
import re
|
||||||
import functools
|
|
||||||
import json
|
import json
|
||||||
import math
|
import datetime
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
from ..postprocessor.ffmpeg import FFmpegPostProcessor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
compat_str,
|
||||||
compat_parse_qs,
|
compat_parse_qs,
|
||||||
compat_urllib_parse_urlparse,
|
compat_urllib_parse_urlparse,
|
||||||
)
|
)
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
determine_ext,
|
|
||||||
dict_get,
|
dict_get,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
|
||||||
InAdvancePagedList,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
float_or_none,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_duration,
|
parse_duration,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
|
PostProcessingError,
|
||||||
|
str_or_none,
|
||||||
remove_start,
|
remove_start,
|
||||||
try_get,
|
try_get,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
@ -34,7 +37,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
'url': 'http://www.nicovideo.jp/watch/sm22312215',
|
||||||
'md5': 'd1a75c0823e2f629128c43e1212760f9',
|
'md5': 'a5bad06f1347452102953f323c69da34s',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'sm22312215',
|
'id': 'sm22312215',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
|
@ -162,6 +165,11 @@ class NiconicoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
|
||||||
_NETRC_MACHINE = 'niconico'
|
_NETRC_MACHINE = 'niconico'
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
self._login()
|
self._login()
|
||||||
|
|
||||||
|
@ -188,40 +196,92 @@ class NiconicoIE(InfoExtractor):
|
||||||
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
|
||||||
login_ok = False
|
login_ok = False
|
||||||
if not login_ok:
|
if not login_ok:
|
||||||
self._downloader.report_warning('unable to log in: bad username or password')
|
self.report_warning('unable to log in: bad username or password')
|
||||||
return login_ok
|
return login_ok
|
||||||
|
|
||||||
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
def _get_heartbeat_info(self, info_dict):
|
||||||
def yesno(boolean):
|
|
||||||
return 'yes' if boolean else 'no'
|
|
||||||
|
|
||||||
session_api_data = api_data['video']['dmcInfo']['session_api']
|
video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
|
||||||
session_api_endpoint = session_api_data['urls'][0]
|
|
||||||
|
|
||||||
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
api_data = (
|
||||||
|
info_dict.get('_api_data')
|
||||||
|
or self._parse_json(
|
||||||
|
self._html_search_regex(
|
||||||
|
'data-api-data="([^"]+)"',
|
||||||
|
self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
|
||||||
|
'API data', default='{}'),
|
||||||
|
video_id))
|
||||||
|
|
||||||
|
session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
|
||||||
|
session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
|
||||||
|
|
||||||
|
def ping():
|
||||||
|
status = try_get(
|
||||||
|
self._download_json(
|
||||||
|
'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
|
||||||
|
query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
|
||||||
|
note='Acquiring permission for downloading video',
|
||||||
|
headers=self._API_HEADERS),
|
||||||
|
lambda x: x['meta']['status'])
|
||||||
|
if status != 200:
|
||||||
|
self.report_warning('Failed to acquire permission for playing video. The video may not download.')
|
||||||
|
|
||||||
|
yesno = lambda x: 'yes' if x else 'no'
|
||||||
|
|
||||||
|
# m3u8 (encryption)
|
||||||
|
if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
|
||||||
|
protocol = 'm3u8'
|
||||||
|
encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'hls_parameters': {
|
||||||
|
'encryption': {
|
||||||
|
encryption: {
|
||||||
|
'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
|
||||||
|
'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
|
||||||
|
}
|
||||||
|
},
|
||||||
|
'transfer_preset': '',
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
'segment_duration': 6000,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# http
|
||||||
|
else:
|
||||||
|
protocol = 'http'
|
||||||
|
session_api_http_parameters = {
|
||||||
|
'parameters': {
|
||||||
|
'http_output_download_parameters': {
|
||||||
|
'use_ssl': yesno(session_api_endpoint['isSsl']),
|
||||||
|
'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
session_response = self._download_json(
|
session_response = self._download_json(
|
||||||
session_api_endpoint['url'], video_id,
|
session_api_endpoint['url'], video_id,
|
||||||
query={'_format': 'json'},
|
query={'_format': 'json'},
|
||||||
headers={'Content-Type': 'application/json'},
|
headers={'Content-Type': 'application/json'},
|
||||||
note='Downloading JSON metadata for %s' % format_id,
|
note='Downloading JSON metadata for %s' % info_dict['format_id'],
|
||||||
data=json.dumps({
|
data=json.dumps({
|
||||||
'session': {
|
'session': {
|
||||||
'client_info': {
|
'client_info': {
|
||||||
'player_id': session_api_data['player_id'],
|
'player_id': session_api_data.get('playerId'),
|
||||||
},
|
},
|
||||||
'content_auth': {
|
'content_auth': {
|
||||||
'auth_type': session_api_data['auth_types'][session_api_data['protocols'][0]],
|
'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
|
||||||
'content_key_timeout': session_api_data['content_key_timeout'],
|
'content_key_timeout': session_api_data.get('contentKeyTimeout'),
|
||||||
'service_id': 'nicovideo',
|
'service_id': 'nicovideo',
|
||||||
'service_user_id': session_api_data['service_user_id']
|
'service_user_id': session_api_data.get('serviceUserId')
|
||||||
},
|
},
|
||||||
'content_id': session_api_data['content_id'],
|
'content_id': session_api_data.get('contentId'),
|
||||||
'content_src_id_sets': [{
|
'content_src_id_sets': [{
|
||||||
'content_src_ids': [{
|
'content_src_ids': [{
|
||||||
'src_id_to_mux': {
|
'src_id_to_mux': {
|
||||||
'audio_src_ids': [audio_quality['id']],
|
'audio_src_ids': [audio_src_id],
|
||||||
'video_src_ids': [video_quality['id']],
|
'video_src_ids': [video_src_id],
|
||||||
}
|
}
|
||||||
}]
|
}]
|
||||||
}],
|
}],
|
||||||
|
@ -229,52 +289,81 @@ class NiconicoIE(InfoExtractor):
|
||||||
'content_uri': '',
|
'content_uri': '',
|
||||||
'keep_method': {
|
'keep_method': {
|
||||||
'heartbeat': {
|
'heartbeat': {
|
||||||
'lifetime': session_api_data['heartbeat_lifetime']
|
'lifetime': session_api_data.get('heartbeatLifetime')
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'priority': session_api_data['priority'],
|
'priority': session_api_data.get('priority'),
|
||||||
'protocol': {
|
'protocol': {
|
||||||
'name': 'http',
|
'name': 'http',
|
||||||
'parameters': {
|
'parameters': {
|
||||||
'http_parameters': {
|
'http_parameters': session_api_http_parameters
|
||||||
'parameters': {
|
|
||||||
'http_output_download_parameters': {
|
|
||||||
'use_ssl': yesno(session_api_endpoint['is_ssl']),
|
|
||||||
'use_well_known_port': yesno(session_api_endpoint['is_well_known_port']),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'recipe_id': session_api_data['recipe_id'],
|
'recipe_id': session_api_data.get('recipeId'),
|
||||||
'session_operation_auth': {
|
'session_operation_auth': {
|
||||||
'session_operation_auth_by_signature': {
|
'session_operation_auth_by_signature': {
|
||||||
'signature': session_api_data['signature'],
|
'signature': session_api_data.get('signature'),
|
||||||
'token': session_api_data['token'],
|
'token': session_api_data.get('token'),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
'timing_constraint': 'unlimited'
|
'timing_constraint': 'unlimited'
|
||||||
}
|
}
|
||||||
}).encode())
|
}).encode())
|
||||||
|
|
||||||
resolution = video_quality.get('resolution', {})
|
info_dict['url'] = session_response['data']['session']['content_uri']
|
||||||
|
info_dict['protocol'] = protocol
|
||||||
|
|
||||||
|
# get heartbeat info
|
||||||
|
heartbeat_info_dict = {
|
||||||
|
'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
|
||||||
|
'data': json.dumps(session_response['data']),
|
||||||
|
# interval, convert milliseconds to seconds, then halve to make a buffer.
|
||||||
|
'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
|
||||||
|
'ping': ping
|
||||||
|
}
|
||||||
|
|
||||||
|
return info_dict, heartbeat_info_dict
|
||||||
|
|
||||||
|
def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
|
||||||
|
def parse_format_id(id_code):
|
||||||
|
mobj = re.match(r'''(?x)
|
||||||
|
(?:archive_)?
|
||||||
|
(?:(?P<codec>[^_]+)_)?
|
||||||
|
(?:(?P<br>[\d]+)kbps_)?
|
||||||
|
(?:(?P<res>[\d+]+)p_)?
|
||||||
|
''', '%s_' % id_code)
|
||||||
|
return mobj.groupdict() if mobj else {}
|
||||||
|
|
||||||
|
protocol = 'niconico_dmc'
|
||||||
|
format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
|
||||||
|
vdict = parse_format_id(video_quality['id'])
|
||||||
|
adict = parse_format_id(audio_quality['id'])
|
||||||
|
resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
|
||||||
|
vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], float)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'url': session_response['data']['session']['content_uri'],
|
'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
|
||||||
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
|
||||||
'abr': float_or_none(audio_quality.get('bitrate'), 1000),
|
'vcodec': vdict.get('codec'),
|
||||||
'vbr': float_or_none(video_quality.get('bitrate'), 1000),
|
'acodec': adict.get('codec'),
|
||||||
'height': resolution.get('height'),
|
'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
|
||||||
'width': resolution.get('width'),
|
'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
|
||||||
|
'height': int_or_none(resolution.get('height', vdict.get('res'))),
|
||||||
|
'width': int_or_none(resolution.get('width')),
|
||||||
|
'quality': -2 if 'low' in format_id else -1, # Default quality value is -1
|
||||||
|
'protocol': protocol,
|
||||||
|
'http_headers': {
|
||||||
|
'Origin': 'https://www.nicovideo.jp',
|
||||||
|
'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
# Get video webpage. We are not actually interested in it for normal
|
# Get video webpage for API data.
|
||||||
# cases, but need the cookies in order to be able to download the
|
|
||||||
# info webpage
|
|
||||||
webpage, handle = self._download_webpage_handle(
|
webpage, handle = self._download_webpage_handle(
|
||||||
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
'http://www.nicovideo.jp/watch/' + video_id, video_id)
|
||||||
if video_id.startswith('so'):
|
if video_id.startswith('so'):
|
||||||
|
@ -284,86 +373,136 @@ class NiconicoIE(InfoExtractor):
|
||||||
'data-api-data="([^"]+)"', webpage,
|
'data-api-data="([^"]+)"', webpage,
|
||||||
'API data', default='{}'), video_id)
|
'API data', default='{}'), video_id)
|
||||||
|
|
||||||
def _format_id_from_url(video_url):
|
def get_video_info_web(items):
|
||||||
return 'economy' if video_real_url.endswith('low') else 'normal'
|
return dict_get(api_data['video'], items)
|
||||||
|
|
||||||
try:
|
# Get video info
|
||||||
video_real_url = api_data['video']['smileInfo']['url']
|
video_info_xml = self._download_xml(
|
||||||
except KeyError: # Flash videos
|
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
||||||
# Get flv info
|
video_id, note='Downloading video info page')
|
||||||
flv_info_webpage = self._download_webpage(
|
|
||||||
'http://flapi.nicovideo.jp/api/getflv/' + video_id + '?as3=1',
|
|
||||||
video_id, 'Downloading flv info')
|
|
||||||
|
|
||||||
flv_info = compat_parse_qs(flv_info_webpage)
|
def get_video_info_xml(items):
|
||||||
if 'url' not in flv_info:
|
if not isinstance(items, list):
|
||||||
if 'deleted' in flv_info:
|
items = [items]
|
||||||
raise ExtractorError('The video has been deleted.',
|
for item in items:
|
||||||
expected=True)
|
ret = xpath_text(video_info_xml, './/' + item)
|
||||||
elif 'closed' in flv_info:
|
if ret:
|
||||||
raise ExtractorError('Niconico videos now require logging in',
|
return ret
|
||||||
expected=True)
|
|
||||||
elif 'error' in flv_info:
|
|
||||||
raise ExtractorError('%s reports error: %s' % (
|
|
||||||
self.IE_NAME, flv_info['error'][0]), expected=True)
|
|
||||||
else:
|
|
||||||
raise ExtractorError('Unable to find video URL')
|
|
||||||
|
|
||||||
video_info_xml = self._download_xml(
|
if get_video_info_xml('error'):
|
||||||
'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
|
error_code = get_video_info_xml('code')
|
||||||
video_id, note='Downloading video info page')
|
|
||||||
|
|
||||||
def get_video_info(items):
|
if error_code == 'DELETED':
|
||||||
if not isinstance(items, list):
|
raise ExtractorError('The video has been deleted.',
|
||||||
items = [items]
|
expected=True)
|
||||||
for item in items:
|
elif error_code == 'NOT_FOUND':
|
||||||
ret = xpath_text(video_info_xml, './/' + item)
|
raise ExtractorError('The video is not found.',
|
||||||
if ret:
|
expected=True)
|
||||||
return ret
|
elif error_code == 'COMMUNITY':
|
||||||
|
self.to_screen('%s: The video is community members only.' % video_id)
|
||||||
|
else:
|
||||||
|
raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))
|
||||||
|
|
||||||
video_real_url = flv_info['url'][0]
|
# Start extracting video formats
|
||||||
|
formats = []
|
||||||
|
|
||||||
extension = get_video_info('movie_type')
|
# Get HTML5 videos info
|
||||||
if not extension:
|
quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
|
||||||
extension = determine_ext(video_real_url)
|
if not quality_info:
|
||||||
|
raise ExtractorError('The video can\'t be downloaded', expected=True)
|
||||||
|
|
||||||
formats = [{
|
for audio_quality in quality_info.get('audios') or {}:
|
||||||
'url': video_real_url,
|
for video_quality in quality_info.get('videos') or {}:
|
||||||
'ext': extension,
|
if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
continue
|
||||||
}]
|
formats.append(self._extract_format_for_quality(
|
||||||
else:
|
api_data, video_id, audio_quality, video_quality))
|
||||||
formats = []
|
|
||||||
|
|
||||||
dmc_info = api_data['video'].get('dmcInfo')
|
# Get flv/swf info
|
||||||
if dmc_info: # "New" HTML5 videos
|
timestamp = None
|
||||||
quality_info = dmc_info['quality']
|
video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
|
||||||
for audio_quality in quality_info['audios']:
|
if video_real_url:
|
||||||
for video_quality in quality_info['videos']:
|
is_economy = video_real_url.endswith('low')
|
||||||
if not audio_quality['available'] or not video_quality['available']:
|
|
||||||
continue
|
|
||||||
formats.append(self._extract_format_for_quality(
|
|
||||||
api_data, video_id, audio_quality, video_quality))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
if is_economy:
|
||||||
else: # "Old" HTML5 videos
|
self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
|
||||||
formats = [{
|
|
||||||
|
# Invoking ffprobe to determine resolution
|
||||||
|
pp = FFmpegPostProcessor(self._downloader)
|
||||||
|
cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')
|
||||||
|
|
||||||
|
self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))
|
||||||
|
|
||||||
|
try:
|
||||||
|
metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
|
||||||
|
except PostProcessingError as err:
|
||||||
|
raise ExtractorError(err.msg, expected=True)
|
||||||
|
|
||||||
|
v_stream = a_stream = {}
|
||||||
|
|
||||||
|
# Some complex swf files doesn't have video stream (e.g. nm4809023)
|
||||||
|
for stream in metadata['streams']:
|
||||||
|
if stream['codec_type'] == 'video':
|
||||||
|
v_stream = stream
|
||||||
|
elif stream['codec_type'] == 'audio':
|
||||||
|
a_stream = stream
|
||||||
|
|
||||||
|
# Community restricted videos seem to have issues with the thumb API not returning anything at all
|
||||||
|
filesize = int(
|
||||||
|
(get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
|
||||||
|
or metadata['format']['size']
|
||||||
|
)
|
||||||
|
extension = (
|
||||||
|
get_video_info_xml('movie_type')
|
||||||
|
or 'mp4' if 'mp4' in metadata['format']['format_name'] else metadata['format']['format_name']
|
||||||
|
)
|
||||||
|
|
||||||
|
# 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
|
||||||
|
timestamp = (
|
||||||
|
parse_iso8601(get_video_info_web('first_retrieve'))
|
||||||
|
or unified_timestamp(get_video_info_web('postedDateTime'))
|
||||||
|
)
|
||||||
|
metadata_timestamp = (
|
||||||
|
parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
|
||||||
|
or timestamp if extension != 'mp4' else 0
|
||||||
|
)
|
||||||
|
|
||||||
|
# According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
|
||||||
|
smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')
|
||||||
|
|
||||||
|
is_source = timestamp < smile_threshold_timestamp or metadata_timestamp > 0
|
||||||
|
|
||||||
|
# If movie file size is unstable, old server movie is not source movie.
|
||||||
|
if filesize > 1:
|
||||||
|
formats.append({
|
||||||
'url': video_real_url,
|
'url': video_real_url,
|
||||||
'ext': 'mp4',
|
'format_id': 'smile' if not is_economy else 'smile_low',
|
||||||
'format_id': _format_id_from_url(video_real_url),
|
'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
|
||||||
}]
|
'ext': extension,
|
||||||
|
'container': extension,
|
||||||
|
'vcodec': v_stream.get('codec_name'),
|
||||||
|
'acodec': a_stream.get('codec_name'),
|
||||||
|
# Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
|
||||||
|
'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
|
||||||
|
'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
|
||||||
|
'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
|
||||||
|
'height': int_or_none(v_stream.get('height')),
|
||||||
|
'width': int_or_none(v_stream.get('width')),
|
||||||
|
'source_preference': 5 if not is_economy else -2,
|
||||||
|
'quality': 5 if is_source and not is_economy else None,
|
||||||
|
'filesize': filesize
|
||||||
|
})
|
||||||
|
|
||||||
def get_video_info(items):
|
self._sort_formats(formats)
|
||||||
return dict_get(api_data['video'], items)
|
|
||||||
|
|
||||||
# Start extracting information
|
# Start extracting information
|
||||||
title = get_video_info('title')
|
title = (
|
||||||
if not title:
|
get_video_info_xml('title') # prefer to get the untranslated original title
|
||||||
title = self._og_search_title(webpage, default=None)
|
or get_video_info_web(['originalTitle', 'title'])
|
||||||
if not title:
|
or self._og_search_title(webpage, default=None)
|
||||||
title = self._html_search_regex(
|
or self._html_search_regex(
|
||||||
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
|
||||||
webpage, 'video title')
|
webpage, 'video title'))
|
||||||
|
|
||||||
watch_api_data_string = self._html_search_regex(
|
watch_api_data_string = self._html_search_regex(
|
||||||
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
|
||||||
|
@ -372,14 +511,15 @@ class NiconicoIE(InfoExtractor):
|
||||||
video_detail = watch_api_data.get('videoDetail', {})
|
video_detail = watch_api_data.get('videoDetail', {})
|
||||||
|
|
||||||
thumbnail = (
|
thumbnail = (
|
||||||
get_video_info(['thumbnail_url', 'thumbnailURL'])
|
self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
|
||||||
|
or dict_get( # choose highest from 720p to 240p
|
||||||
|
get_video_info_web('thumbnail'),
|
||||||
|
['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
|
||||||
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
or self._html_search_meta('image', webpage, 'thumbnail', default=None)
|
||||||
or video_detail.get('thumbnail'))
|
or video_detail.get('thumbnail'))
|
||||||
|
|
||||||
description = get_video_info('description')
|
description = get_video_info_web('description')
|
||||||
|
|
||||||
timestamp = (parse_iso8601(get_video_info('first_retrieve'))
|
|
||||||
or unified_timestamp(get_video_info('postedDateTime')))
|
|
||||||
if not timestamp:
|
if not timestamp:
|
||||||
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
|
||||||
if match:
|
if match:
|
||||||
|
@ -388,19 +528,25 @@ class NiconicoIE(InfoExtractor):
|
||||||
timestamp = parse_iso8601(
|
timestamp = parse_iso8601(
|
||||||
video_detail['postedAt'].replace('/', '-'),
|
video_detail['postedAt'].replace('/', '-'),
|
||||||
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
delimiter=' ', timezone=datetime.timedelta(hours=9))
|
||||||
|
timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))
|
||||||
|
|
||||||
view_count = int_or_none(get_video_info(['view_counter', 'viewCount']))
|
view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
|
||||||
if not view_count:
|
if not view_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
r'>Views: <strong[^>]*>([^<]+)</strong>',
|
||||||
webpage, 'view count', default=None)
|
webpage, 'view count', default=None)
|
||||||
if match:
|
if match:
|
||||||
view_count = int_or_none(match.replace(',', ''))
|
view_count = int_or_none(match.replace(',', ''))
|
||||||
view_count = view_count or video_detail.get('viewCount')
|
view_count = (
|
||||||
|
view_count
|
||||||
|
or video_detail.get('viewCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['view']))
|
||||||
|
|
||||||
|
comment_count = (
|
||||||
|
int_or_none(get_video_info_web('comment_num'))
|
||||||
|
or video_detail.get('commentCount')
|
||||||
|
or try_get(api_data, lambda x: x['video']['count']['comment']))
|
||||||
|
|
||||||
comment_count = (int_or_none(get_video_info('comment_num'))
|
|
||||||
or video_detail.get('commentCount')
|
|
||||||
or try_get(api_data, lambda x: x['thread']['commentCount']))
|
|
||||||
if not comment_count:
|
if not comment_count:
|
||||||
match = self._html_search_regex(
|
match = self._html_search_regex(
|
||||||
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
r'>Comments: <strong[^>]*>([^<]+)</strong>',
|
||||||
|
@ -409,22 +555,41 @@ class NiconicoIE(InfoExtractor):
|
||||||
comment_count = int_or_none(match.replace(',', ''))
|
comment_count = int_or_none(match.replace(',', ''))
|
||||||
|
|
||||||
duration = (parse_duration(
|
duration = (parse_duration(
|
||||||
get_video_info('length')
|
get_video_info_web('length')
|
||||||
or self._html_search_meta(
|
or self._html_search_meta(
|
||||||
'video:duration', webpage, 'video duration', default=None))
|
'video:duration', webpage, 'video duration', default=None))
|
||||||
or video_detail.get('length')
|
or video_detail.get('length')
|
||||||
or get_video_info('duration'))
|
or get_video_info_web('duration'))
|
||||||
|
|
||||||
webpage_url = get_video_info('watch_url') or url
|
webpage_url = get_video_info_web('watch_url') or url
|
||||||
|
|
||||||
|
# for channel movie and community movie
|
||||||
|
channel_id = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['globalId'],
|
||||||
|
lambda x: x['community']['globalId']))
|
||||||
|
channel = try_get(
|
||||||
|
api_data,
|
||||||
|
(lambda x: x['channel']['name'],
|
||||||
|
lambda x: x['community']['name']))
|
||||||
|
|
||||||
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
# Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
|
||||||
# in the JSON, which will cause None to be returned instead of {}.
|
# in the JSON, which will cause None to be returned instead of {}.
|
||||||
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
|
||||||
uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
|
uploader_id = str_or_none(
|
||||||
uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')
|
get_video_info_web(['ch_id', 'user_id'])
|
||||||
|
or owner.get('id')
|
||||||
|
or channel_id
|
||||||
|
)
|
||||||
|
uploader = (
|
||||||
|
get_video_info_web(['ch_name', 'user_nickname'])
|
||||||
|
or owner.get('nickname')
|
||||||
|
or channel
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
'_api_data': api_data,
|
||||||
'title': title,
|
'title': title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': thumbnail,
|
'thumbnail': thumbnail,
|
||||||
|
@ -432,6 +597,8 @@ class NiconicoIE(InfoExtractor):
|
||||||
'uploader': uploader,
|
'uploader': uploader,
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'uploader_id': uploader_id,
|
'uploader_id': uploader_id,
|
||||||
|
'channel': channel,
|
||||||
|
'channel_id': channel_id,
|
||||||
'view_count': view_count,
|
'view_count': view_count,
|
||||||
'comment_count': comment_count,
|
'comment_count': comment_count,
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
@ -440,7 +607,7 @@ class NiconicoIE(InfoExtractor):
|
||||||
|
|
||||||
|
|
||||||
class NiconicoPlaylistIE(InfoExtractor):
|
class NiconicoPlaylistIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/)?mylist/(?P<id>\d+)'
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
'url': 'http://www.nicovideo.jp/mylist/27411728',
|
||||||
|
@ -456,60 +623,77 @@ class NiconicoPlaylistIE(InfoExtractor):
|
||||||
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
_PAGE_SIZE = 100
|
|
||||||
|
|
||||||
def _call_api(self, list_id, resource, query):
|
_API_HEADERS = {
|
||||||
return self._download_json(
|
'X-Frontend-ID': '6',
|
||||||
'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
'X-Frontend-Version': '0'
|
||||||
'Downloading %s JSON metatdata' % resource, query=query,
|
}
|
||||||
headers={'X-Frontend-Id': 6})['data']['mylist']
|
|
||||||
|
|
||||||
def _parse_owner(self, item):
|
|
||||||
owner = item.get('owner') or {}
|
|
||||||
if owner:
|
|
||||||
return {
|
|
||||||
'uploader': owner.get('name'),
|
|
||||||
'uploader_id': owner.get('id'),
|
|
||||||
}
|
|
||||||
return {}
|
|
||||||
|
|
||||||
def _fetch_page(self, list_id, page):
|
|
||||||
page += 1
|
|
||||||
items = self._call_api(list_id, 'page %d' % page, {
|
|
||||||
'page': page,
|
|
||||||
'pageSize': self._PAGE_SIZE,
|
|
||||||
})['items']
|
|
||||||
for item in items:
|
|
||||||
video = item.get('video') or {}
|
|
||||||
video_id = video.get('id')
|
|
||||||
if not video_id:
|
|
||||||
continue
|
|
||||||
count = video.get('count') or {}
|
|
||||||
get_count = lambda x: int_or_none(count.get(x))
|
|
||||||
info = {
|
|
||||||
'_type': 'url',
|
|
||||||
'id': video_id,
|
|
||||||
'title': video.get('title'),
|
|
||||||
'url': 'https://www.nicovideo.jp/watch/' + video_id,
|
|
||||||
'description': video.get('shortDescription'),
|
|
||||||
'duration': int_or_none(video.get('duration')),
|
|
||||||
'view_count': get_count('view'),
|
|
||||||
'comment_count': get_count('comment'),
|
|
||||||
'ie_key': NiconicoIE.ie_key(),
|
|
||||||
}
|
|
||||||
info.update(self._parse_owner(video))
|
|
||||||
yield info
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
list_id = self._match_id(url)
|
list_id = self._match_id(url)
|
||||||
mylist = self._call_api(list_id, 'list', {
|
|
||||||
'pageSize': 1,
|
def get_page_data(pagenum, pagesize):
|
||||||
})
|
return self._download_json(
|
||||||
entries = InAdvancePagedList(
|
'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
|
||||||
functools.partial(self._fetch_page, list_id),
|
query={'page': 1 + pagenum, 'pageSize': pagesize},
|
||||||
math.ceil(mylist['totalItemCount'] / self._PAGE_SIZE),
|
headers=self._API_HEADERS).get('data').get('mylist')
|
||||||
self._PAGE_SIZE)
|
|
||||||
result = self.playlist_result(
|
data = get_page_data(0, 1)
|
||||||
entries, list_id, mylist.get('name'), mylist.get('description'))
|
title = data.get('name')
|
||||||
result.update(self._parse_owner(mylist))
|
description = data.get('description')
|
||||||
return result
|
uploader = data.get('owner').get('name')
|
||||||
|
uploader_id = data.get('owner').get('id')
|
||||||
|
|
||||||
|
def pagefunc(pagenum):
|
||||||
|
data = get_page_data(pagenum, 25)
|
||||||
|
return ({
|
||||||
|
'_type': 'url',
|
||||||
|
'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
|
||||||
|
} for item in data.get('items'))
|
||||||
|
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'id': list_id,
|
||||||
|
'title': title,
|
||||||
|
'description': description,
|
||||||
|
'uploader': uploader,
|
||||||
|
'uploader_id': uploader_id,
|
||||||
|
'entries': OnDemandPagedList(pagefunc, 25),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class NiconicoUserIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
|
||||||
|
_TEST = {
|
||||||
|
'url': 'https://www.nicovideo.jp/user/419948',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '419948',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 101,
|
||||||
|
}
|
||||||
|
_API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
_API_HEADERS = {
|
||||||
|
'X-Frontend-ID': '6',
|
||||||
|
'X-Frontend-Version': '0'
|
||||||
|
}
|
||||||
|
|
||||||
|
def _entries(self, list_id, ):
|
||||||
|
total_count = 1
|
||||||
|
count = page_num = 0
|
||||||
|
while count < total_count:
|
||||||
|
json_parsed = self._download_json(
|
||||||
|
self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
|
||||||
|
headers=self._API_HEADERS,
|
||||||
|
note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
|
||||||
|
if not page_num:
|
||||||
|
total_count = int_or_none(json_parsed['data'].get('totalCount'))
|
||||||
|
for entry in json_parsed["data"]["items"]:
|
||||||
|
count += 1
|
||||||
|
yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
|
||||||
|
page_num += 1
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
list_id = self._match_id(url)
|
||||||
|
return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())
|
||||||
|
|
|
@ -1,100 +0,0 @@
|
||||||
# coding: utf-8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
|
||||||
from ..utils import (
|
|
||||||
determine_ext,
|
|
||||||
js_to_json,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class NinatekaIE(InfoExtractor):
|
|
||||||
IE_NAME = 'ninateka'
|
|
||||||
_VALID_URL = r'https?://ninateka\.pl/(?:film|audio)/(?P<id>[^/\?#]+)'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://ninateka.pl/film/dziwne-przygody-kota-filemona-7',
|
|
||||||
'md5': '8b25c2998b48e1add7d93a5e27030786',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'dziwne-przygody-kota-filemona-7',
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': 'Dziwny świat kota Filemona | Poważne zmartwienie',
|
|
||||||
'description': 'Filemon ma kłopot z własnym wyglądem, czy uda mu się z nim uporać?',
|
|
||||||
},
|
|
||||||
}, {
|
|
||||||
'url': 'https://ninateka.pl/audio/telefon-drony-fisz-1-12',
|
|
||||||
'md5': 'fa03fc229d3b4d8eaa18976a7020909e',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'telefon-drony-fisz-1-12',
|
|
||||||
'ext': 'm4a',
|
|
||||||
'title': 'Telefon | Drony | Fisz Emade Tworzywo | 1/12',
|
|
||||||
'description': 'Utwór z długo wyczekiwanego albumu studyjnego Fisz Emade Tworzywo pt. „Drony”',
|
|
||||||
},
|
|
||||||
}]
|
|
||||||
|
|
||||||
def decode_url(self, encoded):
|
|
||||||
xor_val = ord('h') ^ ord(encoded[0])
|
|
||||||
return ''.join(chr(ord(c) ^ xor_val) for c in encoded)
|
|
||||||
|
|
||||||
def extract_formats(self, data, video_id, name):
|
|
||||||
info = self._parse_json(data, video_id, transform_source=js_to_json)
|
|
||||||
formats = []
|
|
||||||
|
|
||||||
for source_info in info['sources']:
|
|
||||||
url = self.decode_url(source_info['src'])
|
|
||||||
type_ = source_info.get('type')
|
|
||||||
|
|
||||||
if type_ == 'application/vnd.ms-sstr+xml' or url.endswith('/Manifest'):
|
|
||||||
formats.extend(self._extract_ism_formats(
|
|
||||||
url, video_id, ism_id='mss-{}'.format(name), fatal=False))
|
|
||||||
|
|
||||||
elif type_ == 'application/x-mpegURL' or url.endswith('.m3u8'):
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
url, video_id, ext='mp4', m3u8_id='hls-{}'.format(name), fatal=False))
|
|
||||||
|
|
||||||
elif type_ == 'application/dash+xml' or url.endswith('.mpd'):
|
|
||||||
formats.extend(self._extract_mpd_formats(
|
|
||||||
url, video_id, mpd_id='dash-{}'.format(name), fatal=False))
|
|
||||||
|
|
||||||
elif url.endswith('.f4m'):
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
url, video_id, f4m_id='hds-{}'.format(name), fatal=False))
|
|
||||||
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'format_id': 'direct-{}'.format(name),
|
|
||||||
'url': url,
|
|
||||||
'ext': determine_ext(url, 'mp4'),
|
|
||||||
})
|
|
||||||
|
|
||||||
return formats
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
main = self._search_regex(
|
|
||||||
r'(?m)(?:var|let|const)\s+playerOptionsWithMainSource\s*=\s*(\{.*?\})\s*;\s*?$',
|
|
||||||
webpage, 'main source')
|
|
||||||
formats = self.extract_formats(main, video_id, 'main')
|
|
||||||
|
|
||||||
audiodesc = self._search_regex(
|
|
||||||
r'(?m)(?:var|let|const)\s+playerOptionsWithAudioDescriptionSource\s*=\s*(\{.*?\})\s*;\s*?$',
|
|
||||||
webpage, 'audio description', default=None)
|
|
||||||
if audiodesc:
|
|
||||||
formats.extend(self.extract_formats(audiodesc, video_id, 'audiodescription'))
|
|
||||||
|
|
||||||
english_ver = self._search_regex(
|
|
||||||
r'(?m)(?:var|let|const)\s+playerOptionsWithEnglishVersion\s*=\s*(\{.*?\})\s*;\s*?$',
|
|
||||||
webpage, 'english version', default=None)
|
|
||||||
if english_ver:
|
|
||||||
formats.extend(self.extract_formats(english_ver, video_id, 'english'))
|
|
||||||
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'formats': formats,
|
|
||||||
'description': self._og_search_description(webpage),
|
|
||||||
'thumbnail': self._og_search_thumbnail(webpage),
|
|
||||||
}
|
|
|
@ -23,11 +23,9 @@ class NineCNineMediaIE(InfoExtractor):
|
||||||
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
destination_code, content_id = re.match(self._VALID_URL, url).groups()
|
||||||
api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
|
api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
|
||||||
content = self._download_json(api_base_url, content_id, query={
|
content = self._download_json(api_base_url, content_id, query={
|
||||||
'$include': '[Media,Season,ContentPackages]',
|
'$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
|
||||||
})
|
})
|
||||||
title = content['Name']
|
title = content['Name']
|
||||||
if len(content['ContentPackages']) > 1:
|
|
||||||
raise ExtractorError('multiple content packages')
|
|
||||||
content_package = content['ContentPackages'][0]
|
content_package = content['ContentPackages'][0]
|
||||||
package_id = content_package['Id']
|
package_id = content_package['Id']
|
||||||
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
content_package_url = api_base_url + 'contentpackages/%s/' % package_id
|
||||||
|
|
|
@ -115,7 +115,7 @@ class NocoIE(InfoExtractor):
|
||||||
|
|
||||||
# Timestamp adjustment offset between server time and local time
|
# Timestamp adjustment offset between server time and local time
|
||||||
# must be calculated in order to use timestamps closest to server's
|
# must be calculated in order to use timestamps closest to server's
|
||||||
# in all API requests (see https://github.com/ytdl-org/haruhi-dl/issues/7864)
|
# in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
player_url = self._search_regex(
|
player_url = self._search_regex(
|
||||||
|
|
|
@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
|
||||||
|
|
||||||
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
urljoin('http://psapi.nrk.no/', path),
|
urljoin('https://psapi.nrk.no/', path),
|
||||||
video_id, note or 'Downloading %s JSON' % item,
|
video_id, note or 'Downloading %s JSON' % item,
|
||||||
fatal=fatal, query=query,
|
fatal=fatal, query=query,
|
||||||
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
headers={'Accept-Encoding': 'gzip, deflate, br'})
|
||||||
|
|
|
@ -21,7 +21,7 @@ class OnceIE(InfoExtractor):
|
||||||
progressive_formats = []
|
progressive_formats = []
|
||||||
for adaptive_format in formats:
|
for adaptive_format in formats:
|
||||||
# Prevent advertisement from embedding into m3u8 playlist (see
|
# Prevent advertisement from embedding into m3u8 playlist (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/8893#issuecomment-199912684)
|
# https://github.com/ytdl-org/youtube-dl/issues/8893#issuecomment-199912684)
|
||||||
adaptive_format['url'] = re.sub(
|
adaptive_format['url'] = re.sub(
|
||||||
r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
|
r'\badsegmentlength=\d+', r'adsegmentlength=0', adaptive_format['url'])
|
||||||
rendition_id = self._search_regex(
|
rendition_id = self._search_regex(
|
||||||
|
|
|
@ -3,10 +3,10 @@ from __future__ import unicode_literals
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
js_to_json,
|
||||||
)
|
)
|
||||||
|
|
||||||
import re
|
import re
|
||||||
import datetime
|
|
||||||
|
|
||||||
|
|
||||||
class OnNetworkLoaderIE(InfoExtractor):
|
class OnNetworkLoaderIE(InfoExtractor):
|
||||||
|
@ -45,51 +45,46 @@ class OnNetworkLoaderIE(InfoExtractor):
|
||||||
|
|
||||||
class OnNetworkFrameIE(InfoExtractor):
|
class OnNetworkFrameIE(InfoExtractor):
|
||||||
IE_NAME = 'onnetwork:frame'
|
IE_NAME = 'onnetwork:frame'
|
||||||
_VALID_URL = r'https?://video\.onnetwork\.tv/frame84\.php\?(?:[^&]+&)*?mid=(?P<mid>[^&]+)&(?:[^&]+&)*?id=(?P<vid>[^&]+)'
|
_VALID_URL = r'https?://video\.onnetwork\.tv/frame\d+\.php\?(?:[^&]+&)*?mid=(?P<mid>[^&]+)&(?:[^&]+&)*?id=(?P<vid>[^&]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.onnetwork.tv/frame84.php?mid=MCwxNng5LDAsMCwxNzU1LDM3MjksMSwwLDEsMzYsNSwwLDIsMCw0LDEsMCwxLDEsMiwwLDAsMSwwLDAsMCwwLC0xOy0xOzIwOzIwLDAsNTAsMA==&preview=0&iid=0&e=1&widget=524&id=ffEXS991c5f8f4dbb502b540687287098d2d8',
|
'url': 'https://video.onnetwork.tv/frame84.php?mid=MCwxNng5LDAsMCwxNzU1LDM3MjksMSwwLDEsMzYsNSwwLDIsMCw0LDEsMCwxLDEsMiwwLDAsMSwwLDAsMCwwLC0xOy0xOzIwOzIwLDAsNTAsMA==&preview=0&iid=0&e=1&widget=524&id=ffEXS991c5f8f4dbb502b540687287098d2d8',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
_BASE_OBJECT_RE = r'''var onplayer\s*=\s*new tUIPlayer\(\s*{\s*videos\s*:\s*\[\s*{.*?'''
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
vid = mobj.group('vid')
|
vid = mobj.group('vid')
|
||||||
webpage = self._download_webpage(url, vid, 'Downloading video frame')
|
webpage = self._download_webpage(url, vid, 'Downloading video frame')
|
||||||
|
|
||||||
video_id = self._search_regex(
|
data = self._search_regex(
|
||||||
self._BASE_OBJECT_RE + r'id\s*:\s*(\d+)',
|
r'(?s)var onplayer\s*=\s*new tUIPlayer\(\s*({\s*videos\s*:\s*\[\s*{.*?})\s*,\s*OnPlayerUI',
|
||||||
webpage, 'video id')
|
webpage, 'video data')
|
||||||
m3u_url = self._search_regex(
|
data = js_to_json(data)
|
||||||
self._BASE_OBJECT_RE + r'(?:urls\s*:\[{[^}]+}\],)?url\s*:"([^"]+)"',
|
data = re.sub(
|
||||||
webpage, 'm3u url')
|
r'\((?P<value>\d+(?:\.\d+)?|(["\']).+?\2)(?:\s*\|\|\s*.+?)?\)',
|
||||||
title = self._search_regex(
|
lambda x: x.group('value'), data)
|
||||||
self._BASE_OBJECT_RE + r"(?<!p)title\s*:\s*'([^']+)'",
|
data = re.sub(r'"\s*\+\s*"', '', data)
|
||||||
webpage, 'title')
|
data = self._parse_json(data, vid)
|
||||||
thumbnail = self._search_regex(
|
|
||||||
self._BASE_OBJECT_RE + r"""(?<![a-z])poster\s*:\s*'([^']+)'""",
|
|
||||||
webpage, 'thumbnail', fatal=False)
|
|
||||||
duration = self._search_regex(
|
|
||||||
self._BASE_OBJECT_RE + r'duration\s*:\s*(\d+)',
|
|
||||||
webpage, 'duration', fatal=False)
|
|
||||||
age_limit = self._search_regex(
|
|
||||||
self._BASE_OBJECT_RE + r'ageallow\s*:\s*(\d+)',
|
|
||||||
webpage, 'age limit', fatal=False)
|
|
||||||
upload_date_unix = self._search_regex(
|
|
||||||
self._BASE_OBJECT_RE + r'adddate\s*:\s*(\d+)',
|
|
||||||
webpage, 'upload date', fatal=False)
|
|
||||||
if upload_date_unix:
|
|
||||||
upload_date = datetime.datetime.fromtimestamp(int(upload_date_unix)).strftime('%Y%m%d')
|
|
||||||
|
|
||||||
formats = self._extract_m3u8_formats(m3u_url, video_id)
|
entries = []
|
||||||
|
for video in data['videos']:
|
||||||
|
video_id = str(video['id'])
|
||||||
|
|
||||||
|
formats = self._extract_m3u8_formats(video['url'], video_id)
|
||||||
|
self._sort_formats(formats)
|
||||||
|
|
||||||
|
entries.append({
|
||||||
|
'id': video_id,
|
||||||
|
'title': video['title'],
|
||||||
|
'formats': formats,
|
||||||
|
'thumbnail': video.get('poster'),
|
||||||
|
'duration': int_or_none(video.get('duration')),
|
||||||
|
'age_limit': int_or_none(video.get('ageallow')),
|
||||||
|
'timestamp': int_or_none(video.get('adddate')),
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'_type': 'playlist',
|
||||||
'title': title,
|
'entries': entries,
|
||||||
'formats': formats,
|
'id': vid,
|
||||||
'thumbnail': thumbnail,
|
|
||||||
'duration': int_or_none(duration),
|
|
||||||
'age_limit': int_or_none(age_limit),
|
|
||||||
'upload_date': upload_date,
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
elif ext == 'f4m':
|
elif ext == 'f4m':
|
||||||
formats.extend(self._extract_f4m_formats(
|
formats.extend(self._extract_f4m_formats(
|
||||||
src, video_id, f4m_id=format_id, fatal=False))
|
src, video_id, f4m_id=format_id, fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
src, video_id, mpd_id=format_id, fatal=False))
|
||||||
else:
|
else:
|
||||||
formats.append({
|
formats.append({
|
||||||
'format_id': format_id,
|
'format_id': format_id,
|
||||||
|
@ -140,6 +143,25 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
})
|
})
|
||||||
|
|
||||||
upload_date = unified_strdate(sd.get('created_date'))
|
upload_date = unified_strdate(sd.get('created_date'))
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
preview = sd.get('preview_image_url')
|
||||||
|
if preview:
|
||||||
|
thumbnails.append({
|
||||||
|
'id': 'preview',
|
||||||
|
'url': preview,
|
||||||
|
'preference': 0,
|
||||||
|
})
|
||||||
|
image = sd.get('image_full_url')
|
||||||
|
if not image and len(data_jsb) == 1:
|
||||||
|
image = self._og_search_thumbnail(webpage)
|
||||||
|
if image:
|
||||||
|
thumbnails.append({
|
||||||
|
'id': 'full',
|
||||||
|
'url': image,
|
||||||
|
'preference': 1,
|
||||||
|
})
|
||||||
|
|
||||||
entries.append({
|
entries.append({
|
||||||
'_type': 'video',
|
'_type': 'video',
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
|
@ -149,7 +171,7 @@ class ORFTVthekIE(InfoExtractor):
|
||||||
'description': sd.get('description'),
|
'description': sd.get('description'),
|
||||||
'duration': int_or_none(sd.get('duration_in_seconds')),
|
'duration': int_or_none(sd.get('duration_in_seconds')),
|
||||||
'upload_date': upload_date,
|
'upload_date': upload_date,
|
||||||
'thumbnail': sd.get('image_full_url'),
|
'thumbnails': thumbnails,
|
||||||
})
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
@ -182,7 +204,7 @@ class ORFRadioIE(InfoExtractor):
|
||||||
duration = end - start if end and start else None
|
duration = end - start if end and start else None
|
||||||
entries.append({
|
entries.append({
|
||||||
'id': loop_stream_id.replace('.mp3', ''),
|
'id': loop_stream_id.replace('.mp3', ''),
|
||||||
'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
'url': 'https://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
|
||||||
'title': title,
|
'title': title,
|
||||||
'description': clean_html(data.get('subtitle')),
|
'description': clean_html(data.get('subtitle')),
|
||||||
'duration': duration,
|
'duration': duration,
|
||||||
|
|
148
haruhi_dl/extractor/palcomp3.py
Normal file
148
haruhi_dl/extractor/palcomp3.py
Normal file
|
@ -0,0 +1,148 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
str_or_none,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PalcoMP3BaseIE(InfoExtractor):
|
||||||
|
_GQL_QUERY_TMPL = '''{
|
||||||
|
artist(slug: "%s") {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}'''
|
||||||
|
_ARTIST_FIELDS_TMPL = '''music(slug: "%%s") {
|
||||||
|
%s
|
||||||
|
}'''
|
||||||
|
_MUSIC_FIELDS = '''duration
|
||||||
|
hls
|
||||||
|
mp3File
|
||||||
|
musicID
|
||||||
|
plays
|
||||||
|
title'''
|
||||||
|
|
||||||
|
def _call_api(self, artist_slug, artist_fields):
|
||||||
|
return self._download_json(
|
||||||
|
'https://www.palcomp3.com.br/graphql/', artist_slug, query={
|
||||||
|
'query': self._GQL_QUERY_TMPL % (artist_slug, artist_fields),
|
||||||
|
})['data']
|
||||||
|
|
||||||
|
def _parse_music(self, music):
|
||||||
|
music_id = compat_str(music['musicID'])
|
||||||
|
title = music['title']
|
||||||
|
|
||||||
|
formats = []
|
||||||
|
hls_url = music.get('hls')
|
||||||
|
if hls_url:
|
||||||
|
formats.append({
|
||||||
|
'url': hls_url,
|
||||||
|
'protocol': 'm3u8_native',
|
||||||
|
'ext': 'mp4',
|
||||||
|
})
|
||||||
|
mp3_file = music.get('mp3File')
|
||||||
|
if mp3_file:
|
||||||
|
formats.append({
|
||||||
|
'url': mp3_file,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
'id': music_id,
|
||||||
|
'title': title,
|
||||||
|
'formats': formats,
|
||||||
|
'duration': int_or_none(music.get('duration')),
|
||||||
|
'view_count': int_or_none(music.get('plays')),
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_initialize(self):
|
||||||
|
self._ARTIST_FIELDS_TMPL = self._ARTIST_FIELDS_TMPL % self._MUSIC_FIELDS
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
artist_slug, music_slug = re.match(self._VALID_URL, url).groups()
|
||||||
|
artist_fields = self._ARTIST_FIELDS_TMPL % music_slug
|
||||||
|
music = self._call_api(artist_slug, artist_fields)['artist']['music']
|
||||||
|
return self._parse_music(music)
|
||||||
|
|
||||||
|
|
||||||
|
class PalcoMP3IE(PalcoMP3BaseIE):
|
||||||
|
IE_NAME = 'PalcoMP3:song'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/nossas-composicoes-cuida-bem-dela/',
|
||||||
|
'md5': '99fd6405b2d8fd589670f6db1ba3b358',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '3162927',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Nossas Composições - CUIDA BEM DELA',
|
||||||
|
'duration': 210,
|
||||||
|
'view_count': int,
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if PalcoMP3VideoIE.suitable(url) else super(PalcoMP3IE, cls).suitable(url)
|
||||||
|
|
||||||
|
|
||||||
|
class PalcoMP3ArtistIE(PalcoMP3BaseIE):
|
||||||
|
IE_NAME = 'PalcoMP3:artist'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<id>[^/?&#]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.palcomp3.com.br/condedoforro/',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '358396',
|
||||||
|
'title': 'Conde do Forró',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 188,
|
||||||
|
}]
|
||||||
|
_ARTIST_FIELDS_TMPL = '''artistID
|
||||||
|
musics {
|
||||||
|
nodes {
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
name'''
|
||||||
|
|
||||||
|
@ classmethod
|
||||||
|
def suitable(cls, url):
|
||||||
|
return False if re.match(PalcoMP3IE._VALID_URL, url) else super(PalcoMP3ArtistIE, cls).suitable(url)
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
artist_slug = self._match_id(url)
|
||||||
|
artist = self._call_api(artist_slug, self._ARTIST_FIELDS_TMPL)['artist']
|
||||||
|
|
||||||
|
def entries():
|
||||||
|
for music in (try_get(artist, lambda x: x['musics']['nodes'], list) or []):
|
||||||
|
yield self._parse_music(music)
|
||||||
|
|
||||||
|
return self.playlist_result(
|
||||||
|
entries(), str_or_none(artist.get('artistID')), artist.get('name'))
|
||||||
|
|
||||||
|
|
||||||
|
class PalcoMP3VideoIE(PalcoMP3BaseIE):
|
||||||
|
IE_NAME = 'PalcoMP3:video'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?palcomp3\.com(?:\.br)?/(?P<artist>[^/]+)/(?P<id>[^/?&#]+)/?#clipe'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.palcomp3.com/maiaraemaraisaoficial/maiara-e-maraisa-voce-faz-falta-aqui-ao-vivo-em-vicosa-mg/#clipe',
|
||||||
|
'add_ie': ['Youtube'],
|
||||||
|
'info_dict': {
|
||||||
|
'id': '_pD1nR2qqPg',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Maiara e Maraisa - Você Faz Falta Aqui - DVD Ao Vivo Em Campo Grande',
|
||||||
|
'description': 'md5:7043342c09a224598e93546e98e49282',
|
||||||
|
'upload_date': '20161107',
|
||||||
|
'uploader_id': 'maiaramaraisaoficial',
|
||||||
|
'uploader': 'Maiara e Maraisa',
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
_MUSIC_FIELDS = 'youtubeID'
|
||||||
|
|
||||||
|
def _parse_music(self, music):
|
||||||
|
youtube_id = music['youtubeID']
|
||||||
|
return self.url_result(youtube_id, 'Youtube', youtube_id)
|
36
haruhi_dl/extractor/patronite.py
Normal file
36
haruhi_dl/extractor/patronite.py
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
js_to_json,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PatroniteAudioIE(InfoExtractor):
|
||||||
|
IE_NAME = 'patronite:audio'
|
||||||
|
_VALID_URL = r'https?://patronite\.pl/(?P<id>[a-zA-Z\d-]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://patronite.pl/radionowyswiat',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radionowyswiat',
|
||||||
|
'ext': 'unknown_video',
|
||||||
|
'title': 'Radio Nowy Świat',
|
||||||
|
'description': 'Dobre radio tworzą nie tylko dziennikarze, realizatorzy, technicy czy reporterzy. Bez nich nie byłoby radia, ale też radia nie byłoby bez słuchaczy. Dziś każdy z Was może pójść o krok dalej - stając się współtwórcą i mecenasem Radia Nowy Świat!',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
# only works with radio streams, no podcast support
|
||||||
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, display_id)
|
||||||
|
data = self._parse_json(self._search_regex(
|
||||||
|
r"(?s)const player\s*=\s*new window\.PatroniteWebPlayer\('\.web-player',\s*({.+?})\);",
|
||||||
|
webpage, 'player data'), display_id, js_to_json)
|
||||||
|
return {
|
||||||
|
'id': display_id,
|
||||||
|
'url': data['url'],
|
||||||
|
'title': data['title'],
|
||||||
|
'description': self._og_search_description(webpage),
|
||||||
|
'thumbnail': data.get('artwork'),
|
||||||
|
'vcodec': 'none',
|
||||||
|
}
|
|
@ -305,7 +305,7 @@ class PBSIE(InfoExtractor):
|
||||||
{
|
{
|
||||||
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
# Video embedded in iframe containing angle brackets as attribute's value (e.g.
|
||||||
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
# "<iframe style='position: absolute;<br />\ntop: 0; left: 0;' ...", see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/7059)
|
# https://github.com/ytdl-org/youtube-dl/issues/7059)
|
||||||
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
'url': 'http://www.pbs.org/food/features/a-chefs-life-season-3-episode-5-prickly-business/',
|
||||||
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
|
'md5': '59b0ef5009f9ac8a319cc5efebcd865e',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -348,7 +348,7 @@ class PBSIE(InfoExtractor):
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/13801
|
# https://github.com/ytdl-org/youtube-dl/issues/13801
|
||||||
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
|
'url': 'https://www.pbs.org/video/pbs-newshour-full-episode-july-31-2017-1501539057/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3003333873',
|
'id': '3003333873',
|
||||||
|
@ -642,7 +642,7 @@ class PBSIE(InfoExtractor):
|
||||||
# we won't try extracting them.
|
# we won't try extracting them.
|
||||||
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
|
# Since summer 2016 higher quality formats (4500k and 6500k) are also available
|
||||||
# albeit they are not documented in [2].
|
# albeit they are not documented in [2].
|
||||||
# 1. https://github.com/ytdl-org/haruhi-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
|
# 1. https://github.com/ytdl-org/youtube-dl/commit/cbc032c8b70a038a69259378c92b4ba97b42d491#commitcomment-17313656
|
||||||
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
# 2. https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications
|
||||||
if not bitrate or int(bitrate) < 400:
|
if not bitrate or int(bitrate) < 400:
|
||||||
continue
|
continue
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
from urllib.parse import urlencode
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import SelfhostedInfoExtractor
|
from .common import SelfhostedInfoExtractor
|
||||||
from ..compat import compat_str
|
from ..compat import compat_str
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_resolution,
|
parse_resolution,
|
||||||
str_or_none,
|
str_or_none,
|
||||||
|
@ -13,11 +16,12 @@ from ..utils import (
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
urljoin,
|
urljoin,
|
||||||
|
ExtractorError,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
_UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
|
||||||
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
|
_API_BASE = 'https://%s/api/v1/%s/%s/%s'
|
||||||
_SH_VALID_CONTENT_STRINGS = (
|
_SH_VALID_CONTENT_STRINGS = (
|
||||||
'<title>PeerTube<',
|
'<title>PeerTube<',
|
||||||
|
@ -26,10 +30,55 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
|
'>We are sorry but it seems that PeerTube is not compatible with your web browser.<',
|
||||||
'<meta property="og:platform" content="PeerTube"',
|
'<meta property="og:platform" content="PeerTube"',
|
||||||
)
|
)
|
||||||
|
_NETRC_MACHINE = 'peertube'
|
||||||
|
_LOGIN_INFO = None
|
||||||
|
|
||||||
|
def _login(self):
|
||||||
|
if self._LOGIN_INFO:
|
||||||
|
ts = datetime.datetime.now().timestamp()
|
||||||
|
if self._LOGIN_INFO['expires_on'] >= ts + 5:
|
||||||
|
return True
|
||||||
|
|
||||||
|
username, password = self._get_login_info()
|
||||||
|
if not username:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# the instance domain (the one where user has an account) must be separated from the user e-mail
|
||||||
|
mobj = re.match(r'^(?P<username>[^@]+(?:@[^@]+)?)@(?P<instance>.+)$', username)
|
||||||
|
if not mobj:
|
||||||
|
self.report_warning(
|
||||||
|
'Invalid login format - must be in format [username or email]@[instance]')
|
||||||
|
username, instance = mobj.group('username', 'instance')
|
||||||
|
|
||||||
|
oauth_keys = self._downloader.cache.load('peertube-oauth', instance)
|
||||||
|
if not oauth_keys:
|
||||||
|
oauth_keys = self._download_json(f'https://{instance}/api/v1/oauth-clients/local', instance, 'Downloading OAuth keys')
|
||||||
|
self._downloader.cache.store('peertube-oauth', instance, oauth_keys)
|
||||||
|
client_id, client_secret = oauth_keys['client_id'], oauth_keys['client_secret']
|
||||||
|
|
||||||
|
auth_res = self._download_json(f'https://{instance}/api/v1/users/token', instance, 'Logging in', data=bytes(urlencode({
|
||||||
|
'client_id': client_id,
|
||||||
|
'client_secret': client_secret,
|
||||||
|
'response_type': 'code',
|
||||||
|
'grant_type': 'password',
|
||||||
|
'scope': 'user',
|
||||||
|
'username': username,
|
||||||
|
'password': password,
|
||||||
|
}).encode('utf-8')))
|
||||||
|
|
||||||
|
ts = datetime.datetime.now().timestamp()
|
||||||
|
auth_res['instance'] = instance
|
||||||
|
auth_res['expires_on'] = ts + auth_res['expires_in']
|
||||||
|
auth_res['refresh_token_expires_on'] = ts + auth_res['refresh_token_expires_in']
|
||||||
|
# not using self to set the details to expose it to all peertube extractors
|
||||||
|
PeerTubeBaseExtractor._LOGIN_INFO = auth_res
|
||||||
|
|
||||||
def _call_api(self, host, resource, resource_id, path, note=None, errnote=None, fatal=True):
|
def _call_api(self, host, resource, resource_id, path, note=None, errnote=None, fatal=True):
|
||||||
return self._download_json(
|
return self._download_json(
|
||||||
self._API_BASE % (host, resource, resource_id, path), resource_id,
|
self._API_BASE % (host, resource, resource_id, path), resource_id,
|
||||||
|
headers={
|
||||||
|
'Authorization': f'Bearer {self._LOGIN_INFO["access_token"]}',
|
||||||
|
} if self._LOGIN_INFO and self._LOGIN_INFO['instance'] == host else {},
|
||||||
note=note, errnote=errnote, fatal=fatal)
|
note=note, errnote=errnote, fatal=fatal)
|
||||||
|
|
||||||
def _parse_video(self, video, url):
|
def _parse_video(self, video, url):
|
||||||
|
@ -65,6 +114,15 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
else:
|
else:
|
||||||
f['fps'] = int_or_none(file_.get('fps'))
|
f['fps'] = int_or_none(file_.get('fps'))
|
||||||
formats.append(f)
|
formats.append(f)
|
||||||
|
if file_.get('torrentDownloadUrl'):
|
||||||
|
f = f.copy()
|
||||||
|
f.update({
|
||||||
|
'url': file_['torrentDownloadUrl'],
|
||||||
|
'ext': determine_ext(file_url),
|
||||||
|
'format_id': '%s-torrent' % format_id,
|
||||||
|
'protocol': 'bittorrent',
|
||||||
|
})
|
||||||
|
formats.append(f)
|
||||||
if files:
|
if files:
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
info_dict['formats'] = formats
|
info_dict['formats'] = formats
|
||||||
|
@ -93,11 +151,13 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
else:
|
else:
|
||||||
age_limit = None
|
age_limit = None
|
||||||
|
|
||||||
|
webpage_url = 'https://%s/videos/watch/%s' % (host, display_id)
|
||||||
|
|
||||||
info_dict.update({
|
info_dict.update({
|
||||||
'id': video['uuid'],
|
'id': video['uuid'],
|
||||||
'title': video['name'],
|
'title': video['name'],
|
||||||
'description': video.get('description'),
|
'description': video.get('description'),
|
||||||
'thumbnail': urljoin(url, video.get('thumbnailPath')),
|
'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
|
||||||
'timestamp': unified_timestamp(video.get('publishedAt')),
|
'timestamp': unified_timestamp(video.get('publishedAt')),
|
||||||
'uploader': account_data('displayName', compat_str),
|
'uploader': account_data('displayName', compat_str),
|
||||||
'uploader_id': str_or_none(account_data('id', int)),
|
'uploader_id': str_or_none(account_data('id', int)),
|
||||||
|
@ -120,16 +180,16 @@ class PeerTubeBaseExtractor(SelfhostedInfoExtractor):
|
||||||
|
|
||||||
class PeerTubeSHIE(PeerTubeBaseExtractor):
|
class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_VALID_URL = r'peertube:(?P<host>[^:]+):(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|api/v\d/videos|w)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'md5': '9bed8c0137913e17b86334e5885aacff',
|
'md5': '8563064d245a4be5705bddb22bb00a28',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'What is PeerTube?',
|
'title': 'What is PeerTube?',
|
||||||
'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
|
'description': 'md5:96adbaf219b4d41747bfc5937df0b017',
|
||||||
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
'thumbnail': r're:https?://.*\.(?:jpg|png)',
|
||||||
'timestamp': 1538391166,
|
'timestamp': 1538391166,
|
||||||
'upload_date': '20181001',
|
'upload_date': '20181001',
|
||||||
|
@ -160,6 +220,27 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
'upload_date': '20200420',
|
'upload_date': '20200420',
|
||||||
'uploader': 'Drew DeVault',
|
'uploader': 'Drew DeVault',
|
||||||
}
|
}
|
||||||
|
}, {
|
||||||
|
# new url scheme since PeerTube 3.3
|
||||||
|
'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'E2E tests',
|
||||||
|
'uploader_id': '37855',
|
||||||
|
'timestamp': 1589276219,
|
||||||
|
'upload_date': '20200512',
|
||||||
|
'uploader': 'chocobozzz',
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'peertube:peertube2.cpy.re:3fbif9S3WmtTP8gGsC5HBd',
|
||||||
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -209,6 +290,17 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
host, video_id = self._match_id_and_host(url)
|
host, video_id = self._match_id_and_host(url)
|
||||||
|
|
||||||
|
self._login()
|
||||||
|
|
||||||
|
if self._LOGIN_INFO and self._LOGIN_INFO['instance'] != host:
|
||||||
|
video_search = self._call_api(
|
||||||
|
self._LOGIN_INFO['instance'], 'search', 'videos', '?' + urlencode({
|
||||||
|
'search': f'https://{host}/videos/watch/{video_id}',
|
||||||
|
}), note='Searching for remote video')
|
||||||
|
if len(video_search) == 0:
|
||||||
|
raise ExtractorError('Remote video not found')
|
||||||
|
host, video_id = self._LOGIN_INFO['instance'], video_search['data'][0]['uuid']
|
||||||
|
|
||||||
video = self._call_api(
|
video = self._call_api(
|
||||||
host, 'videos', video_id, '', note='Downloading video JSON')
|
host, 'videos', video_id, '', note='Downloading video JSON')
|
||||||
|
|
||||||
|
@ -218,7 +310,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
description = None
|
description = None
|
||||||
if webpage:
|
if webpage:
|
||||||
description = self._og_search_description(webpage)
|
description = self._og_search_description(webpage, default=None)
|
||||||
if not description:
|
if not description:
|
||||||
full_description = self._call_api(
|
full_description = self._call_api(
|
||||||
host, 'videos', video_id, 'description', note='Downloading description JSON',
|
host, 'videos', video_id, 'description', note='Downloading description JSON',
|
||||||
|
@ -234,7 +326,7 @@ class PeerTubeSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:playlist:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)/playlist|api/v\d/video-playlists|w/p)/(?P<id>%s)' % (PeerTubeBaseExtractor._UUID_RE)
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
|
'url': 'https://video.internet-czas-dzialac.pl/videos/watch/playlist/3c81b894-acde-4539-91a2-1748b208c14c?playlistPosition=1',
|
||||||
|
@ -245,11 +337,16 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://peertube2.cpy.re/w/p/hrAdcvjkMMkHJ28upnoN21',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
host, display_id = self._match_id_and_host(url)
|
host, display_id = self._match_id_and_host(url)
|
||||||
|
|
||||||
|
self._login()
|
||||||
|
|
||||||
playlist_data = self._call_api(host, 'video-playlists', display_id, '', 'Downloading playlist metadata')
|
playlist_data = self._call_api(host, 'video-playlists', display_id, '', 'Downloading playlist metadata')
|
||||||
entries = []
|
entries = []
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -279,23 +376,28 @@ class PeerTubePlaylistSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:channel:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?video-channels/(?P<id>[^/?#]+)(?:/videos)?'
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?video-channels|c)/(?P<id>[^/?#]+)(?:/videos)?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
|
'url': 'https://video.internet-czas-dzialac.pl/video-channels/internet_czas_dzialac/videos',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '2',
|
'id': '2',
|
||||||
'title': 'internet_czas_dzialac',
|
'title': 'Internet. Czas działać!',
|
||||||
'description': 'md5:4d2e215ea0d9ae4501a556ef6e9a5308',
|
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||||
'uploader_id': 3,
|
'uploader_id': 3,
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.internet-czas-dzialac.pl/c/internet_czas_dzialac',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
host, display_id = self._match_id_and_host(url)
|
host, display_id = self._match_id_and_host(url)
|
||||||
|
|
||||||
|
self._login()
|
||||||
|
|
||||||
channel_data = self._call_api(host, 'video-channels', display_id, '', 'Downloading channel metadata')
|
channel_data = self._call_api(host, 'video-channels', display_id, '', 'Downloading channel metadata')
|
||||||
entries = []
|
entries = []
|
||||||
i = 0
|
i = 0
|
||||||
|
@ -326,23 +428,28 @@ class PeerTubeChannelSHIE(PeerTubeBaseExtractor):
|
||||||
|
|
||||||
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
|
class PeerTubeAccountSHIE(PeerTubeBaseExtractor):
|
||||||
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
|
_VALID_URL = r'peertube:account:(?P<host>[^:]+):(?P<id>.+)'
|
||||||
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:api/v\d/)?accounts/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
_SH_VALID_URL = r'https?://(?P<host>[^/]+)/(?:(?:api/v\d/)?accounts|a)/(?P<id>[^/?#]+)(?:/video(?:s|-channels))?'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
|
'url': 'https://video.internet-czas-dzialac.pl/accounts/icd/video-channels',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3',
|
'id': '3',
|
||||||
'description': 'md5:ab3c9b934dd39030eea1c9fe76079870',
|
'description': 'md5:ac35d70f6625b04b189e0b4b76e62e17',
|
||||||
'uploader': 'Internet. Czas działać!',
|
'uploader': 'Internet. Czas działać!',
|
||||||
'title': 'Internet. Czas działać!',
|
'title': 'Internet. Czas działać!',
|
||||||
'uploader_id': 3,
|
'uploader_id': 3,
|
||||||
},
|
},
|
||||||
'playlist_mincount': 14,
|
'playlist_mincount': 14,
|
||||||
|
}, {
|
||||||
|
'url': 'https://video.internet-czas-dzialac.pl/a/icd',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _selfhosted_extract(self, url, webpage=None):
|
def _selfhosted_extract(self, url, webpage=None):
|
||||||
host, display_id = self._match_id_and_host(url)
|
host, display_id = self._match_id_and_host(url)
|
||||||
|
|
||||||
|
self._login()
|
||||||
|
|
||||||
account_data = self._call_api(host, 'accounts', display_id, '', 'Downloading account metadata')
|
account_data = self._call_api(host, 'accounts', display_id, '', 'Downloading account metadata')
|
||||||
entries = []
|
entries = []
|
||||||
i = 0
|
i = 0
|
||||||
|
|
|
@ -1,45 +1,133 @@
|
||||||
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from .dreisat import DreiSatIE
|
import re
|
||||||
|
|
||||||
|
from .youtube import YoutubeIE
|
||||||
|
from .zdf import ZDFBaseIE
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
int_or_none,
|
||||||
|
merge_dicts,
|
||||||
|
try_get,
|
||||||
|
unified_timestamp,
|
||||||
|
urljoin,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class PhoenixIE(DreiSatIE):
|
class PhoenixIE(ZDFBaseIE):
|
||||||
IE_NAME = 'phoenix.de'
|
IE_NAME = 'phoenix.de'
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
|
_VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
|
||||||
(?:
|
_TESTS = [{
|
||||||
phoenix/die_sendungen/(?:[^/]+/)?
|
# Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
|
||||||
)?
|
'url': 'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html',
|
||||||
(?P<id>[0-9]+)'''
|
'md5': '34ec321e7eb34231fd88616c65c92db0',
|
||||||
_TESTS = [
|
'info_dict': {
|
||||||
{
|
'id': '210222_phx_nachgehakt_corona_protest',
|
||||||
'url': 'http://www.phoenix.de/content/884301',
|
'ext': 'mp4',
|
||||||
'md5': 'ed249f045256150c92e72dbb70eadec6',
|
'title': 'Wohin führt der Protest in der Pandemie?',
|
||||||
'info_dict': {
|
'description': 'md5:7d643fe7f565e53a24aac036b2122fbd',
|
||||||
'id': '884301',
|
'duration': 1691,
|
||||||
'ext': 'mp4',
|
'timestamp': 1613902500,
|
||||||
'title': 'Michael Krons mit Hans-Werner Sinn',
|
'upload_date': '20210221',
|
||||||
'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
|
'uploader': 'Phoenix',
|
||||||
'upload_date': '20141025',
|
'series': 'corona nachgehakt',
|
||||||
'uploader': 'Im Dialog',
|
'episode': 'Wohin führt der Protest in der Pandemie?',
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
}, {
|
||||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
|
# Youtube embed
|
||||||
'only_matching': True,
|
'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'hMQtqFYjomk',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?',
|
||||||
|
'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd',
|
||||||
|
'duration': 3509,
|
||||||
|
'upload_date': '20201219',
|
||||||
|
'uploader': 'phoenix',
|
||||||
|
'uploader_id': 'phoenix',
|
||||||
},
|
},
|
||||||
{
|
'params': {
|
||||||
'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
|
'skip_download': True,
|
||||||
'only_matching': True,
|
|
||||||
},
|
},
|
||||||
]
|
}, {
|
||||||
|
'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# no media
|
||||||
|
'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html
|
||||||
|
'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
article_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
|
||||||
|
|
||||||
internal_id = self._search_regex(
|
article = self._download_json(
|
||||||
r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
|
'https://www.phoenix.de/response/id/%s' % article_id, article_id,
|
||||||
webpage, 'internal video ID')
|
'Downloading article JSON')
|
||||||
|
|
||||||
api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id
|
video = article['absaetze'][0]
|
||||||
return self.extract_from_xml_url(video_id, api_url)
|
title = video.get('titel') or article.get('subtitel')
|
||||||
|
|
||||||
|
if video.get('typ') == 'video-youtube':
|
||||||
|
video_id = video['id']
|
||||||
|
return self.url_result(
|
||||||
|
video_id, ie=YoutubeIE.ie_key(), video_id=video_id,
|
||||||
|
video_title=title)
|
||||||
|
|
||||||
|
video_id = compat_str(video.get('basename') or video.get('content'))
|
||||||
|
|
||||||
|
details = self._download_json(
|
||||||
|
'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php',
|
||||||
|
video_id, 'Downloading details JSON', query={
|
||||||
|
'ak': 'web',
|
||||||
|
'ptmd': 'true',
|
||||||
|
'id': video_id,
|
||||||
|
'profile': 'player2',
|
||||||
|
})
|
||||||
|
|
||||||
|
title = title or details['title']
|
||||||
|
content_id = details['tracking']['nielsen']['content']['assetid']
|
||||||
|
|
||||||
|
info = self._extract_ptmd(
|
||||||
|
'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/%s' % content_id,
|
||||||
|
content_id, None, url)
|
||||||
|
|
||||||
|
duration = int_or_none(try_get(
|
||||||
|
details, lambda x: x['tracking']['nielsen']['content']['length']))
|
||||||
|
timestamp = unified_timestamp(details.get('editorialDate'))
|
||||||
|
series = try_get(
|
||||||
|
details, lambda x: x['tracking']['nielsen']['content']['program'],
|
||||||
|
compat_str)
|
||||||
|
episode = title if details.get('contentType') == 'episode' else None
|
||||||
|
|
||||||
|
thumbnails = []
|
||||||
|
teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {}
|
||||||
|
for thumbnail_key, thumbnail_url in teaser_images.items():
|
||||||
|
thumbnail_url = urljoin(url, thumbnail_url)
|
||||||
|
if not thumbnail_url:
|
||||||
|
continue
|
||||||
|
thumbnail = {
|
||||||
|
'url': thumbnail_url,
|
||||||
|
}
|
||||||
|
m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key)
|
||||||
|
if m:
|
||||||
|
thumbnail['width'] = int(m.group(1))
|
||||||
|
thumbnail['height'] = int(m.group(2))
|
||||||
|
thumbnails.append(thumbnail)
|
||||||
|
|
||||||
|
return merge_dicts(info, {
|
||||||
|
'id': content_id,
|
||||||
|
'title': title,
|
||||||
|
'description': details.get('leadParagraph'),
|
||||||
|
'duration': duration,
|
||||||
|
'thumbnails': thumbnails,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'uploader': details.get('tvService'),
|
||||||
|
'series': series,
|
||||||
|
'episode': episode,
|
||||||
|
})
|
||||||
|
|
|
@ -1,22 +1,15 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import compat_str
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
js_to_json,
|
js_to_json,
|
||||||
try_get,
|
|
||||||
update_url_query,
|
|
||||||
urlencode_postdata,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PicartoIE(InfoExtractor):
|
class PicartoIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
|
_VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
|
||||||
_TEST = {
|
_TEST = {
|
||||||
'url': 'https://picarto.tv/Setz',
|
'url': 'https://picarto.tv/Setz',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -34,65 +27,46 @@ class PicartoIE(InfoExtractor):
|
||||||
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
channel_id = self._match_id(url)
|
||||||
channel_id = mobj.group('id')
|
|
||||||
|
|
||||||
metadata = self._download_json(
|
data = self._download_json(
|
||||||
'https://api.picarto.tv/v1/channel/name/' + channel_id,
|
'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
|
||||||
channel_id)
|
'query': '''{
|
||||||
|
channel(name: "%s") {
|
||||||
|
adult
|
||||||
|
id
|
||||||
|
online
|
||||||
|
stream_name
|
||||||
|
title
|
||||||
|
}
|
||||||
|
getLoadBalancerUrl(channel_name: "%s") {
|
||||||
|
url
|
||||||
|
}
|
||||||
|
}''' % (channel_id, channel_id),
|
||||||
|
})['data']
|
||||||
|
metadata = data['channel']
|
||||||
|
|
||||||
if metadata.get('online') is False:
|
if metadata.get('online') == 0:
|
||||||
raise ExtractorError('Stream is offline', expected=True)
|
raise ExtractorError('Stream is offline', expected=True)
|
||||||
|
title = metadata['title']
|
||||||
|
|
||||||
cdn_data = self._download_json(
|
cdn_data = self._download_json(
|
||||||
'https://picarto.tv/process/channel', channel_id,
|
data['getLoadBalancerUrl']['url'] + '/stream/json_' + metadata['stream_name'] + '.js',
|
||||||
data=urlencode_postdata({'loadbalancinginfo': channel_id}),
|
channel_id, 'Downloading load balancing info')
|
||||||
note='Downloading load balancing info')
|
|
||||||
|
|
||||||
token = mobj.group('token') or 'public'
|
|
||||||
params = {
|
|
||||||
'con': int(time.time() * 1000),
|
|
||||||
'token': token,
|
|
||||||
}
|
|
||||||
|
|
||||||
prefered_edge = cdn_data.get('preferedEdge')
|
|
||||||
formats = []
|
formats = []
|
||||||
|
for source in (cdn_data.get('source') or []):
|
||||||
for edge in cdn_data['edges']:
|
source_url = source.get('url')
|
||||||
edge_ep = edge.get('ep')
|
if not source_url:
|
||||||
if not edge_ep or not isinstance(edge_ep, compat_str):
|
|
||||||
continue
|
continue
|
||||||
edge_id = edge.get('id')
|
source_type = source.get('type')
|
||||||
for tech in cdn_data['techs']:
|
if source_type == 'html5/application/vnd.apple.mpegurl':
|
||||||
tech_label = tech.get('label')
|
formats.extend(self._extract_m3u8_formats(
|
||||||
tech_type = tech.get('type')
|
source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
|
||||||
preference = 0
|
elif source_type == 'html5/video/mp4':
|
||||||
if edge_id == prefered_edge:
|
formats.append({
|
||||||
preference += 1
|
'url': source_url,
|
||||||
format_id = []
|
})
|
||||||
if edge_id:
|
|
||||||
format_id.append(edge_id)
|
|
||||||
if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
|
|
||||||
format_id.append('hls')
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
update_url_query(
|
|
||||||
'https://%s/hls/%s/index.m3u8'
|
|
||||||
% (edge_ep, channel_id), params),
|
|
||||||
channel_id, 'mp4', preference=preference,
|
|
||||||
m3u8_id='-'.join(format_id), fatal=False))
|
|
||||||
continue
|
|
||||||
elif tech_type == 'video/mp4' or tech_label == 'MP4':
|
|
||||||
format_id.append('mp4')
|
|
||||||
formats.append({
|
|
||||||
'url': update_url_query(
|
|
||||||
'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
|
|
||||||
params),
|
|
||||||
'format_id': '-'.join(format_id),
|
|
||||||
'preference': preference,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
# rtmp format does not seem to work
|
|
||||||
continue
|
|
||||||
self._sort_formats(formats)
|
self._sort_formats(formats)
|
||||||
|
|
||||||
mature = metadata.get('adult')
|
mature = metadata.get('adult')
|
||||||
|
@ -103,10 +77,10 @@ class PicartoIE(InfoExtractor):
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': channel_id,
|
'id': channel_id,
|
||||||
'title': self._live_title(metadata.get('title') or channel_id),
|
'title': self._live_title(title.strip()),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
|
|
||||||
'channel': channel_id,
|
'channel': channel_id,
|
||||||
|
'channel_id': metadata.get('id'),
|
||||||
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
'channel_url': 'https://picarto.tv/%s' % channel_id,
|
||||||
'age_limit': age_limit,
|
'age_limit': age_limit,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
|
|
|
@ -31,6 +31,7 @@ class PinterestBaseIE(InfoExtractor):
|
||||||
|
|
||||||
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
title = (data.get('title') or data.get('grid_title') or video_id).strip()
|
||||||
|
|
||||||
|
urls = []
|
||||||
formats = []
|
formats = []
|
||||||
duration = None
|
duration = None
|
||||||
if extract_formats:
|
if extract_formats:
|
||||||
|
@ -38,8 +39,9 @@ class PinterestBaseIE(InfoExtractor):
|
||||||
if not isinstance(format_dict, dict):
|
if not isinstance(format_dict, dict):
|
||||||
continue
|
continue
|
||||||
format_url = url_or_none(format_dict.get('url'))
|
format_url = url_or_none(format_dict.get('url'))
|
||||||
if not format_url:
|
if not format_url or format_url in urls:
|
||||||
continue
|
continue
|
||||||
|
urls.append(format_url)
|
||||||
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
duration = float_or_none(format_dict.get('duration'), scale=1000)
|
||||||
ext = determine_ext(format_url)
|
ext = determine_ext(format_url)
|
||||||
if 'hls' in format_id.lower() or ext == 'm3u8':
|
if 'hls' in format_id.lower() or ext == 'm3u8':
|
||||||
|
|
65
haruhi_dl/extractor/playstuff.py
Normal file
65
haruhi_dl/extractor/playstuff.py
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..compat import compat_str
|
||||||
|
from ..utils import (
|
||||||
|
smuggle_url,
|
||||||
|
try_get,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class PlayStuffIE(InfoExtractor):
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?play\.stuff\.co\.nz/details/(?P<id>[^/?#&]+)'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://play.stuff.co.nz/details/608778ac1de1c4001a3fa09a',
|
||||||
|
'md5': 'c82d3669e5247c64bc382577843e5bd0',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '6250584958001',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Episode 1: Rotorua/Mt Maunganui/Tauranga',
|
||||||
|
'description': 'md5:c154bafb9f0dd02d01fd4100fb1c1913',
|
||||||
|
'uploader_id': '6005208634001',
|
||||||
|
'timestamp': 1619491027,
|
||||||
|
'upload_date': '20210427',
|
||||||
|
},
|
||||||
|
'add_ie': ['BrightcoveNew'],
|
||||||
|
}, {
|
||||||
|
# geo restricted, bypassable
|
||||||
|
'url': 'https://play.stuff.co.nz/details/_6155660351001',
|
||||||
|
'only_matching': True,
|
||||||
|
}]
|
||||||
|
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
|
||||||
|
state = self._parse_json(
|
||||||
|
self._search_regex(
|
||||||
|
r'__INITIAL_STATE__\s*=\s*({.+?})\s*;', webpage, 'state'),
|
||||||
|
video_id)
|
||||||
|
|
||||||
|
account_id = try_get(
|
||||||
|
state, lambda x: x['configurations']['accountId'],
|
||||||
|
compat_str) or '6005208634001'
|
||||||
|
player_id = try_get(
|
||||||
|
state, lambda x: x['configurations']['playerId'],
|
||||||
|
compat_str) or 'default'
|
||||||
|
|
||||||
|
entries = []
|
||||||
|
for item_id, video in state['items'].items():
|
||||||
|
if not isinstance(video, dict):
|
||||||
|
continue
|
||||||
|
asset_id = try_get(
|
||||||
|
video, lambda x: x['content']['attributes']['assetId'],
|
||||||
|
compat_str)
|
||||||
|
if not asset_id:
|
||||||
|
continue
|
||||||
|
entries.append(self.url_result(
|
||||||
|
smuggle_url(
|
||||||
|
self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, asset_id),
|
||||||
|
{'geo_countries': ['NZ']}),
|
||||||
|
'BrightcoveNew', video_id))
|
||||||
|
|
||||||
|
return self.playlist_result(entries, video_id)
|
|
@ -327,7 +327,7 @@ query viewClip {
|
||||||
)
|
)
|
||||||
|
|
||||||
# Some courses also offer widescreen resolution for high quality (see
|
# Some courses also offer widescreen resolution for high quality (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/7766)
|
# https://github.com/ytdl-org/youtube-dl/issues/7766)
|
||||||
widescreen = course.get('supportsWideScreenVideoFormats') is True
|
widescreen = course.get('supportsWideScreenVideoFormats') is True
|
||||||
best_quality = 'high-widescreen' if widescreen else 'high'
|
best_quality = 'high-widescreen' if widescreen else 'high'
|
||||||
if widescreen:
|
if widescreen:
|
||||||
|
@ -388,12 +388,12 @@ query viewClip {
|
||||||
|
|
||||||
# Pluralsight tracks multiple sequential calls to ViewClip API and start
|
# Pluralsight tracks multiple sequential calls to ViewClip API and start
|
||||||
# to return 429 HTTP errors after some time (see
|
# to return 429 HTTP errors after some time (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/pull/6989). Moreover it may even lead
|
# https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead
|
||||||
# to account ban (see https://github.com/ytdl-org/haruhi-dl/issues/6842).
|
# to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
|
||||||
# To somewhat reduce the probability of these consequences
|
# To somewhat reduce the probability of these consequences
|
||||||
# we will sleep random amount of time before each call to ViewClip.
|
# we will sleep random amount of time before each call to ViewClip.
|
||||||
self._sleep(
|
self._sleep(
|
||||||
random.randint(2, 5), display_id,
|
random.randint(5, 10), display_id,
|
||||||
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
|
'%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling')
|
||||||
|
|
||||||
if not viewclip:
|
if not viewclip:
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import itertools
|
import itertools
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
|
@ -14,13 +15,44 @@ from ..utils import (
|
||||||
extract_attributes,
|
extract_attributes,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
parse_iso8601,
|
||||||
strip_or_none,
|
strip_or_none,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
unified_timestamp,
|
unified_timestamp,
|
||||||
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class PolskieRadioIE(InfoExtractor):
|
class PolskieRadioBaseExtractor(InfoExtractor):
    """Shared helper for extractors that scrape ``data-media`` player markup."""

    def _extract_webpage_player_entries(self, webpage, playlist_id, base_data):
        """Collect one entry per unique ``data-media`` player in ``webpage``.

        Each ``data-media`` attribute value is HTML-unescaped and parsed as
        JSON. Values that fail to parse, or that have no ``file``, are
        skipped, as are repeated media URLs.

        :param webpage: HTML text to scan for ``data-media`` attributes.
        :param playlist_id: ID passed to the JSON parser for error context.
        :param base_data: dict copied into every entry before the per-media
            fields are applied; it is not mutated.
        :return: list of entry dicts in document order.
        """
        entries = []
        media_urls = set()

        for data_media in re.findall(r'<[^>]+data-media=(["\']?)({[^>]+})\1', webpage):
            media = self._parse_json(unescapeHTML(data_media[1]), playlist_id, fatal=False)
            # With fatal=False a malformed attribute does not raise, so the
            # result may not be a dict; skip it instead of crashing on .get().
            if not isinstance(media, dict) or not media.get('file'):
                continue
            media_url = self._proto_relative_url(media['file'], 'https:')
            if media_url in media_urls:
                continue
            media_urls.add(media_url)

            entry = base_data.copy()
            entry.update({
                'id': compat_str(media['id']),
                'url': media_url,
                'duration': int_or_none(media.get('length')),
                'vcodec': 'none' if media.get('provider') == 'audio' else None,
            })
            # 'desc' is optional: when missing or empty the title supplied
            # in base_data (if any) is kept.
            entry_title = compat_urllib_parse_unquote(media.get('desc') or '')
            if entry_title:
                entry['title'] = entry_title
            entries.append(entry)

        return entries
|
||||||
|
|
||||||
|
|
||||||
|
class PolskieRadioIE(PolskieRadioBaseExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
|
_VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
# like data-media={"type":"muzyka"}
|
# like data-media={"type":"muzyka"}
|
||||||
|
@ -59,6 +91,14 @@ class PolskieRadioIE(InfoExtractor):
|
||||||
'upload_date': '20201116',
|
'upload_date': '20201116',
|
||||||
},
|
},
|
||||||
}]
|
}]
|
||||||
|
}, {
|
||||||
|
# PR4 audition - other frontend
|
||||||
|
'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '2610977',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Pogłos 29 października godz. 23:01',
|
||||||
|
},
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -81,39 +121,33 @@ class PolskieRadioIE(InfoExtractor):
|
||||||
|
|
||||||
content = self._search_regex(
|
content = self._search_regex(
|
||||||
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>',
|
||||||
webpage, 'content')
|
webpage, 'content', default=None)
|
||||||
|
|
||||||
timestamp = unified_timestamp(self._html_search_regex(
|
timestamp = unified_timestamp(self._html_search_regex(
|
||||||
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
|
||||||
webpage, 'timestamp', fatal=False))
|
webpage, 'timestamp', default=None))
|
||||||
|
|
||||||
thumbnail_url = self._og_search_thumbnail(webpage)
|
thumbnail_url = self._og_search_thumbnail(webpage, default=None)
|
||||||
|
|
||||||
entries = []
|
|
||||||
|
|
||||||
media_urls = set()
|
|
||||||
|
|
||||||
title = self._og_search_title(webpage).strip()
|
title = self._og_search_title(webpage).strip()
|
||||||
|
|
||||||
for data_media in re.findall(r'<[^>]+data-media=(["\']?)({[^>]+})\1', content):
|
description = strip_or_none(self._og_search_description(webpage, default=None))
|
||||||
media = self._parse_json(unescapeHTML(data_media[1]), playlist_id, fatal=False)
|
|
||||||
if not media.get('file'):
|
if not content:
|
||||||
continue
|
return {
|
||||||
media_url = self._proto_relative_url(media['file'], 'http:')
|
'id': playlist_id,
|
||||||
if media_url in media_urls:
|
'url': 'https:' + self._search_regex(r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url'),
|
||||||
continue
|
'title': title,
|
||||||
media_urls.add(media_url)
|
'description': description,
|
||||||
entries.append({
|
|
||||||
'id': compat_str(media['id']),
|
|
||||||
'url': media_url,
|
|
||||||
'title': compat_urllib_parse_unquote(media['desc']) or title,
|
|
||||||
'duration': int_or_none(media.get('length')),
|
|
||||||
'vcodec': 'none' if media.get('provider') == 'audio' else None,
|
|
||||||
'timestamp': timestamp,
|
'timestamp': timestamp,
|
||||||
'thumbnail': thumbnail_url
|
'thumbnail': thumbnail_url,
|
||||||
})
|
}
|
||||||
|
|
||||||
description = strip_or_none(self._og_search_description(webpage))
|
entries = self._extract_webpage_player_entries(content, playlist_id, {
|
||||||
|
'title': title,
|
||||||
|
'timestamp': timestamp,
|
||||||
|
'thumbnail': thumbnail_url,
|
||||||
|
})
|
||||||
|
|
||||||
return self.playlist_result(entries, playlist_id, title, description)
|
return self.playlist_result(entries, playlist_id, title, description)
|
||||||
|
|
||||||
|
@ -297,3 +331,122 @@ class PolskieRadioPlayerIE(InfoExtractor):
|
||||||
'thumbnail': '%s/images/%s-color-logo.png' % (self._BASE_URL, channel_url),
|
'thumbnail': '%s/images/%s-color-logo.png' % (self._BASE_URL, channel_url),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class PolskieRadioPodcastBaseExtractor(InfoExtractor):
    """Common pieces for the Polskie Radio podcast API extractors."""

    _API_BASE = 'https://apipodcasts.polskieradio.pl/api'

    def _parse_episode(self, data):
        """Turn one episode object from the podcast API into an info dict.

        ``guid``, ``url`` and ``title`` are required keys; the remaining
        fields are optional and become ``None`` when absent.
        """
        episode_id = data['guid']
        audio_format = {
            'url': data['url'],
            'filesize': int_or_none(data.get('fileSize')),
        }
        episode_title = data['title']

        info = {
            'id': episode_id,
            'formats': [audio_format],
            'title': episode_title,
        }
        info['description'] = data.get('description')
        info['duration'] = int_or_none(data.get('length'))
        info['timestamp'] = parse_iso8601(data.get('publishDate'))
        info['thumbnail'] = url_or_none(data.get('image'))
        info['series'] = data.get('podcastTitle')
        # The episode name is the same string as the title.
        info['episode'] = episode_title
        return info
|
||||||
|
|
||||||
|
|
||||||
|
class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor):
    """Extract every episode of a podcast as a playlist."""

    IE_NAME = 'polskieradio:podcast:list'
    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://podcasty.polskieradio.pl/podcast/19/',
        'info_dict': {
            'id': '19',
            'title': 'Raport o stanie świata',
            'description': 'Autorski wybór najważniejszych wydarzeń politycznych, społecznych i kulturalnych ostatnich 7 dni na świecie. Z udziałem dziennikarzy, ekspertów, uczestników życia politycznego. Plus dobra muzyka i do tego na temat.',
            'uploader': 'Dariusz Rosiak',
        },
        'playlist_count': 704,
    }]

    # Number of episodes requested per API page.
    _PAGE_SIZE = 10

    def _fetch_episode_page(self, podcast_id, page):
        """Download one page of the podcast listing from the API."""
        return self._download_json(
            '%s/Podcasts/%s/?pageSize=%d&page=%d' % (
                self._API_BASE, podcast_id, self._PAGE_SIZE, page),
            podcast_id, 'Downloading page #%d' % page)

    def _real_extract(self, url):
        """Return a playlist with all episodes of the podcast in ``url``.

        The first page supplies both the playlist metadata and the total
        episode count; further pages are fetched only as far as that count
        requires.
        """
        podcast_id = self._match_id(url)
        podcast = self._fetch_episode_page(podcast_id, 1)
        entries = [self._parse_episode(ep) for ep in podcast['items']]

        item_count = int_or_none(podcast.get('itemCount')) or 0
        if len(entries) < item_count:
            # Ceiling division: a count that is an exact multiple of the
            # page size must not trigger a request for an empty extra page.
            page_count = -(-item_count // self._PAGE_SIZE)
            for page in range(2, page_count + 1):
                page_data = self._fetch_episode_page(podcast_id, page)
                entries.extend(
                    self._parse_episode(ep) for ep in page_data['items'])

        # Metadata is read from the first page rather than from whichever
        # page happened to be downloaded last.
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': str(podcast['id']),
            'title': podcast['title'],
            'description': podcast.get('description'),
            'uploader': podcast.get('announcer'),
        }
|
||||||
|
|
||||||
|
|
||||||
|
class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor):
    """Extract a single podcast track identified by its GUID."""

    IE_NAME = 'polskieradio:podcast'
    _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P<id>[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})'
    _TESTS = [{
        'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32',
        'info_dict': {
            'id': '6eafe403-cb8f-4756-b896-4455c3713c32',
            'ext': 'mp3',
            'title': 'Theresa May rezygnuje. Co dalej z brexitem?',
            'description': 'Brytyjska premier Theresa May zapowiedziała w piątek (24.05), że 7 czerwca ustąpi ze stanowiska szefowej Partii Konserwatywnej, uruchamiając proces wyboru jej następcy. Nowy szef torysów przejmie po niej także urząd premiera. ',
        },
    }]

    def _real_extract(self, url):
        """POST the track GUID to the ``/audio`` endpoint and parse the first result.

        :raises ExtractorError: when the API response is not a non-empty list.
        """
        podcast_id = self._match_id(url)
        data = self._download_json(
            '%s/audio' % (self._API_BASE),
            podcast_id, 'Downloading podcast metadata',
            data=json.dumps({
                'guids': [podcast_id],
            }).encode('utf-8'),
            headers={
                'Content-Type': 'application/json',
            })
        # Fail with a readable message instead of a bare IndexError/KeyError
        # when the API has nothing for this GUID.
        if not isinstance(data, list) or not data:
            raise ExtractorError(
                'Podcast API returned no episode for this track',
                expected=True, video_id=podcast_id)
        return self._parse_episode(data[0])
|
||||||
|
|
||||||
|
|
||||||
|
class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor):
    """Extract the audio players embedded in a radiokierowcow.pl article."""

    _VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P<id>[0-9]+)'
    IE_NAME = 'polskieradio:kierowcow'

    _TESTS = [{
        'url': 'https://radiokierowcow.pl/artykul/2694529',
        'info_dict': {
            'id': '2694529',
            'title': 'Zielona fala reliktem przeszłości?',
            'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2',
        },
        'playlist_count': 3,
    }]

    def _real_extract(self, url):
        """Return a playlist of the article's ``data-media`` players.

        The article page is downloaded only to learn the Next.js build ID,
        which is needed to address the article's JSON data file.
        """
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        build_id = self._search_nextjs_data(webpage, media_id)['buildId']

        article_url = (
            'https://radiokierowcow.pl/_next/data/%s/artykul/%s.json?articleId=%s'
            % (build_id, media_id, media_id))
        page_data = self._download_json(article_url, media_id)['pageProps']['data']

        title = page_data['title']
        base_entry = {
            'title': title,
        }
        entries = self._extract_webpage_player_entries(
            page_data['content'], media_id, base_entry)

        return {
            '_type': 'playlist',
            'id': media_id,
            'entries': entries,
            'title': title,
            'description': page_data['lead'],
        }
|
||||||
|
|
|
@ -31,6 +31,7 @@ from ..utils import (
|
||||||
class PornHubBaseIE(InfoExtractor):
|
class PornHubBaseIE(InfoExtractor):
|
||||||
_REQUIRES_PLAYWRIGHT = True
|
_REQUIRES_PLAYWRIGHT = True
|
||||||
_NETRC_MACHINE = 'pornhub'
|
_NETRC_MACHINE = 'pornhub'
|
||||||
|
_PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
|
||||||
|
|
||||||
def _download_webpage_handle(self, *args, **kwargs):
|
def _download_webpage_handle(self, *args, **kwargs):
|
||||||
def dl(*args, **kwargs):
|
def dl(*args, **kwargs):
|
||||||
|
@ -125,11 +126,13 @@ class PornHubIE(PornHubBaseIE):
|
||||||
_VALID_URL = r'''(?x)
|
_VALID_URL = r'''(?x)
|
||||||
https?://
|
https?://
|
||||||
(?:
|
(?:
|
||||||
(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
(?:[^/]+\.)?
|
||||||
|
%s
|
||||||
|
/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
|
||||||
(?:www\.)?thumbzilla\.com/video/
|
(?:www\.)?thumbzilla\.com/video/
|
||||||
)
|
)
|
||||||
(?P<id>[\da-z]+)
|
(?P<id>[\da-z]+)
|
||||||
'''
|
''' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -169,6 +172,7 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': True,
|
'skip_download': True,
|
||||||
},
|
},
|
||||||
|
'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
|
||||||
}, {
|
}, {
|
||||||
# subtitles
|
# subtitles
|
||||||
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a0daf3c370f6',
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a0daf3c370f6',
|
||||||
|
@ -237,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
# geo restricted
|
||||||
|
'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
|
||||||
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -267,7 +278,8 @@ class PornHubIE(PornHubBaseIE):
|
||||||
webpage = dl_webpage('pc')
|
webpage = dl_webpage('pc')
|
||||||
|
|
||||||
error_msg = self._html_search_regex(
|
error_msg = self._html_search_regex(
|
||||||
r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
(r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
|
||||||
|
r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
|
||||||
webpage, 'error message', default=None, group='error')
|
webpage, 'error message', default=None, group='error')
|
||||||
if error_msg:
|
if error_msg:
|
||||||
error_msg = re.sub(r'\s+', ' ', error_msg)
|
error_msg = re.sub(r'\s+', ' ', error_msg)
|
||||||
|
@ -275,6 +287,11 @@ class PornHubIE(PornHubBaseIE):
|
||||||
'PornHub said: %s' % error_msg,
|
'PornHub said: %s' % error_msg,
|
||||||
expected=True, video_id=video_id)
|
expected=True, video_id=video_id)
|
||||||
|
|
||||||
|
if any(re.search(p, webpage) for p in (
|
||||||
|
r'class=["\']geoBlocked["\']',
|
||||||
|
r'>\s*This content is unavailable in your country')):
|
||||||
|
self.raise_geo_restricted()
|
||||||
|
|
||||||
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
# video_title from flashvars contains whitespace instead of non-ASCII (see
|
||||||
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
|
||||||
# on that anymore.
|
# on that anymore.
|
||||||
|
@ -396,35 +413,49 @@ class PornHubIE(PornHubBaseIE):
|
||||||
|
|
||||||
upload_date = None
|
upload_date = None
|
||||||
formats = []
|
formats = []
|
||||||
|
|
||||||
|
def add_format(format_url, height=None):
|
||||||
|
ext = determine_ext(format_url)
|
||||||
|
if ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
format_url, video_id, mpd_id='dash', fatal=False))
|
||||||
|
return
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
return
|
||||||
|
if not height:
|
||||||
|
height = int_or_none(self._search_regex(
|
||||||
|
r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
|
||||||
|
default=None))
|
||||||
|
formats.append({
|
||||||
|
'url': format_url,
|
||||||
|
'format_id': '%dp' % height if height else None,
|
||||||
|
'height': height,
|
||||||
|
})
|
||||||
|
|
||||||
for video_url, height in video_urls:
|
for video_url, height in video_urls:
|
||||||
if not upload_date:
|
if not upload_date:
|
||||||
upload_date = self._search_regex(
|
upload_date = self._search_regex(
|
||||||
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None)
|
||||||
if upload_date:
|
if upload_date:
|
||||||
upload_date = upload_date.replace('/', '')
|
upload_date = upload_date.replace('/', '')
|
||||||
ext = determine_ext(video_url)
|
if '/video/get_media' in video_url:
|
||||||
if ext == 'mpd':
|
medias = self._download_json(video_url, video_id, fatal=False)
|
||||||
formats.extend(self._extract_mpd_formats(
|
if isinstance(medias, list):
|
||||||
video_url, video_id, mpd_id='dash', fatal=False))
|
for media in medias:
|
||||||
|
if not isinstance(media, dict):
|
||||||
|
continue
|
||||||
|
video_url = url_or_none(media.get('videoUrl'))
|
||||||
|
if not video_url:
|
||||||
|
continue
|
||||||
|
height = int_or_none(media.get('quality'))
|
||||||
|
add_format(video_url, height)
|
||||||
continue
|
continue
|
||||||
elif ext == 'm3u8':
|
add_format(video_url)
|
||||||
formats.extend(self._extract_m3u8_formats(
|
self._sort_formats(
|
||||||
video_url, video_id, 'mp4', entry_protocol='m3u8_native',
|
formats, field_preference=('height', 'width', 'fps', 'format_id'))
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
continue
|
|
||||||
tbr = None
|
|
||||||
mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
|
|
||||||
if mobj:
|
|
||||||
if not height:
|
|
||||||
height = int(mobj.group('height'))
|
|
||||||
tbr = int(mobj.group('tbr'))
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
'format_id': '%dp' % height if height else None,
|
|
||||||
'height': height,
|
|
||||||
'tbr': tbr,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
video_uploader = self._html_search_regex(
|
video_uploader = self._html_search_regex(
|
||||||
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
|
||||||
|
@ -477,7 +508,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
def _extract_entries(self, webpage, host):
|
def _extract_entries(self, webpage, host):
|
||||||
# Only process container div with main playlist content skipping
|
# Only process container div with main playlist content skipping
|
||||||
# drop-down menu that uses similar pattern for videos (see
|
# drop-down menu that uses similar pattern for videos (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/issues/11594).
|
# https://github.com/ytdl-org/youtube-dl/issues/11594).
|
||||||
container = self._search_regex(
|
container = self._search_regex(
|
||||||
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
|
r'(?s)(<div[^>]+class=["\']container.+)', webpage,
|
||||||
'container', default=webpage)
|
'container', default=webpage)
|
||||||
|
@ -493,7 +524,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserIE(PornHubPlaylistBaseIE):
|
class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph',
|
'url': 'https://www.pornhub.com/model/zoe_ph',
|
||||||
'playlist_mincount': 118,
|
'playlist_mincount': 118,
|
||||||
|
@ -515,13 +546,16 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
# Unavailable via /videos page, but available with direct pagination
|
# Unavailable via /videos page, but available with direct pagination
|
||||||
# on pornstar page (see [1]), requires premium
|
# on pornstar page (see [1]), requires premium
|
||||||
# 1. https://github.com/hdl-org/haruhi-dl/issues/27853
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||||
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
'url': 'https://www.pornhubpremium.com/pornstar/sienna-west',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
}, {
|
}, {
|
||||||
# Same as before, multi page
|
# Same as before, multi page
|
||||||
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
|
@ -568,7 +602,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
# Some sources may not be available via /videos page,
|
# Some sources may not be available via /videos page,
|
||||||
# trying to fallback to main page pagination (see [1])
|
# trying to fallback to main page pagination (see [1])
|
||||||
# 1. https://github.com/hdl-org/haruhi-dl/issues/27853
|
# 1. https://github.com/ytdl-org/youtube-dl/issues/27853
|
||||||
if is_404(e) and page_num == first_page and VIDEOS in base_url:
|
if is_404(e) and page_num == first_page and VIDEOS in base_url:
|
||||||
base_url = base_url.replace(VIDEOS, '')
|
base_url = base_url.replace(VIDEOS, '')
|
||||||
webpage = download_page(base_url, page_num, fallback=True)
|
webpage = download_page(base_url, page_num, fallback=True)
|
||||||
|
@ -597,7 +631,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
_VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -702,6 +736,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://de.pornhub.com/playlist/4667351',
|
'url': 'https://de.pornhub.com/playlist/4667351',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -712,7 +749,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
|
||||||
|
|
||||||
|
|
||||||
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
|
_VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
@ -722,4 +759,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
|
||||||
}, {
|
}, {
|
||||||
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
}, {
|
||||||
|
'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
|
||||||
|
'only_matching': True,
|
||||||
}]
|
}]
|
||||||
|
|
|
@ -185,8 +185,8 @@ class ProSiebenSat1IE(ProSiebenSat1BaseIE):
|
||||||
|
|
||||||
_TESTS = [
|
_TESTS = [
|
||||||
{
|
{
|
||||||
# Tests changes introduced in https://github.com/ytdl-org/haruhi-dl/pull/6242
|
# Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
|
||||||
# in response to fixing https://github.com/ytdl-org/haruhi-dl/issues/6215:
|
# in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
|
||||||
# - malformed f4m manifest support
|
# - malformed f4m manifest support
|
||||||
# - proper handling of URLs starting with `https?://` in 2.0 manifests
|
# - proper handling of URLs starting with `https?://` in 2.0 manifests
|
||||||
# - recursive child f4m manifests extraction
|
# - recursive child f4m manifests extraction
|
||||||
|
|
|
@ -23,6 +23,8 @@ from ..utils import (
|
||||||
from .libsyn import LibsynIE
|
from .libsyn import LibsynIE
|
||||||
from .xnews import XLinkIE
|
from .xnews import XLinkIE
|
||||||
from .tvp import TVPEmbedIE
|
from .tvp import TVPEmbedIE
|
||||||
|
from .acast import ACastPlayerIE
|
||||||
|
from .videotarget import VideoTargetIE
|
||||||
|
|
||||||
|
|
||||||
class PulseVideoIE(InfoExtractor):
|
class PulseVideoIE(InfoExtractor):
|
||||||
|
@ -53,13 +55,13 @@ class PulseVideoIE(InfoExtractor):
|
||||||
|
|
||||||
def _extract_from_id(self, video_id, webpage=None):
|
def _extract_from_id(self, video_id, webpage=None):
|
||||||
response = self._download_json(
|
response = self._download_json(
|
||||||
'http://qi.ckm.onetapi.pl/', video_id,
|
'https://player-api.dreamlab.pl/', video_id,
|
||||||
query={
|
query={
|
||||||
'body[id]': video_id,
|
'body[id]': video_id,
|
||||||
'body[jsonrpc]': '2.0',
|
'body[jsonrpc]': '2.0',
|
||||||
'body[method]': 'get_asset_detail',
|
'body[method]': 'get_asset_detail',
|
||||||
'body[params][ID_Publikacji]': video_id,
|
'body[params][ID_Publikacji]': video_id,
|
||||||
'body[params][Service]': 'www.onet.pl',
|
'body[params][version]': '2.0',
|
||||||
'content-type': 'application/jsonp',
|
'content-type': 'application/jsonp',
|
||||||
'x-onet-app': 'player.front.onetapi.pl',
|
'x-onet-app': 'player.front.onetapi.pl',
|
||||||
})
|
})
|
||||||
|
@ -82,6 +84,10 @@ class PulseVideoIE(InfoExtractor):
|
||||||
video_url = f.get('url')
|
video_url = f.get('url')
|
||||||
if not video_url:
|
if not video_url:
|
||||||
continue
|
continue
|
||||||
|
# UHD formats are bullshit, they are just duplicates
|
||||||
|
# https://git.sakamoto.pl/laudompat/haruhi-dl/-/issues/45
|
||||||
|
if format_id.endswith('-uhd') and formats_dict.get(format_id[:-len('-uhd')]):
|
||||||
|
continue
|
||||||
ext = determine_ext(video_url)
|
ext = determine_ext(video_url)
|
||||||
if format_id.startswith('ism'):
|
if format_id.startswith('ism'):
|
||||||
formats.extend(self._extract_ism_formats(
|
formats.extend(self._extract_ism_formats(
|
||||||
|
@ -180,6 +186,22 @@ class PulsEmbedIE(InfoExtractor):
|
||||||
'timestamp': 1607174136,
|
'timestamp': 1607174136,
|
||||||
'upload_date': '20201205',
|
'upload_date': '20201205',
|
||||||
},
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'pulsembed:q31qhd1LC',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '601dc897fb37095537d48e6f',
|
||||||
|
'ext': 'mp3',
|
||||||
|
'title': 'Maciej Konieczny: "Podatek medialny to bardziej mechanizm kontroli niż podatkowy”',
|
||||||
|
'upload_date': '20210208',
|
||||||
|
'timestamp': 1612764000,
|
||||||
|
},
|
||||||
|
}, {
|
||||||
|
'url': 'pulsembed:P_Q2gen3E',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '16028',
|
||||||
|
'ext': 'mp4',
|
||||||
|
'title': 'Słabe szanse na taki wzrost gospodarczy o jakim mówi rząd',
|
||||||
|
},
|
||||||
}]
|
}]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -192,6 +214,10 @@ class PulsEmbedIE(InfoExtractor):
|
||||||
return 'TVP'
|
return 'TVP'
|
||||||
if '//html5-player.libsyn.com/' in ext_url:
|
if '//html5-player.libsyn.com/' in ext_url:
|
||||||
return 'Libsyn'
|
return 'Libsyn'
|
||||||
|
if '//player.acast.com/' in ext_url:
|
||||||
|
return 'ACastPlayer'
|
||||||
|
if '//videotarget.pl/' in ext_url:
|
||||||
|
return 'VideoTarget'
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -260,6 +286,8 @@ class PulsEmbedIE(InfoExtractor):
|
||||||
LibsynIE,
|
LibsynIE,
|
||||||
XLinkIE,
|
XLinkIE,
|
||||||
TVPEmbedIE,
|
TVPEmbedIE,
|
||||||
|
ACastPlayerIE,
|
||||||
|
VideoTargetIE,
|
||||||
PulseVideoIE,
|
PulseVideoIE,
|
||||||
):
|
):
|
||||||
embie_urls = embie._extract_urls(webpage, url=referer)
|
embie_urls = embie._extract_urls(webpage, url=referer)
|
||||||
|
|
|
@ -96,7 +96,7 @@ class RadioCanadaIE(InfoExtractor):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
# protectionType does not necessarily mean the video is DRM protected (see
|
# protectionType does not necessarily mean the video is DRM protected (see
|
||||||
# https://github.com/ytdl-org/haruhi-dl/pull/18609).
|
# https://github.com/ytdl-org/youtube-dl/pull/18609).
|
||||||
if get_meta('protectionType'):
|
if get_meta('protectionType'):
|
||||||
self.report_warning('This video is probably DRM protected.')
|
self.report_warning('This video is probably DRM protected.')
|
||||||
|
|
||||||
|
|
100
haruhi_dl/extractor/radiokapital.py
Normal file
100
haruhi_dl/extractor/radiokapital.py
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
unescapeHTML,
|
||||||
|
)
|
||||||
|
|
||||||
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
|
|
||||||
|
class RadioKapitalBaseIE(InfoExtractor):
    """Shared API access and episode parsing for Radio Kapitał extractors."""

    # offtopic: Kapitał did a great job with their frontend, which just works quickly after opening
    # this just can't be compared to any commercial radio or news services.
    # also it's the first wordpress page I don't hate.
    def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs=None):
        """Download ``resource`` from the site's ``kapital/v1`` JSON API.

        :param resource: path below the API root, e.g. ``episodes/<slug>``.
        :param video_id: ID used for log/error context.
        :param note: progress message shown while downloading.
        :param qs: optional mapping of query-string parameters.
        :return: the decoded JSON response.
        """
        # None instead of a mutable {} default; an omitted qs still yields
        # the same URL as before (empty query string).
        return self._download_json(
            f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs or {})}',
            video_id, note=note)

    @staticmethod
    def _parse_release_date(published):
        """Convert the API's ``published`` string to ``YYYYMMDD`` or ``None``.

        NOTE(review): the day/month/year offsets (0, 3, 6) are inherited from
        the previous slicing and assume a ``DD?MM?YYYY`` layout - confirm
        against the API. The separators are now excluded so the result is
        digits only.
        """
        if not isinstance(published, str):
            return None
        release = published[6:10] + published[3:5] + published[:2]
        if len(release) == 8 and release.isdigit():
            return release
        return None

    def _parse_episode(self, ep):
        """Map an API episode response to a ``url_transparent`` Mixcloud result."""
        data = ep['data']
        return {
            '_type': 'url_transparent',
            'url': data['mixcloud_url'],
            'ie_key': 'Mixcloud',
            'id': str(data['id']),
            'title': unescapeHTML(data['title']),
            'description': data.get('content'),
            'tags': [tag['name'] for tag in data['tags']],
            'release_date': self._parse_release_date(data.get('published')),
            'series': data['show']['title'],
        }
|
||||||
|
|
||||||
|
|
||||||
|
class RadioKapitalIE(RadioKapitalBaseIE):
|
||||||
|
IE_NAME = 'radiokapital'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20',
|
||||||
|
'ext': 'm4a',
|
||||||
|
'title': '#5: It’s okay to be immaterial',
|
||||||
|
'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4',
|
||||||
|
'uploader': 'Radio Kapitał',
|
||||||
|
'uploader_id': 'radiokapital',
|
||||||
|
'timestamp': 1621640164,
|
||||||
|
'upload_date': '20210521',
|
||||||
|
},
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
episode = self._call_api('episodes/%s' % video_id, video_id)
|
||||||
|
return self._parse_episode(episode)
|
||||||
|
|
||||||
|
|
||||||
|
class RadioKapitalShowIE(RadioKapitalBaseIE):
|
||||||
|
IE_NAME = 'radiokapital:show'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])'
|
||||||
|
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://radiokapital.pl/shows/wesz',
|
||||||
|
'info_dict': {
|
||||||
|
'id': '100',
|
||||||
|
'title': 'WĘSZ',
|
||||||
|
'description': 'md5:9046105f7eeb03b7f01240fbed245df6',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 17,
|
||||||
|
}]
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
|
||||||
|
page_no = 1
|
||||||
|
page_count = 1
|
||||||
|
entries = []
|
||||||
|
while page_no <= page_count:
|
||||||
|
episode_list = self._call_api(
|
||||||
|
'episodes', video_id,
|
||||||
|
f'Downloading episode list page #{page_no}', qs={
|
||||||
|
'show': video_id,
|
||||||
|
'page': page_no,
|
||||||
|
})
|
||||||
|
page_no += 1
|
||||||
|
page_count = episode_list['max']
|
||||||
|
for ep in episode_list['items']:
|
||||||
|
entries.append(self._parse_episode(ep))
|
||||||
|
|
||||||
|
show = episode_list['items'][0]['data']['show']
|
||||||
|
return {
|
||||||
|
'_type': 'playlist',
|
||||||
|
'entries': entries,
|
||||||
|
'id': str(show['id']),
|
||||||
|
'title': show['title'],
|
||||||
|
'description': show['content'],
|
||||||
|
}
|
|
@ -15,17 +15,17 @@ class RDSIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
|
_VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
|
# has two 9c9media ContentPackages, the web player selects the first ContentPackage
|
||||||
|
'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '604333',
|
'id': '2083309',
|
||||||
'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
|
'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
'title': 'Fowler Jr. prend la direction de Jacksonville',
|
'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande',
|
||||||
'description': 'Dante Fowler Jr. est le troisième choix du repêchage 2015 de la NFL. ',
|
'description': 'md5:83fa38ecc4a79b19e433433254077f25',
|
||||||
'timestamp': 1430397346,
|
'timestamp': 1606129030,
|
||||||
'upload_date': '20150430',
|
'upload_date': '20201123',
|
||||||
'duration': 154.354,
|
'duration': 773.039,
|
||||||
'age_limit': 0,
|
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
|
'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934',
|
||||||
|
|
|
@ -133,8 +133,10 @@ class RedBullEmbedIE(RedBullTVIE):
|
||||||
rrn_id = self._match_id(url)
|
rrn_id = self._match_id(url)
|
||||||
asset_id = self._download_json(
|
asset_id = self._download_json(
|
||||||
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
|
'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql',
|
||||||
rrn_id, headers={'API-KEY': 'e90a1ff11335423998b100c929ecc866'},
|
rrn_id, headers={
|
||||||
query={
|
'Accept': 'application/json',
|
||||||
|
'API-KEY': 'e90a1ff11335423998b100c929ecc866',
|
||||||
|
}, query={
|
||||||
'query': '''{
|
'query': '''{
|
||||||
resource(id: "%s", enforceGeoBlocking: false) {
|
resource(id: "%s", enforceGeoBlocking: false) {
|
||||||
%s
|
%s
|
||||||
|
|
|
@ -63,7 +63,7 @@ class RtlNlIE(InfoExtractor):
|
||||||
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
|
||||||
}
|
}
|
||||||
}, {
|
}, {
|
||||||
# empty synopsis and missing episodes (see https://github.com/ytdl-org/haruhi-dl/issues/6275)
|
# empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
|
||||||
# best format available nettv
|
# best format available nettv
|
||||||
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
|
'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import io
|
||||||
import re
|
import re
|
||||||
import time
|
import sys
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from ..compat import (
|
from ..compat import (
|
||||||
|
@ -14,56 +15,13 @@ from ..utils import (
|
||||||
determine_ext,
|
determine_ext,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
float_or_none,
|
float_or_none,
|
||||||
|
qualities,
|
||||||
remove_end,
|
remove_end,
|
||||||
remove_start,
|
remove_start,
|
||||||
sanitized_Request,
|
|
||||||
std_headers,
|
std_headers,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))
|
||||||
def _decrypt_url(png):
|
|
||||||
encrypted_data = compat_b64decode(png)
|
|
||||||
text_index = encrypted_data.find(b'tEXt')
|
|
||||||
text_chunk = encrypted_data[text_index - 4:]
|
|
||||||
length = compat_struct_unpack('!I', text_chunk[:4])[0]
|
|
||||||
# Use bytearray to get integers when iterating in both python 2.x and 3.x
|
|
||||||
data = bytearray(text_chunk[8:8 + length])
|
|
||||||
data = [chr(b) for b in data if b != 0]
|
|
||||||
hash_index = data.index('#')
|
|
||||||
alphabet_data = data[:hash_index]
|
|
||||||
url_data = data[hash_index + 1:]
|
|
||||||
if url_data[0] == 'H' and url_data[3] == '%':
|
|
||||||
# remove useless HQ%% at the start
|
|
||||||
url_data = url_data[4:]
|
|
||||||
|
|
||||||
alphabet = []
|
|
||||||
e = 0
|
|
||||||
d = 0
|
|
||||||
for l in alphabet_data:
|
|
||||||
if d == 0:
|
|
||||||
alphabet.append(l)
|
|
||||||
d = e = (e + 1) % 4
|
|
||||||
else:
|
|
||||||
d -= 1
|
|
||||||
url = ''
|
|
||||||
f = 0
|
|
||||||
e = 3
|
|
||||||
b = 1
|
|
||||||
for letter in url_data:
|
|
||||||
if f == 0:
|
|
||||||
l = int(letter) * 10
|
|
||||||
f = 1
|
|
||||||
else:
|
|
||||||
if e == 0:
|
|
||||||
l += int(letter)
|
|
||||||
url += alphabet[l]
|
|
||||||
e = (b + 3) % 4
|
|
||||||
f = 0
|
|
||||||
b += 1
|
|
||||||
else:
|
|
||||||
e -= 1
|
|
||||||
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
class RTVEALaCartaIE(InfoExtractor):
|
class RTVEALaCartaIE(InfoExtractor):
|
||||||
|
@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
|
||||||
'duration': 5024.566,
|
'duration': 5024.566,
|
||||||
|
'series': 'Balonmano',
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
}, {
|
}, {
|
||||||
'note': 'Live stream',
|
'note': 'Live stream',
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '1694255',
|
'id': '1694255',
|
||||||
'ext': 'flv',
|
'ext': 'mp4',
|
||||||
'title': 'TODO',
|
'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
|
'is_live': True,
|
||||||
|
},
|
||||||
|
'params': {
|
||||||
|
'skip_download': 'live stream',
|
||||||
},
|
},
|
||||||
'skip': 'The f4m manifest can\'t be used yet',
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
|
||||||
'md5': 'e55e162379ad587e9640eda4f7353c0f',
|
'md5': 'd850f3c8731ea53952ebab489cf81cbf',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '4236788',
|
'id': '4236788',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Servir y proteger - Capítulo 104 ',
|
'title': 'Servir y proteger - Capítulo 104',
|
||||||
'duration': 3222.0,
|
'duration': 3222.0,
|
||||||
},
|
},
|
||||||
'params': {
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
'skip_download': True, # requires ffmpeg
|
|
||||||
},
|
|
||||||
}, {
|
}, {
|
||||||
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
|
||||||
'only_matching': True,
|
'only_matching': True,
|
||||||
|
@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_initialize(self):
|
def _real_initialize(self):
|
||||||
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
|
||||||
manager_info = self._download_json(
|
self._manager = self._download_json(
|
||||||
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
'http://www.rtve.es/odin/loki/' + user_agent_b64,
|
||||||
None, 'Fetching manager info')
|
None, 'Fetching manager info')['manager']
|
||||||
self._manager = manager_info['manager']
|
|
||||||
|
@staticmethod
|
||||||
|
def _decrypt_url(png):
|
||||||
|
encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
|
||||||
|
while True:
|
||||||
|
length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
|
||||||
|
chunk_type = encrypted_data.read(4)
|
||||||
|
if chunk_type == b'IEND':
|
||||||
|
break
|
||||||
|
data = encrypted_data.read(length)
|
||||||
|
if chunk_type == b'tEXt':
|
||||||
|
alphabet_data, text = data.split(b'\0')
|
||||||
|
quality, url_data = text.split(b'%%')
|
||||||
|
alphabet = []
|
||||||
|
e = 0
|
||||||
|
d = 0
|
||||||
|
for l in _bytes_to_chr(alphabet_data):
|
||||||
|
if d == 0:
|
||||||
|
alphabet.append(l)
|
||||||
|
d = e = (e + 1) % 4
|
||||||
|
else:
|
||||||
|
d -= 1
|
||||||
|
url = ''
|
||||||
|
f = 0
|
||||||
|
e = 3
|
||||||
|
b = 1
|
||||||
|
for letter in _bytes_to_chr(url_data):
|
||||||
|
if f == 0:
|
||||||
|
l = int(letter) * 10
|
||||||
|
f = 1
|
||||||
|
else:
|
||||||
|
if e == 0:
|
||||||
|
l += int(letter)
|
||||||
|
url += alphabet[l]
|
||||||
|
e = (b + 3) % 4
|
||||||
|
f = 0
|
||||||
|
b += 1
|
||||||
|
else:
|
||||||
|
e -= 1
|
||||||
|
|
||||||
|
yield quality.decode(), url
|
||||||
|
encrypted_data.read(4) # CRC
|
||||||
|
|
||||||
|
def _extract_png_formats(self, video_id):
|
||||||
|
png = self._download_webpage(
|
||||||
|
'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
|
||||||
|
video_id, 'Downloading url information', query={'q': 'v2'})
|
||||||
|
q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
|
||||||
|
formats = []
|
||||||
|
for quality, video_url in self._decrypt_url(png):
|
||||||
|
ext = determine_ext(video_url)
|
||||||
|
if ext == 'm3u8':
|
||||||
|
formats.extend(self._extract_m3u8_formats(
|
||||||
|
video_url, video_id, 'mp4', 'm3u8_native',
|
||||||
|
m3u8_id='hls', fatal=False))
|
||||||
|
elif ext == 'mpd':
|
||||||
|
formats.extend(self._extract_mpd_formats(
|
||||||
|
video_url, video_id, 'dash', fatal=False))
|
||||||
|
else:
|
||||||
|
formats.append({
|
||||||
|
'format_id': quality,
|
||||||
|
'quality': q(quality),
|
||||||
|
'url': video_url,
|
||||||
|
})
|
||||||
|
self._sort_formats(formats)
|
||||||
|
return formats
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
video_id = self._match_id(url)
|
||||||
video_id = mobj.group('id')
|
|
||||||
info = self._download_json(
|
info = self._download_json(
|
||||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
||||||
video_id)['page']['items'][0]
|
video_id)['page']['items'][0]
|
||||||
if info['state'] == 'DESPU':
|
if info['state'] == 'DESPU':
|
||||||
raise ExtractorError('The video is no longer available', expected=True)
|
raise ExtractorError('The video is no longer available', expected=True)
|
||||||
title = info['title']
|
title = info['title'].strip()
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
|
formats = self._extract_png_formats(video_id)
|
||||||
png_request = sanitized_Request(png_url)
|
|
||||||
png_request.add_header('Referer', url)
|
|
||||||
png = self._download_webpage(png_request, video_id, 'Downloading url information')
|
|
||||||
video_url = _decrypt_url(png)
|
|
||||||
ext = determine_ext(video_url)
|
|
||||||
|
|
||||||
formats = []
|
|
||||||
if not video_url.endswith('.f4m') and ext != 'm3u8':
|
|
||||||
if '?' not in video_url:
|
|
||||||
video_url = video_url.replace('resources/', 'auth/resources/')
|
|
||||||
video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
|
|
||||||
|
|
||||||
if ext == 'm3u8':
|
|
||||||
formats.extend(self._extract_m3u8_formats(
|
|
||||||
video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
|
|
||||||
m3u8_id='hls', fatal=False))
|
|
||||||
elif ext == 'f4m':
|
|
||||||
formats.extend(self._extract_f4m_formats(
|
|
||||||
video_url, video_id, f4m_id='hds', fatal=False))
|
|
||||||
else:
|
|
||||||
formats.append({
|
|
||||||
'url': video_url,
|
|
||||||
})
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
subtitles = None
|
subtitles = None
|
||||||
if info.get('sbtFile') is not None:
|
sbt_file = info.get('sbtFile')
|
||||||
subtitles = self.extract_subtitles(video_id, info['sbtFile'])
|
if sbt_file:
|
||||||
|
subtitles = self.extract_subtitles(video_id, sbt_file)
|
||||||
|
|
||||||
|
is_live = info.get('live') is True
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title) if is_live else title,
|
||||||
'formats': formats,
|
'formats': formats,
|
||||||
'thumbnail': info.get('image'),
|
'thumbnail': info.get('image'),
|
||||||
'page_url': url,
|
|
||||||
'subtitles': subtitles,
|
'subtitles': subtitles,
|
||||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
'duration': float_or_none(info.get('duration'), 1000),
|
||||||
|
'is_live': is_live,
|
||||||
|
'series': info.get('programTitle'),
|
||||||
}
|
}
|
||||||
|
|
||||||
def _get_subtitles(self, video_id, sub_file):
|
def _get_subtitles(self, video_id, sub_file):
|
||||||
|
@ -174,48 +179,26 @@ class RTVEALaCartaIE(InfoExtractor):
|
||||||
for s in subs)
|
for s in subs)
|
||||||
|
|
||||||
|
|
||||||
class RTVEInfantilIE(InfoExtractor):
|
class RTVEInfantilIE(RTVEALaCartaIE):
|
||||||
IE_NAME = 'rtve.es:infantil'
|
IE_NAME = 'rtve.es:infantil'
|
||||||
IE_DESC = 'RTVE infantil'
|
IE_DESC = 'RTVE infantil'
|
||||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
|
_VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'
|
||||||
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
|
||||||
'md5': '915319587b33720b8e0357caaa6617e6',
|
'md5': '5747454717aedf9f9fdf212d1bcfc48d',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '3040283',
|
'id': '3040283',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 'Maneras de vivir',
|
'title': 'Maneras de vivir',
|
||||||
'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
|
'thumbnail': r're:https?://.+/1426182947956\.JPG',
|
||||||
'duration': 357.958,
|
'duration': 357.958,
|
||||||
},
|
},
|
||||||
|
'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
|
||||||
}]
|
}]
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
video_id = self._match_id(url)
|
|
||||||
info = self._download_json(
|
|
||||||
'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
|
|
||||||
video_id)['page']['items'][0]
|
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
class RTVELiveIE(RTVEALaCartaIE):
|
||||||
vidplayer_id = self._search_regex(
|
|
||||||
r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
|
|
||||||
|
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
|
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
|
||||||
video_url = _decrypt_url(png)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'id': video_id,
|
|
||||||
'ext': 'mp4',
|
|
||||||
'title': info['title'],
|
|
||||||
'url': video_url,
|
|
||||||
'thumbnail': info.get('image'),
|
|
||||||
'duration': float_or_none(info.get('duration'), scale=1000),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class RTVELiveIE(InfoExtractor):
|
|
||||||
IE_NAME = 'rtve.es:live'
|
IE_NAME = 'rtve.es:live'
|
||||||
IE_DESC = 'RTVE.es live streams'
|
IE_DESC = 'RTVE.es live streams'
|
||||||
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
_VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
|
||||||
|
@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': 'la-1',
|
'id': 'la-1',
|
||||||
'ext': 'mp4',
|
'ext': 'mp4',
|
||||||
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
|
'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
|
||||||
},
|
},
|
||||||
'params': {
|
'params': {
|
||||||
'skip_download': 'live stream',
|
'skip_download': 'live stream',
|
||||||
|
@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
start_time = time.gmtime()
|
|
||||||
video_id = mobj.group('id')
|
video_id = mobj.group('id')
|
||||||
|
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
|
||||||
title = remove_start(title, 'Estoy viendo ')
|
title = remove_start(title, 'Estoy viendo ')
|
||||||
title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)
|
|
||||||
|
|
||||||
vidplayer_id = self._search_regex(
|
vidplayer_id = self._search_regex(
|
||||||
(r'playerId=player([0-9]+)',
|
(r'playerId=player([0-9]+)',
|
||||||
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
|
||||||
r'data-id=["\'](\d+)'),
|
r'data-id=["\'](\d+)'),
|
||||||
webpage, 'internal video ID')
|
webpage, 'internal video ID')
|
||||||
png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
|
|
||||||
png = self._download_webpage(png_url, video_id, 'Downloading url information')
|
|
||||||
m3u8_url = _decrypt_url(png)
|
|
||||||
formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
|
|
||||||
self._sort_formats(formats)
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'title': title,
|
'title': self._live_title(title),
|
||||||
'formats': formats,
|
'formats': self._extract_png_formats(vidplayer_id),
|
||||||
'is_live': True,
|
'is_live': True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue