Compare commits

...

3 Commits

Author SHA1 Message Date
Laura Liberda 40638606b9 [youtube] history, subscriptions 2020-11-24 21:48:21 +01:00
Laura Liberda 75c1755cc1 [youtube] liked, watch later support (#2) 2020-11-24 16:58:50 +01:00
Laura Liberda ea7336113f update readme because why the fuck not 2020-11-22 23:38:10 +01:00
3 changed files with 147 additions and 13 deletions

View File

@ -3,6 +3,8 @@
[![build status](https://img.shields.io/gitlab/pipeline/laudom/haruhi-dl/master?gitlab_url=https%3A%2F%2Fgit.sakamoto.pl&style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/pipelines)
[![PyPI Downloads](https://img.shields.io/pypi/dm/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
[![License: LGPL 3.0 or later](https://img.shields.io/pypi/l/haruhi-dl?style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/blob/master/README.md)
[![Sasin stole 70 million PLN](https://img.shields.io/badge/Sasin-stole%2070%20million%20PLN-orange?style=flat-square)](https://www.planeta.pl/Wiadomosci/Polityka/Ile-kosztowaly-karty-wyborcze-Sasin-do-wiezienia-Wybory-odwolane)
![Trans rights!](https://img.shields.io/badge/Trans-rights!-5BCEFA?style=flat-square)
This is a fork of [youtube-dl](https://yt-dl.org/), focused on bringing a fast, steady stream of updates. We'll do our best to merge patches to any site, not only youtube.

View File

@ -1485,8 +1485,12 @@ from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,
YoutubeChannelIE,
YoutubeHistoryIE,
YoutubeLikedIE,
YoutubePlaylistIE,
YoutubeSearchIE,
YoutubeSubscriptionsIE,
YoutubeWatchLaterIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
)

View File

@ -1,7 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
from datetime import datetime
import json
import hashlib
import os.path
import random
import re
@ -58,7 +60,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_PLAYLIST_ID_RE = r'(?:LL|WL|(?:PL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,})'
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
@ -2285,9 +2287,11 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
entries = videos['entries']
continuation_token = videos['continuation']
if continuation_token and (not is_search or results):
session_id = self._search_regex(r'ytcfg\.set\({.*?"DELEGATED_SESSION_ID":"(\d+)"',
webpage, 'session id', fatal=False)
page_no = 2
while continuation_token is not None and (len(entries) < results if results else True):
cont_res = self._download_continuation(continuation_token, list_id, page_no)
cont_res = self._download_continuation(continuation_token, list_id, page_no, session_id=session_id)
cont_parser = self._parse_continuation_video_list
if not cont_parser:
cont_parser = self._parse_init_video_list
@ -2301,6 +2305,7 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
info_dict = {
'_type': 'playlist',
'id': list_id,
'entries': [],
}
if 'info_dict' in videos:
info_dict.update(videos['info_dict'])
@ -2310,7 +2315,6 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
else:
info_dict['title'] = self._og_search_title(webpage)
info_dict['entries'] = []
for _entry in entries:
if _entry:
entry = {
@ -2348,15 +2352,29 @@ class YoutubeYti1ListInfoExtractor(YoutubeBaseListInfoExtractor):
},
}
def _download_continuation(self, continuation, list_id, page_no):
return self._download_json(self._ACTION_URL % (self._ACTION_NAME), list_id,
note='Downloading %s page #%d (yti1)' % (self._LIST_NAME, page_no),
headers={
'Content-Type': 'application/json',
}, data=bytes(json.dumps({
def _download_continuation(self, continuation, list_id, page_no, session_id=None):
data = {
'context': self._YTI_CONTEXT,
'continuation': continuation,
}), encoding='utf-8'))
}
headers = {
'Content-Type': 'application/json',
'Origin': 'https://www.youtube.com',
}
if session_id:
data['context'].setdefault('user', {})['onBehalfOfUser'] = session_id
sapisid = self._get_cookies('https://www.youtube.com').get('SAPISID').value
if sapisid:
timestamp = str(int(datetime.now().timestamp()))
sapisidhash = '%s_%s' % (
timestamp,
hashlib.sha1(' '.join((timestamp, sapisid, 'https://www.youtube.com')).encode('utf-8')).hexdigest(),
)
headers['Authorization'] = 'SAPISIDHASH %s' % sapisidhash
return self._download_json(self._ACTION_URL % (self._ACTION_NAME), list_id,
note='Downloading %s page #%d (yti1)' % (self._LIST_NAME, page_no),
headers=headers,
data=bytes(json.dumps(data), encoding='utf-8'))
class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
@ -2408,7 +2426,7 @@ class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
}
class YoutubePlaylistIE(YoutubeAjaxListInfoExtractor):
class YoutubePlaylistIE(YoutubeYti1ListInfoExtractor):
IE_NAME = 'youtube:playlist'
_VALID_URL = r'(?:https?://(?:\w+\.)?youtube\.com/(?:playlist\?(?:[^&;]+[&;])*|watch\?(?:[^&;]+[&;])*playnext=1&(?:[^&;]+[&;])*)list=|ytplaylist:)?(?P<id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_LIST_NAME = 'playlist'
@ -2434,12 +2452,15 @@ class YoutubePlaylistIE(YoutubeAjaxListInfoExtractor):
}
}]
def _handle_url(self, url):
return 'https://www.youtube.com/playlist?list=%s' % self._match_id(url)
def _parse_init_video_list(self, data):
renderer = try_get(data, [
# initial
lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer'],
# continuation ajax
lambda x: x[1]['response']['onResponseReceivedActions'][0]['appendContinuationItemsAction'],
# continuation yti1
lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction'],
])
if not renderer:
raise ExtractorError('Could not extract %s item list renderer' % self._LIST_NAME)
@ -2524,6 +2545,113 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeYti1ListInfoExtractor):
return self._searcher('ytsearch', results=n, query=query)
class YoutubeLikedIE(InfoExtractor):
    """Pseudo-URL extractor for the logged-in user's liked videos.

    Matches ``:ytfav``, ``:ytfavourites`` and ``:ytliked`` and hands the
    request off to YoutubePlaylist with the special "LL" (liked list) id.
    """
    _VALID_URL = r':yt(?:fav(?:ourites)?|liked)'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # No extraction happens here; delegate to the playlist extractor,
        # which resolves the account-bound "LL" list for the current session.
        redirect = {
            '_type': 'url',
            'url': 'ytplaylist:LL',
            'ie_key': 'YoutubePlaylist',
        }
        return redirect
class YoutubeWatchLaterIE(InfoExtractor):
    """Pseudo-URL extractor for the logged-in user's Watch Later list.

    Matches ``:ytwatchlater`` and ``:ytwl`` and hands the request off to
    YoutubePlaylist with the special "WL" (watch later) list id.
    """
    _VALID_URL = r':ytw(?:atchlater|l)'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # Delegate to the playlist extractor, which resolves the
        # account-bound "WL" list for the current session.
        redirect = {
            '_type': 'url',
            'url': 'ytplaylist:WL',
            'ie_key': 'YoutubePlaylist',
        }
        return redirect
class YoutubeBaseShelfInfoExtractor(YoutubeYti1ListInfoExtractor):
    """Shared parser for feed pages that render videos inside a shelf/grid
    (e.g. the subscriptions feed)."""

    def _parse_init_video_list(self, data):
        """Parse the initial (or continuation) JSON of a shelf-based list.

        Returns a dict with:
          * ``entries`` — parsed video entries from the shelf's grid,
          * ``continuation`` — token for the next page (or None),
          * ``info_dict`` — base metadata (the list name as title).

        Raises ExtractorError when neither the shelf nor its item grid can
        be located in ``data``.
        """
        shelf_renderer = try_get(data, [
            # initial subscriptions
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['shelfRenderer'],
            # initial history
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['itemSectionRenderer'],
            # continuation yti1
            lambda x: x['continuationContents']['sectionListContinuation']['contents'][0]['itemSectionRenderer']['contents'][0]['shelfRenderer'],
        ])
        if not shelf_renderer:
            raise ExtractorError('Could not extract %s shelf list renderer' % self._LIST_NAME)
        # NOTE: the original code looped `for shelf in shelf_renderer:` —
        # iterating the KEYS of the renderer dict and re-parsing the same
        # grid once per key, duplicating every entry. The grid is parsed
        # exactly once here. The item path is also resolved inside try_get
        # so a missing 'content'/'gridRenderer' key yields a clean
        # ExtractorError instead of an uncaught KeyError.
        rend_items = try_get(shelf_renderer, [
            # initial subscriptions
            lambda x: x['content']['gridRenderer']['items'],
            # continuation ajax
            lambda x: x['content']['gridRenderer']['continuationItems'],
        ])
        if not rend_items:
            raise ExtractorError('Could not extract %s renderer item list' % self._LIST_NAME)
        entries = []
        for item in rend_items:
            entries.append(self._parse_video(item, entry_key='gridVideoRenderer'))
        return {
            'entries': entries,
            'continuation': try_get(data, [
                # initial
                lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['continuations'][0]['nextContinuationData']['continuation'],
                # continuation yti1
                lambda x: x['continuationContents']['sectionListContinuation']['continuations'][0]['nextContinuationData']['continuation']
            ], expected_type=compat_str),
            'info_dict': {
                'title': self._LIST_NAME,
            },
        }
class YoutubeSubscriptionsIE(YoutubeBaseShelfInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed.

    Matches ``https://(www.)youtube.com/feed/subscriptions``, ``:ytsubs``
    and ``:ytsubscriptions``. Parsing is inherited from
    YoutubeBaseShelfInfoExtractor.
    """
    # Fix: '(?:www\.)' previously lacked '?', so URLs without the
    # 'www.' prefix (https://youtube.com/feed/subscriptions) never matched.
    _VALID_URL = r'(?:https?://(?:www\.)?youtube\.com/feed/|:yt)(?P<id>subs(?:criptions)?)'
    IE_NAME = 'youtube:subscriptions'
    _LIST_NAME = 'subscriptions'
    _LOGIN_REQUIRED = True

    def _handle_url(self, url):
        # Canonicalize every matched form to the real feed URL.
        return 'https://www.youtube.com/feed/subscriptions'
class YoutubeHistoryIE(YoutubeYti1ListInfoExtractor):
    """Extractor for the logged-in user's watch history feed.

    Matches ``https://(www.)youtube.com/feed/history`` and ``:ythistory``.
    """
    # Fix: '(?:www\.)' previously lacked '?', so URLs without the
    # 'www.' prefix (https://youtube.com/feed/history) never matched.
    _VALID_URL = r'(?:https?://(?:www\.)?youtube\.com/feed/|:yt)(?P<id>history)'
    IE_NAME = 'youtube:history'
    _LIST_NAME = 'history'
    _LOGIN_REQUIRED = True

    def _handle_url(self, url):
        # Canonicalize every matched form to the real feed URL.
        return 'https://www.youtube.com/feed/history'

    def _parse_init_video_list(self, data):
        """Parse the initial (or yti1 continuation) history JSON.

        Returns a dict with ``entries`` (parsed videos), ``continuation``
        (next-page token or None) and a base ``info_dict``.

        Raises ExtractorError when the item list cannot be located.
        """
        rend_items = try_get(data, [
            # initial
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'],
            # continuation yti1
            lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'][0]['itemSectionRenderer']['contents'],
        ])
        if not rend_items:
            raise ExtractorError('Could not extract %s renderer item list' % self._LIST_NAME)
        entries = []
        for item in rend_items:
            entries.append(self._parse_video(item, entry_key='videoRenderer'))
        return {
            'entries': entries,
            'continuation': try_get(data, [
                # initial
                lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                # continuation yti1
                lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'][-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
            ], expected_type=compat_str),
            'info_dict': {
                'title': self._LIST_NAME,
            },
        }
class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list