Compare commits

...

3 Commits

Author SHA1 Message Date
Laura Liberda 40638606b9 [youtube] history, subscriptions 2020-11-24 21:48:21 +01:00
Laura Liberda 75c1755cc1 [youtube] liked, watch later support (#2) 2020-11-24 16:58:50 +01:00
Laura Liberda ea7336113f update readme because why the fuck not 2020-11-22 23:38:10 +01:00
3 changed files with 147 additions and 13 deletions

View File

@ -3,6 +3,8 @@
[![build status](https://img.shields.io/gitlab/pipeline/laudom/haruhi-dl/master?gitlab_url=https%3A%2F%2Fgit.sakamoto.pl&style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/pipelines)
[![PyPI Downloads](https://img.shields.io/pypi/dm/haruhi-dl?style=flat-square)](https://pypi.org/project/haruhi-dl/)
[![License: LGPL 3.0 or later](https://img.shields.io/pypi/l/haruhi-dl?style=flat-square)](https://git.sakamoto.pl/laudom/haruhi-dl/-/blob/master/README.md)
[![Sasin stole 70 million PLN](https://img.shields.io/badge/Sasin-stole%2070%20million%20PLN-orange?style=flat-square)](https://www.planeta.pl/Wiadomosci/Polityka/Ile-kosztowaly-karty-wyborcze-Sasin-do-wiezienia-Wybory-odwolane)
![Trans rights!](https://img.shields.io/badge/Trans-rights!-5BCEFA?style=flat-square)
This is a fork of [youtube-dl](https://yt-dl.org/), focused on bringing a fast, steady stream of updates. We'll do our best to merge patches to any site, not only youtube.

View File

@ -1485,8 +1485,12 @@ from .yourupload import YourUploadIE
from .youtube import (
YoutubeIE,
YoutubeChannelIE,
YoutubeHistoryIE,
YoutubeLikedIE,
YoutubePlaylistIE,
YoutubeSearchIE,
YoutubeSubscriptionsIE,
YoutubeWatchLaterIE,
YoutubeTruncatedIDIE,
YoutubeTruncatedURLIE,
)

View File

@ -1,7 +1,9 @@
# coding: utf-8
from __future__ import unicode_literals
from datetime import datetime
import json
import hashlib
import os.path
import random
import re
@ -58,7 +60,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# If True it will raise an error if no login info is provided
_LOGIN_REQUIRED = False
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
_PLAYLIST_ID_RE = r'(?:LL|WL|(?:PL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,})'
_YOUTUBE_CLIENT_HEADERS = {
'x-youtube-client-name': '1',
@ -2285,9 +2287,11 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
entries = videos['entries']
continuation_token = videos['continuation']
if continuation_token and (not is_search or results):
session_id = self._search_regex(r'ytcfg\.set\({.*?"DELEGATED_SESSION_ID":"(\d+)"',
webpage, 'session id', fatal=False)
page_no = 2
while continuation_token is not None and (len(entries) < results if results else True):
cont_res = self._download_continuation(continuation_token, list_id, page_no)
cont_res = self._download_continuation(continuation_token, list_id, page_no, session_id=session_id)
cont_parser = self._parse_continuation_video_list
if not cont_parser:
cont_parser = self._parse_init_video_list
@ -2301,6 +2305,7 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
info_dict = {
'_type': 'playlist',
'id': list_id,
'entries': [],
}
if 'info_dict' in videos:
info_dict.update(videos['info_dict'])
@ -2310,7 +2315,6 @@ class YoutubeBaseListInfoExtractor(YoutubeBaseInfoExtractor):
else:
info_dict['title'] = self._og_search_title(webpage)
info_dict['entries'] = []
for _entry in entries:
if _entry:
entry = {
@ -2348,15 +2352,29 @@ class YoutubeYti1ListInfoExtractor(YoutubeBaseListInfoExtractor):
},
}
def _download_continuation(self, continuation, list_id, page_no):
return self._download_json(self._ACTION_URL % (self._ACTION_NAME), list_id,
note='Downloading %s page #%d (yti1)' % (self._LIST_NAME, page_no),
headers={
'Content-Type': 'application/json',
}, data=bytes(json.dumps({
def _download_continuation(self, continuation, list_id, page_no, session_id=None):
data = {
'context': self._YTI_CONTEXT,
'continuation': continuation,
}), encoding='utf-8'))
}
headers = {
'Content-Type': 'application/json',
'Origin': 'https://www.youtube.com',
}
if session_id:
data['context'].setdefault('user', {})['onBehalfOfUser'] = session_id
sapisid = self._get_cookies('https://www.youtube.com').get('SAPISID').value
if sapisid:
timestamp = str(int(datetime.now().timestamp()))
sapisidhash = '%s_%s' % (
timestamp,
hashlib.sha1(' '.join((timestamp, sapisid, 'https://www.youtube.com')).encode('utf-8')).hexdigest(),
)
headers['Authorization'] = 'SAPISIDHASH %s' % sapisidhash
return self._download_json(self._ACTION_URL % (self._ACTION_NAME), list_id,
note='Downloading %s page #%d (yti1)' % (self._LIST_NAME, page_no),
headers=headers,
data=bytes(json.dumps(data), encoding='utf-8'))
class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
@ -2408,7 +2426,7 @@ class YoutubeChannelIE(YoutubeAjaxListInfoExtractor):
}
class YoutubePlaylistIE(YoutubeAjaxListInfoExtractor):
class YoutubePlaylistIE(YoutubeYti1ListInfoExtractor):
IE_NAME = 'youtube:playlist'
_VALID_URL = r'(?:https?://(?:\w+\.)?youtube\.com/(?:playlist\?(?:[^&;]+[&;])*|watch\?(?:[^&;]+[&;])*playnext=1&(?:[^&;]+[&;])*)list=|ytplaylist:)?(?P<id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
_LIST_NAME = 'playlist'
@ -2434,12 +2452,15 @@ class YoutubePlaylistIE(YoutubeAjaxListInfoExtractor):
}
}]
def _handle_url(self, url):
return 'https://www.youtube.com/playlist?list=%s' % self._match_id(url)
def _parse_init_video_list(self, data):
renderer = try_get(data, [
# initial
lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer'],
# continuation ajax
lambda x: x[1]['response']['onResponseReceivedActions'][0]['appendContinuationItemsAction'],
# continuation yti1
lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction'],
])
if not renderer:
raise ExtractorError('Could not extract %s item list renderer' % self._LIST_NAME)
@ -2524,6 +2545,113 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeYti1ListInfoExtractor):
return self._searcher('ytsearch', results=n, query=query)
class YoutubeLikedIE(InfoExtractor):
    """Pseudo-URL extractor for the logged-in user's liked videos.

    Matches ``:ytfav``, ``:ytfavourites`` and ``:ytliked`` and hands the
    request off to YoutubePlaylist with the special "LL" (liked list) id.
    """
    _VALID_URL = r':yt(?:fav(?:ourites)?|liked)'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # No extraction happens here; delegate to the playlist extractor,
        # which resolves the account-bound "LL" list for the current session.
        redirect = {
            '_type': 'url',
            'url': 'ytplaylist:LL',
            'ie_key': 'YoutubePlaylist',
        }
        return redirect
class YoutubeWatchLaterIE(InfoExtractor):
    """Pseudo-URL extractor for the logged-in user's Watch Later list.

    Matches ``:ytwatchlater`` and ``:ytwl`` and hands the request off to
    YoutubePlaylist with the special "WL" (watch later) list id.
    """
    _VALID_URL = r':ytw(?:atchlater|l)'
    _LOGIN_REQUIRED = True

    def _real_extract(self, url):
        # Delegate to the playlist extractor, which resolves the
        # account-bound "WL" list for the current session.
        redirect = {
            '_type': 'url',
            'url': 'ytplaylist:WL',
            'ie_key': 'YoutubePlaylist',
        }
        return redirect
class YoutubeBaseShelfInfoExtractor(YoutubeYti1ListInfoExtractor):
    """Shared parser for feed pages that render videos inside a shelf/grid
    (e.g. the subscriptions feed)."""

    def _parse_init_video_list(self, data):
        """Parse the initial (or continuation) JSON of a shelf-based list.

        Returns a dict with:
          * ``entries`` — parsed video entries from the shelf's grid,
          * ``continuation`` — token for the next page (or None),
          * ``info_dict`` — base metadata (the list name as title).

        Raises ExtractorError when neither the shelf nor its item grid can
        be located in ``data``.
        """
        shelf_renderer = try_get(data, [
            # initial subscriptions
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['shelfRenderer'],
            # initial history
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['itemSectionRenderer'],
            # continuation yti1
            lambda x: x['continuationContents']['sectionListContinuation']['contents'][0]['itemSectionRenderer']['contents'][0]['shelfRenderer'],
        ])
        if not shelf_renderer:
            raise ExtractorError('Could not extract %s shelf list renderer' % self._LIST_NAME)
        # NOTE: the original code looped `for shelf in shelf_renderer:` —
        # iterating the KEYS of the renderer dict and re-parsing the same
        # grid once per key, duplicating every entry. The grid is parsed
        # exactly once here. The item path is also resolved inside try_get
        # so a missing 'content'/'gridRenderer' key yields a clean
        # ExtractorError instead of an uncaught KeyError.
        rend_items = try_get(shelf_renderer, [
            # initial subscriptions
            lambda x: x['content']['gridRenderer']['items'],
            # continuation ajax
            lambda x: x['content']['gridRenderer']['continuationItems'],
        ])
        if not rend_items:
            raise ExtractorError('Could not extract %s renderer item list' % self._LIST_NAME)
        entries = []
        for item in rend_items:
            entries.append(self._parse_video(item, entry_key='gridVideoRenderer'))
        return {
            'entries': entries,
            'continuation': try_get(data, [
                # initial
                lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['continuations'][0]['nextContinuationData']['continuation'],
                # continuation yti1
                lambda x: x['continuationContents']['sectionListContinuation']['continuations'][0]['nextContinuationData']['continuation']
            ], expected_type=compat_str),
            'info_dict': {
                'title': self._LIST_NAME,
            },
        }
class YoutubeSubscriptionsIE(YoutubeBaseShelfInfoExtractor):
    """Extractor for the logged-in user's subscriptions feed.

    Matches ``https://(www.)youtube.com/feed/subscriptions``, ``:ytsubs``
    and ``:ytsubscriptions``. Parsing is inherited from
    YoutubeBaseShelfInfoExtractor.
    """
    # Fix: '(?:www\.)' previously lacked '?', so URLs without the
    # 'www.' prefix (https://youtube.com/feed/subscriptions) never matched.
    _VALID_URL = r'(?:https?://(?:www\.)?youtube\.com/feed/|:yt)(?P<id>subs(?:criptions)?)'
    IE_NAME = 'youtube:subscriptions'
    _LIST_NAME = 'subscriptions'
    _LOGIN_REQUIRED = True

    def _handle_url(self, url):
        # Canonicalize every matched form to the real feed URL.
        return 'https://www.youtube.com/feed/subscriptions'
class YoutubeHistoryIE(YoutubeYti1ListInfoExtractor):
    """Extractor for the logged-in user's watch history feed.

    Matches ``https://(www.)youtube.com/feed/history`` and ``:ythistory``.
    """
    # Fix: '(?:www\.)' previously lacked '?', so URLs without the
    # 'www.' prefix (https://youtube.com/feed/history) never matched.
    _VALID_URL = r'(?:https?://(?:www\.)?youtube\.com/feed/|:yt)(?P<id>history)'
    IE_NAME = 'youtube:history'
    _LIST_NAME = 'history'
    _LOGIN_REQUIRED = True

    def _handle_url(self, url):
        # Canonicalize every matched form to the real feed URL.
        return 'https://www.youtube.com/feed/history'

    def _parse_init_video_list(self, data):
        """Parse the initial (or yti1 continuation) history JSON.

        Returns a dict with ``entries`` (parsed videos), ``continuation``
        (next-page token or None) and a base ``info_dict``.

        Raises ExtractorError when the item list cannot be located.
        """
        rend_items = try_get(data, [
            # initial
            lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'],
            # continuation yti1
            lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'][0]['itemSectionRenderer']['contents'],
        ])
        if not rend_items:
            raise ExtractorError('Could not extract %s renderer item list' % self._LIST_NAME)
        entries = []
        for item in rend_items:
            entries.append(self._parse_video(item, entry_key='videoRenderer'))
        return {
            'entries': entries,
            'continuation': try_get(data, [
                # initial
                lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                # continuation yti1
                lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'][-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
            ], expected_type=compat_str),
            'info_dict': {
                'title': self._LIST_NAME,
            },
        }
class YoutubeTruncatedURLIE(InfoExtractor):
IE_NAME = 'youtube:truncated_url'
IE_DESC = False # Do not list