From b37f3b7703e0f199ffbd4df1a8d1455f49227e2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=3D=3FUTF-8=3Fq=3FSergey=3D20M=3DE2=3D80=3DA4=3F=3D?= Date: Fri, 26 Feb 2021 18:02:37 +0100 Subject: [PATCH] [pornhub] Implement lazy playlist extraction --- haruhi_dl/extractor/pornhub.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/haruhi_dl/extractor/pornhub.py b/haruhi_dl/extractor/pornhub.py index d2524a090..97b5508eb 100644 --- a/haruhi_dl/extractor/pornhub.py +++ b/haruhi_dl/extractor/pornhub.py @@ -545,13 +545,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): ]+\bid=["\']moreDataBtn ''', webpage) is not None - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - host = mobj.group('host') - item_id = mobj.group('id') - - self._login(host) - + def _entries(self, url, host, item_id): page = self._extract_page(url) VIDEOS = '/videos' @@ -564,7 +558,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): def is_404(e): return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404 - entries = [] base_url = url has_page = page is not None first_page = page if has_page else 1 @@ -588,11 +581,19 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): page_entries = self._extract_entries(webpage, host) if not page_entries: break - entries.extend(page_entries) + for e in page_entries: + yield e if not self._has_more(webpage): break - return self.playlist_result(orderedSet(entries), item_id) + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + host = mobj.group('host') + item_id = mobj.group('id') + + self._login(host) + + return self.playlist_result(self._entries(url, host, item_id), item_id) class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):