def _dedup_by_ids(self, items):
    """Return ``items`` with duplicate entries removed, keyed on ``item['id']``.

    The first occurrence of each id is kept and the relative order of the
    surviving entries is preserved.  The input list is not modified.

    :param items: iterable of mappings, each of which has an ``'id'`` key
        whose value is hashable.
    :return: a new list holding one entry per distinct id.
    :raises KeyError: if an entry has no ``'id'`` key.
    """
    seen_ids = set()
    deduped = []
    # Iterate the *input*; looping over the (initially empty) result list
    # would never execute the body and always yield an empty list.
    for item in items:
        item_id = item['id']
        if item_id in seen_ids:
            continue
        seen_ids.add(item_id)
        deduped.append(item)
    return deduped