From 7e9e7a3c918b46e943caeb659b6f8bbbfd5aca9f Mon Sep 17 00:00:00 2001 From: Dominika Date: Sun, 8 Nov 2020 00:40:17 +0100 Subject: [PATCH] version 2020.11.08 --- ChangeLog | 13 ++++++++++--- haruhi_dl/extractor/polskieradio.py | 7 ++++--- haruhi_dl/extractor/youtube.py | 14 +++++++------- haruhi_dl/version.py | 2 +- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index 495635ae4..754b3c92d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,18 @@ +version 2020.11.08 +Extractors +* [youtube] age-gate fixes +* [aliexpress] Added extractor +* [polskieradio] Fixed title extraction +* [tiktok] Added extractor + version 2020.11.06 Extractors -* youtube fixes -* Added onnetwork extractor +* [youtube] fixes +* [onnetwork] Added extractor version 2020.11.01.01 Extractors -* fixed youtube? +* [youtube] fixes like/dislike extraction version 2020.11.01 Core diff --git a/haruhi_dl/extractor/polskieradio.py b/haruhi_dl/extractor/polskieradio.py index 978d6f813..0d412c571 100644 --- a/haruhi_dl/extractor/polskieradio.py +++ b/haruhi_dl/extractor/polskieradio.py @@ -78,9 +78,11 @@ class PolskieRadioIE(InfoExtractor): media_urls = set() + title = self._og_search_title(webpage).strip() + for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content): media = self._parse_json(data_media, playlist_id, fatal=False) - if not media.get('file') or not media.get('desc'): + if not media.get('file'): continue media_url = self._proto_relative_url(media['file'], 'http:') if media_url in media_urls: @@ -89,14 +91,13 @@ class PolskieRadioIE(InfoExtractor): entries.append({ 'id': compat_str(media['id']), 'url': media_url, - 'title': compat_urllib_parse_unquote(media['desc']), + 'title': compat_urllib_parse_unquote(media['desc']) or title, 'duration': int_or_none(media.get('length')), 'vcodec': 'none' if media.get('provider') == 'audio' else None, 'timestamp': timestamp, 'thumbnail': thumbnail_url }) - title = self._og_search_title(webpage).strip() description = strip_or_none(self._og_search_description(webpage)) return self.playlist_result(entries, playlist_id, title, description) diff --git a/haruhi_dl/extractor/youtube.py b/haruhi_dl/extractor/youtube.py index f49ec7dd4..f4c68c26f 100644 --- a/haruhi_dl/extractor/youtube.py +++ b/haruhi_dl/extractor/youtube.py @@ -1680,8 +1680,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - 'sts': self._search_regex( - r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), +# 'sts': self._search_regex( + # r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''), }) video_info_url = proto + '://www.youtube.com/get_video_info?' + data try: @@ -1931,11 +1931,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_url = self._search_regex( ASSETS_RE, embed_webpage, 'JS player URL') - if player_url is None: - player_url_json = self._search_regex( - r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', - video_webpage, 'age gate player URL') - player_url = json.loads(player_url_json) + #if player_url is None: + # player_url_json = self._search_regex( + # r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")', + # video_webpage, 'age gate player URL') + # player_url = json.loads(player_url_json) if 'sig' in url_data: url += '&signature=' + url_data['sig'][0] diff --git a/haruhi_dl/version.py b/haruhi_dl/version.py index 20b86980f..a14552f74 100644 --- a/haruhi_dl/version.py +++ b/haruhi_dl/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.11.06' +__version__ = '2020.11.08'