diff --git a/haruhi_dl/__init__.py b/haruhi_dl/__init__.py
index f4f6d8c63..95d0967e5 100644
--- a/haruhi_dl/__init__.py
+++ b/haruhi_dl/__init__.py
@@ -426,6 +426,7 @@ def _real_main(argv=None):
         'xattr_set_filesize': opts.xattr_set_filesize,
         'match_filter': match_filter,
         'no_color': opts.no_color,
+        'use_proxy_sites': opts.use_proxy_sites,
         'ffmpeg_location': opts.ffmpeg_location,
         'hls_prefer_native': opts.hls_prefer_native,
         'hls_use_mpegts': opts.hls_use_mpegts,
diff --git a/haruhi_dl/extractor/nitter.py b/haruhi_dl/extractor/nitter.py
index 9f06e1ac2..4b35158da 100644
--- a/haruhi_dl/extractor/nitter.py
+++ b/haruhi_dl/extractor/nitter.py
@@ -8,6 +8,7 @@ from ..utils import (
     unified_timestamp,
     remove_end,
     determine_ext,
+    ExtractorError,
 )
 
 
@@ -76,12 +77,25 @@ class NitterSHIE(SelfhostedInfoExtractor):
         host, video_id = self._match_id_and_host(url)
         base_url = ('http://' if url.startswith('http://') else 'https://') + host
 
+        if self._downloader.params.get('use_proxy_sites') is False:
+            return self.url_result('https://twitter.com/i/web/status/' + video_id, ie='Twitter')
+
         if not webpage or '>Enable hls playback<' in webpage:
+            if self._downloader.params.get('use_proxy_sites') is None and not url.startswith('nitter:'):
+                return self.url_result('https://twitter.com/i/web/status/' + video_id, ie='Twitter')
             self._set_cookie(host, 'hlsPlayback', 'on')
             if url.startswith('nitter:'):
                 url = base_url + '/hdl/status/' + video_id
             webpage = self._download_webpage(url, video_id,
-                note='Re-downloading webpage for HLS data' if webpage else 'Downloading webpage')
+                note='Re-downloading webpage for HLS data' if webpage else 'Downloading webpage',
+                expected_status=(200, 429))
+
+        if '>Instance has been rate limited.<' in webpage:
+            if self._downloader.params.get('use_proxy_sites') is False:
+                raise ExtractorError('Instance has been rate limited', expected=True)
+            self.report_warning('Instance has been rate limited, falling back to Twitter')
+            return self.url_result('https://twitter.com/i/web/status/' + video_id, ie='Twitter')
 
         video_url = base_url + self._html_search_regex(r'(?:<video[^>]+data-url|<source[^>]+src)="([^"]+)"', webpage, 'video url')
         ext = determine_ext(video_url)
diff --git a/haruhi_dl/options.py b/haruhi_dl/options.py
index 63e11b517..aeb605b9f 100644
--- a/haruhi_dl/options.py
+++ b/haruhi_dl/options.py
@@ -198,6 +198,14 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='no_color', default=False,
         help='Do not emit color codes in output')
+    general.add_option(
+        '--use-proxy-sites',
+        action='store_true', dest='use_proxy_sites',
+        help='Use proxy sites (like Nitter or Invidious) to download videos')
+    general.add_option(
+        '--no-use-proxy-sites',
+        action='store_false', dest='use_proxy_sites',
+        help='Use source sites to download videos')
 
     network = optparse.OptionGroup(parser, 'Network Options')
     network.add_option(