From 88f5839a3799e7c3954ecedc73eb8afa4acd70ab Mon Sep 17 00:00:00 2001
From: Laura Liberda
Date: Sat, 23 Jan 2021 20:14:31 +0100
Subject: [PATCH] [playwright] cookie sync, non-headless option, global playwright instance

- Keep a single Playwright instance per downloader: it is stored as
  `pw_instance` on the downloader object and reused by pw().
- Copy the downloader cookiejar into the browser context before a page
  is opened, and copy the browser cookies back in browser_stop().
- Add --no-headless-playwright. The option defaults to True (headless),
  so passing the flag is what makes the browser window visible.
- Browser cookies with a missing or non-positive `expires` are stored
  without an expiry time instead of as already-expired cookies.
- open_page() accepts an optional `html` argument.

---
 haruhi_dl/__init__.py   |  1 +
 haruhi_dl/options.py    |  4 ++++
 haruhi_dl/playwright.py | 47 +++++++++++++++++++++++++++++++++++------
 3 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/haruhi_dl/__init__.py b/haruhi_dl/__init__.py
index a577cba20..b90ca151d 100644
--- a/haruhi_dl/__init__.py
+++ b/haruhi_dl/__init__.py
@@ -414,6 +414,7 @@ def _real_main(argv=None):
         'fixup': opts.fixup,
         'source_address': opts.source_address,
         'call_home': opts.call_home,
+        'headless_playwright': opts.headless_playwright,
         'sleep_interval': opts.sleep_interval,
         'max_sleep_interval': opts.max_sleep_interval,
         'external_downloader': opts.external_downloader,
diff --git a/haruhi_dl/options.py b/haruhi_dl/options.py
index b83ef9de5..acbef1584 100644
--- a/haruhi_dl/options.py
+++ b/haruhi_dl/options.py
@@ -675,6 +675,10 @@ def parseOpts(overrideArguments=None):
         '--no-call-home',
         dest='call_home', action='store_false', default=False,
         help='Do NOT contact the haruhi-dl server for debugging')
+    verbosity.add_option(
+        '--no-headless-playwright',
+        dest='headless_playwright', action='store_false', default=True,
+        help='Show browsers automated with Playwright on the screen')
 
     filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
     filesystem.add_option(
diff --git a/haruhi_dl/playwright.py b/haruhi_dl/playwright.py
index c41ca5305..838d84c71 100644
--- a/haruhi_dl/playwright.py
+++ b/haruhi_dl/playwright.py
@@ -1,6 +1,7 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+from .compat import compat_cookiejar_Cookie
 from .utils import (
     ExtractorError,
     is_outdated_version,
@@ -52,27 +53,61 @@ class PlaywrightHelper():
     def pw(self):
         if not self._pw:
             self._import_pw(fatal=True)
-        if not self.pw_instance:
-            self.pw_instance = self._pw().__enter__()
-        return self.pw_instance
+        if 'pw_instance' not in self._extractor._downloader.__dict__:
+            self._extractor._downloader.pw_instance = self._pw().__enter__()
+        return self._extractor._downloader.pw_instance
 
     def pw_stop(self):
         self.pw_instance.stop()
 
     def browser_stop(self):
+        self._set_cookies_from_browser(self.browser_context.cookies())
         self.browser.close()
 
-    def open_page(self, url, display_id, browser_used='firefox', note='Opening page in %(browser)s'):
+    def _get_cookies_for_browser(self):
+        browser_cookies = []
+        for cookie in self._extractor._downloader.cookiejar:
+            c = {
+                'name': cookie.name,
+                'value': cookie.value,
+                'port': cookie.port,
+                'domain': cookie.domain,
+                'path': cookie.path,
+                # 'expires': cookie.expires,
+                'secure': cookie.secure,
+            }
+            # https://github.com/microsoft/playwright-python/issues/459
+            if cookie.expires:
+                c['expires'] = cookie.expires
+            browser_cookies.append(c)
+        return browser_cookies
+
+    def _set_cookies_from_browser(self, cookies):
+        for cookie in cookies:
+            self._extractor._downloader.cookiejar.set_cookie(
+                compat_cookiejar_Cookie(0, cookie['name'], cookie['value'], cookie.get('port'), False,
+                                        cookie['domain'], False, cookie['domain'].startswith('.'),
+                                        cookie['path'], cookie['path'] != '/',
+                                        cookie['secure'], cookie['expires'] if (cookie.get('expires') or -1) > 0 else None,
+                                        False, None, None, None))
+
+    def open_page(self, url, display_id, browser_used='firefox', note='Opening page in %(browser)s', html=None):
         pw = self.pw()
         self.pw_instance = pw
         browser = {
             'firefox': pw.firefox,
             'chromium': pw.chromium,
             'webkit': pw.webkit,
-        }[browser_used].launch()
+        }[browser_used].launch(
+            headless=self._extractor._downloader.params.get('headless_playwright', True))
         self.browser = browser
+        browser_context = browser.new_context()
+        self.browser_context = browser_context
+        browser_context.add_cookies(self._get_cookies_for_browser())
         if not self._extractor._downloader.params.get('quiet'):
             self._extractor.to_screen('%s: %s' % (display_id, note % {'browser': browser_used}))
-        page = browser.new_page()
+        page = browser_context.new_page()
+        if html:
+            page.set_content(html)
         page.goto(url)
         return page