haruhi-dl/haruhi_dl/playwright.py

116 lines
4.3 KiB
Python

# coding: utf-8
from http.cookiejar import Cookie
from .utils import (
ExtractorError,
is_outdated_version,
)
class PlaywrightHelper():
    """Open pages in a Playwright-driven browser on behalf of an extractor.

    The started Playwright instance is cached on the extractor's downloader
    (as its ``pw_instance`` attribute). Cookies are copied from the
    downloader's cookie jar into the browser context when a page is opened
    and copied back when the browser is stopped.
    """

    # Callable producing the Playwright context manager; set by _real_import_pw().
    _pw = None
    # Version string of the imported Playwright package, once imported.
    _pw_version = None
    # Playwright instance last used by open_page() on this helper.
    pw_instance = None
    # Browser and browser context created by the last open_page() call.
    browser = None
    browser_context = None
    _required_pw_version = '1.8.0a1'
    _extractor = None

    def __init__(self, extractor):
        """Remember *extractor*; its ``_downloader`` supplies params and the cookie jar."""
        self._extractor = extractor

    @classmethod
    def _check_version(cls):
        """Return True if the imported Playwright version is not outdated.

        When the required version carries an alpha marker (``a``), everything
        from the first ``a`` on is dropped from both versions before they are
        handed to ``is_outdated_version``.
        """
        found = cls._pw_version
        required = cls._required_pw_version
        if 'a' in required:
            found = found.split('a')[0]
            required = required.split('a')[0]
        return not is_outdated_version(found, required)

    @classmethod
    def _real_import_pw(cls):
        """Import Playwright, record its version and store the launcher.

        Raises:
            ImportError: if Playwright cannot be imported.
            ExtractorError: if the installed version fails _check_version().
        """
        from playwright._repo_version import version
        cls._pw_version = version
        if cls._check_version() is False:
            raise ExtractorError(
                'Playwright version %s is required (%s found)' % (cls._required_pw_version, version),
                expected=True)
        from playwright.sync_api import sync_playwright
        # Stored as a plain function on the class, so ``self._pw()`` passes the
        # helper instance as the (ignored) positional argument.
        cls._pw = lambda _self: sync_playwright()

    @classmethod
    def _import_pw(cls, fatal=True, extractor=None):
        """Import Playwright, optionally swallowing failures.

        Args:
            fatal: when true, import/version failures are raised; otherwise
                they are silently ignored.
            extractor: extractor whose downloader params decide whether the
                original ImportError is re-raised (``verbose``). Falls back to
                the class-level ``_extractor`` (``None`` unless set on the class).

        Raises:
            ImportError: in verbose mode, when Playwright cannot be imported.
            ExtractorError: when Playwright is missing or too old.
        """
        extractor = extractor or cls._extractor
        try:
            cls._real_import_pw()
        except ImportError as err:
            if fatal:
                if extractor and extractor._downloader.params.get('verbose'):
                    # Verbose users get the real import failure and traceback.
                    raise
                raise ExtractorError('Playwright could not be imported', expected=True) from err
        except ExtractorError:
            if fatal:
                raise

    @classmethod
    def _version(cls):
        """Return the Playwright version string, or None if it is unavailable."""
        if not cls._pw_version:
            cls._import_pw(fatal=False)
        return cls._pw_version

    def pw(self):
        """Return the Playwright instance cached on the downloader, starting it if needed."""
        if not self._pw:
            self._import_pw(fatal=True, extractor=self._extractor)
        downloader = self._extractor._downloader
        if 'pw_instance' not in downloader.__dict__:
            downloader.pw_instance = self._pw().__enter__()
        return downloader.pw_instance

    def pw_stop(self):
        """Stop the Playwright instance used by this helper, if any.

        The instance is also removed from the downloader so that the next
        pw() call starts a fresh one instead of returning the stopped one.
        """
        instance = self.pw_instance
        if instance is None:
            return
        instance.stop()
        self.pw_instance = None
        downloader = self._extractor._downloader
        if downloader.__dict__.get('pw_instance') is instance:
            del downloader.pw_instance

    def browser_stop(self):
        """Copy the browser's cookies back to the cookie jar and close the browser.

        Does nothing when no browser has been opened by this helper.
        """
        browser, context = self.browser, self.browser_context
        if browser is None:
            return
        try:
            if context is not None:
                self._set_cookies_from_browser(context.cookies())
        finally:
            # Close the browser even if reading or storing the cookies failed.
            browser.close()
            self.browser = None
            self.browser_context = None

    def _get_cookies_for_browser(self):
        """Return the downloader's cookies as a list of dicts for ``add_cookies``."""
        browser_cookies = []
        for cookie in self._extractor._downloader.cookiejar:
            entry = {
                'name': cookie.name,
                'value': cookie.value,
                'port': cookie.port,
                'domain': cookie.domain,
                'path': cookie.path,
                'secure': cookie.secure,
            }
            # 'expires' is only included when it is set, see
            # https://github.com/microsoft/playwright-python/issues/459
            if cookie.expires:
                entry['expires'] = cookie.expires
            browser_cookies.append(entry)
        return browser_cookies

    def _set_cookies_from_browser(self, cookies):
        """Store cookie dicts coming from the browser in the downloader's cookie jar.

        Args:
            cookies: iterable of dicts with at least ``name``, ``value``,
                ``domain``, ``path`` and ``secure``; ``port`` and ``expires``
                are optional.
        """
        jar = self._extractor._downloader.cookiejar
        for cookie in cookies:
            domain = cookie['domain']
            path = cookie['path']
            expires = cookie.get('expires')
            # A missing or negative expiry (Playwright reports -1) marks a
            # session cookie; storing -1 verbatim would make it look expired.
            if expires is None or expires < 0:
                expires = None
            else:
                expires = int(expires)
            jar.set_cookie(Cookie(
                0, cookie['name'], cookie['value'], cookie.get('port'), False,
                domain, False, domain.startswith('.'),
                path, path != '/',
                cookie['secure'], expires,
                # discard=True for session cookies; rest must be a dict so the
                # Cookie's *_nonstandard_attr() methods keep working.
                expires is None, None, None, {}))

    def open_page(self, url, display_id, browser_used='firefox', note='Opening page in %(browser)s', html=None):
        """Launch a browser, load the downloader's cookies and navigate to *url*.

        Args:
            url: address passed to ``page.goto``.
            display_id: identifier used as prefix of the progress message.
            browser_used: ``'firefox'``, ``'chromium'`` or ``'webkit'``;
                overridden by the ``force_playwright_browser`` param if set.
            note: progress message template; ``%(browser)s`` is substituted.
            html: optional markup passed to ``page.set_content`` before navigating.

        Returns:
            The page object created in the new browser context.

        Raises:
            KeyError: if the selected browser name is not one of the three above.
        """
        pw = self.pw()
        self.pw_instance = pw
        params = self._extractor._downloader.params
        forced_browser = params.get('force_playwright_browser')
        if forced_browser is not None:
            browser_used = forced_browser
        browser = {
            'firefox': pw.firefox,
            'chromium': pw.chromium,
            'webkit': pw.webkit,
        }[browser_used].launch(headless=params.get('headless_playwright', True))
        self.browser = browser
        browser_context = browser.new_context()
        self.browser_context = browser_context
        browser_context.add_cookies(self._get_cookies_for_browser())
        if not params.get('quiet'):
            self._extractor.to_screen('%s: %s' % (display_id, note % {'browser': browser_used}))
        page = browser_context.new_page()
        if html:
            page.set_content(html)
        # NOTE(review): goto() runs even when html was supplied, so the page
        # navigates right after set_content() - confirm this is intended.
        page.goto(url)
        return page