Start work on using browser network stack for recipes

This commit is contained in:
Kovid Goyal 2024-08-06 08:56:41 +05:30
parent bf7c76f79e
commit 5031269baf
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 65 additions and 2 deletions

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
from typing import Union
from qt.core import QNetworkCookie, QObject, Qt, QUrl, pyqtSignal
from qt.webengine import QWebEngineDownloadRequest, QWebEnginePage, QWebEngineUrlRequestInfo, QWebEngineUrlRequestInterceptor
from .simple_backend import create_base_profile
class RequestInterceptor(QWebEngineUrlRequestInterceptor):
    """URL request interceptor that currently lets every request through untouched."""

    def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None:
        """Placeholder hook: inspect nothing and modify nothing on *req*."""
class FetchBackend(QWebEnginePage):
    """A web engine page used to download URLs via the browser network stack.

    The page is created on a profile obtained from ``create_base_profile()``.
    Downloads requested on that profile are accepted automatically and their
    outcome is reported (currently via debug prints) when they finish.
    """

    # Emitted with (url, filename_or_path); delivered to download() through a
    # queued connection, see __init__.
    request_download = pyqtSignal(object, str)

    def __init__(self, output_dir: str, cache_name: str = '', parent: Union[QObject, None] = None) -> None:
        """Create the page and wire up its profile.

        :param output_dir: Directory set as the download path of the profile.
        :param cache_name: Passed through to ``create_base_profile()``.
        :param parent: Optional parent object for the page.
        """
        profile = create_base_profile(cache_name)
        # Construct the underlying QWebEnginePage before any bound method of
        # this object is handed to Qt as a slot.
        super().__init__(profile, parent)
        # NOTE: this instance attribute intentionally keeps the name used by
        # the rest of this class; it hides the inherited profile() accessor.
        self.profile = profile
        self.profile.downloadRequested.connect(self._download_requested)
        self.profile.setDownloadPath(output_dir)
        self.interceptor = RequestInterceptor(self)
        self.profile.setUrlRequestInterceptor(self.interceptor)
        self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection)

    def download(self, url: Union[str, QUrl], filename_or_path: str = '') -> str:
        """Start downloading *url* and return its encoded form as a string.

        :param url: The URL to fetch, either as text or as a ``QUrl``.
        :param filename_or_path: Forwarded unchanged to ``QWebEnginePage.download()``.
        :return: The URL as produced by ``QUrl.toEncoded()``, decoded to ``str``.
        """
        if isinstance(url, str):
            url = QUrl(url)
        super().download(url, filename_or_path)
        return bytes(url.toEncoded()).decode()

    def _download_requested(self, dr: QWebEngineDownloadRequest) -> None:
        """Accept every download request and watch for its completion."""
        dr.accept()
        dr.isFinishedChanged.connect(self._download_finished)

    def _download_finished(self) -> None:
        """Report the final state of the download request that emitted the signal."""
        dr: QWebEngineDownloadRequest = self.sender()
        state = dr.state()
        url = bytes(dr.url().toEncoded()).decode()
        # The prints below are work-in-progress debugging output; other final
        # states are silently ignored.
        if state == QWebEngineDownloadRequest.DownloadState.DownloadInterrupted:
            print(99999999, url, dr.interruptReasonString())
        elif state == QWebEngineDownloadRequest.DownloadState.DownloadCompleted:
            print(1111111, dr, url, dr.downloadFileName())

    def set_user_agent(self, new_val: str) -> None:
        """Set the HTTP user agent string on the profile."""
        self.profile.setHttpUserAgent(new_val)

    def set_simple_cookie(self, name: str, value: str, domain: str, path: str = '/') -> None:
        """Add a cookie built from *name*, *value*, *domain* and *path* to the profile's cookie store.

        The values are interpolated verbatim into a ``Set-Cookie`` style
        string, so they must not contain ``;`` or other cookie syntax.
        """
        cookie_store = self.profile.cookieStore()
        cookie_string = f'{name}={value}; Domain={domain}; Path={path}'
        # parseCookies() takes raw bytes (QByteArray), not str, so encode first.
        for cookie in QNetworkCookie.parseCookies(cookie_string.encode()):
            cookie_store.setCookie(cookie)

View File

@ -23,8 +23,7 @@ def canonicalize_qurl(qurl):
return qurl
@lru_cache(maxsize=None)
def create_profile(cache_name='', allow_js=False):
def create_base_profile(cache_name='', allow_js=False):
from calibre.utils.random_ua import random_common_chrome_user_agent
if cache_name:
ans = QWebEngineProfile(cache_name, QApplication.instance())
@ -43,6 +42,12 @@ def create_profile(cache_name='', allow_js=False):
# ensure javascript cannot read from local files
a(QWebEngineSettings.WebAttribute.LocalContentCanAccessFileUrls, False)
a(QWebEngineSettings.WebAttribute.AllowWindowActivationFromJavaScript, False)
return ans
@lru_cache(maxsize=None)
def create_profile(cache_name='', allow_js=False):
ans = create_base_profile(cache_name, allow_js)
js = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
ans.token = secrets.token_hex()
js = js.replace('TOKEN', ans.token)