mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Start work on using browser network stack for recipes
This commit is contained in:
parent
bf7c76f79e
commit
5031269baf
58
src/calibre/scraper/fetch_backend.py
Normal file
58
src/calibre/scraper/fetch_backend.py
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
|
||||
from typing import Union
|
||||
|
||||
from qt.core import QNetworkCookie, QObject, Qt, QUrl, pyqtSignal
|
||||
from qt.webengine import QWebEngineDownloadRequest, QWebEnginePage, QWebEngineUrlRequestInfo, QWebEngineUrlRequestInterceptor
|
||||
|
||||
from .simple_backend import create_base_profile
|
||||
|
||||
|
||||
class RequestInterceptor(QWebEngineUrlRequestInterceptor):
    '''URL request interceptor that currently leaves every request untouched.'''

    def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None:
        '''Placeholder hook: the request is neither inspected nor modified.'''
        return None
|
||||
|
||||
|
||||
class FetchBackend(QWebEnginePage):
    '''Web engine page that downloads URLs through its own browser profile.

    The page owns a profile created by ``create_base_profile()``, directs
    downloads from that profile into ``output_dir`` and installs a
    ``RequestInterceptor`` on it.
    '''

    # Emitting request_download(url, filename_or_path) calls download() via a
    # queued connection (see __init__).
    # NOTE(review): presumably so downloads can be requested from other
    # threads -- confirm against callers.
    request_download = pyqtSignal(object, str)

    def __init__(self, output_dir: str, cache_name: str = '', parent: QObject = None) -> None:
        '''Create the profile, wire up download handling and build the page.

        :param output_dir: Directory set as the profile's download path.
        :param cache_name: Passed through to ``create_base_profile()``.
        :param parent: Optional Qt parent object for the page.
        '''
        # The profile has to exist before the base class initializer runs,
        # because it is passed to it as a constructor argument.
        self.profile = create_base_profile(cache_name)
        self.profile.downloadRequested.connect(self._download_requested)
        self.profile.setDownloadPath(output_dir)
        super().__init__(self.profile, parent)
        # Keep a reference on self so the interceptor stays reachable.
        self.interceptor = RequestInterceptor(self)
        self.profile.setUrlRequestInterceptor(self.interceptor)
        self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection)

    def download(self, url: Union[str, QUrl], filename_or_path: str = '') -> str:
        '''Start downloading ``url`` and return its encoded string form.

        :param url: The URL to fetch, as text or as a ``QUrl``.
        :param filename_or_path: Forwarded unchanged to ``QWebEnginePage.download()``.
        :return: The URL as produced by ``QUrl.toEncoded()``, decoded to ``str``.
        '''
        if isinstance(url, str):
            url = QUrl(url)
        super().download(url, filename_or_path)
        return bytes(url.toEncoded()).decode()

    def _download_requested(self, dr: QWebEngineDownloadRequest) -> None:
        '''Accept a download request and watch for it to finish.'''
        dr.accept()
        dr.isFinishedChanged.connect(self._download_finished)

    def _download_finished(self) -> None:
        '''Report the outcome of the download request that emitted the signal.'''
        # The slot is connected to the request's own signal, so the sender is
        # the download request.
        dr: QWebEngineDownloadRequest = self.sender()
        s = dr.state()
        url = bytes(dr.url().toEncoded()).decode()
        # The prints below are diagnostic output only; other states are ignored.
        if s == QWebEngineDownloadRequest.DownloadState.DownloadInterrupted:
            print(99999999, url, dr.interruptReasonString())
        elif s == QWebEngineDownloadRequest.DownloadState.DownloadCompleted:
            print(1111111, dr, url, dr.downloadFileName())

    def set_user_agent(self, new_val: str) -> None:
        '''Set the HTTP user agent string on the profile.'''
        self.profile.setHttpUserAgent(new_val)

    def set_simple_cookie(self, name, value, domain, path='/'):
        '''Add a ``name=value`` cookie for ``domain`` and ``path`` to the profile's cookie store.

        :param name: Cookie name.
        :param value: Cookie value.
        :param domain: Value used for the cookie's ``Domain`` attribute.
        :param path: Value used for the cookie's ``Path`` attribute.
        '''
        cs = self.profile.cookieStore()
        cookie_string = f'{name}={value}; Domain={domain}; Path={path}'
        # parseCookies() takes a QByteArray; PyQt converts bytes to that type
        # but not str, so the header text must be encoded first.
        for c in QNetworkCookie.parseCookies(cookie_string.encode()):
            cs.setCookie(c)
|
@ -23,8 +23,7 @@ def canonicalize_qurl(qurl):
|
||||
return qurl
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def create_profile(cache_name='', allow_js=False):
|
||||
def create_base_profile(cache_name='', allow_js=False):
|
||||
from calibre.utils.random_ua import random_common_chrome_user_agent
|
||||
if cache_name:
|
||||
ans = QWebEngineProfile(cache_name, QApplication.instance())
|
||||
@ -43,6 +42,12 @@ def create_profile(cache_name='', allow_js=False):
|
||||
# ensure javascript cannot read from local files
|
||||
a(QWebEngineSettings.WebAttribute.LocalContentCanAccessFileUrls, False)
|
||||
a(QWebEngineSettings.WebAttribute.AllowWindowActivationFromJavaScript, False)
|
||||
return ans
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def create_profile(cache_name='', allow_js=False):
|
||||
ans = create_base_profile(cache_name, allow_js)
|
||||
js = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
|
||||
ans.token = secrets.token_hex()
|
||||
js = js.replace('TOKEN', ans.token)
|
||||
|
Loading…
x
Reference in New Issue
Block a user