mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Start work on using browser network stack for recipes
This commit is contained in:
parent
bf7c76f79e
commit
5031269baf
58
src/calibre/scraper/fetch_backend.py
Normal file
58
src/calibre/scraper/fetch_backend.py
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
|
from typing import Union
|
||||||
|
|
||||||
|
from qt.core import QNetworkCookie, QObject, Qt, QUrl, pyqtSignal
|
||||||
|
from qt.webengine import QWebEngineDownloadRequest, QWebEnginePage, QWebEngineUrlRequestInfo, QWebEngineUrlRequestInterceptor
|
||||||
|
|
||||||
|
from .simple_backend import create_base_profile
|
||||||
|
|
||||||
|
|
||||||
|
class RequestInterceptor(QWebEngineUrlRequestInterceptor):
    '''URL request interceptor that currently leaves every request untouched.'''

    def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None:
        '''Placeholder hook: does nothing with ``req`` for now.'''
|
||||||
|
|
||||||
|
|
||||||
|
class FetchBackend(QWebEnginePage):
    '''
    A web engine page used to download URLs via its own profile.

    The profile comes from create_base_profile(), stores downloads in the
    given output directory and has a RequestInterceptor installed on it.
    '''

    # Emitted with (url, filename_or_path). It is wired to download() through
    # a queued connection, so emitting it schedules the download instead of
    # invoking download() directly from the emitter.
    request_download = pyqtSignal(object, str)

    def __init__(self, output_dir: str, cache_name: str = '', parent: QObject = None) -> None:
        '''
        Create the page and the profile it runs on.

        :param output_dir: Directory set as the profile's download path.
        :param cache_name: Name passed to create_base_profile().
        :param parent: Optional parent object for the page.
        '''
        # The profile has to exist before the page is constructed because it
        # is passed to the QWebEnginePage constructor.
        self.profile = create_base_profile(cache_name)
        self.profile.downloadRequested.connect(self._download_requested)
        self.profile.setDownloadPath(output_dir)
        super().__init__(self.profile, parent)
        # Keep a reference on self so the interceptor lives as long as the page.
        self.interceptor = RequestInterceptor(self)
        self.profile.setUrlRequestInterceptor(self.interceptor)
        self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection)

    def download(self, url: Union[str, QUrl], filename_or_path: str = '') -> str:
        '''
        Start downloading ``url`` and return its encoded form as a string.

        :param url: The URL to fetch, either as text or as a QUrl.
        :param filename_or_path: Forwarded unchanged to QWebEnginePage.download().
        :return: The URL, percent-encoded, as a str.
        '''
        if isinstance(url, str):
            url = QUrl(url)
        super().download(url, filename_or_path)
        return bytes(url.toEncoded()).decode()

    def _download_requested(self, dr: QWebEngineDownloadRequest) -> None:
        '''Accept a download requested on the profile and watch for it to finish.'''
        dr.accept()
        dr.isFinishedChanged.connect(self._download_finished)

    def _download_finished(self) -> None:
        '''Report the outcome of the download request that emitted the signal.'''
        # The slot takes no arguments, so the request is recovered from sender().
        dr: QWebEngineDownloadRequest = self.sender()
        s = dr.state()
        url = bytes(dr.url().toEncoded()).decode()
        # NOTE(review): the prints below look like temporary debugging output
        # for work in progress; they are kept exactly as they were.
        if s == QWebEngineDownloadRequest.DownloadState.DownloadInterrupted:
            print(99999999, url, dr.interruptReasonString())
        elif s == QWebEngineDownloadRequest.DownloadState.DownloadCompleted:
            print(1111111, dr, url, dr.downloadFileName())

    def set_user_agent(self, new_val: str) -> None:
        '''Set the HTTP user agent string on the profile.'''
        self.profile.setHttpUserAgent(new_val)

    def set_simple_cookie(self, name, value, domain, path='/'):
        '''
        Add a cookie built from name, value, domain and path to the profile.

        :param name: Cookie name.
        :param value: Cookie value.
        :param domain: Value for the cookie's Domain attribute.
        :param path: Value for the cookie's Path attribute.
        '''
        cs = self.profile.cookieStore()
        cookie_string = f'{name}={value}; Domain={domain}; Path={path}'
        # parseCookies() takes raw bytes, and PyQt does not implicitly convert
        # str to a byte array, so the header text must be encoded first.
        for c in QNetworkCookie.parseCookies(cookie_string.encode()):
            cs.setCookie(c)
|
@ -23,8 +23,7 @@ def canonicalize_qurl(qurl):
|
|||||||
return qurl
|
return qurl
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=None)
|
def create_base_profile(cache_name='', allow_js=False):
|
||||||
def create_profile(cache_name='', allow_js=False):
|
|
||||||
from calibre.utils.random_ua import random_common_chrome_user_agent
|
from calibre.utils.random_ua import random_common_chrome_user_agent
|
||||||
if cache_name:
|
if cache_name:
|
||||||
ans = QWebEngineProfile(cache_name, QApplication.instance())
|
ans = QWebEngineProfile(cache_name, QApplication.instance())
|
||||||
@ -43,6 +42,12 @@ def create_profile(cache_name='', allow_js=False):
|
|||||||
# ensure javascript cannot read from local files
|
# ensure javascript cannot read from local files
|
||||||
a(QWebEngineSettings.WebAttribute.LocalContentCanAccessFileUrls, False)
|
a(QWebEngineSettings.WebAttribute.LocalContentCanAccessFileUrls, False)
|
||||||
a(QWebEngineSettings.WebAttribute.AllowWindowActivationFromJavaScript, False)
|
a(QWebEngineSettings.WebAttribute.AllowWindowActivationFromJavaScript, False)
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=None)
|
||||||
|
def create_profile(cache_name='', allow_js=False):
|
||||||
|
ans = create_base_profile(cache_name, allow_js)
|
||||||
js = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
|
js = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
|
||||||
ans.token = secrets.token_hex()
|
ans.token = secrets.token_hex()
|
||||||
js = js.replace('TOKEN', ans.token)
|
js = js.replace('TOKEN', ans.token)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user