diff --git a/src/calibre/scraper/qt.py b/src/calibre/scraper/qt.py index 7b79b30b95..7c4dd0eb8b 100644 --- a/src/calibre/scraper/qt.py +++ b/src/calibre/scraper/qt.py @@ -23,6 +23,8 @@ class FakeResponse: self.queue = Queue() self.done = False self.final_url = '' + self._status = None + self._headers = [] self.data = BytesIO() def _wait(self): @@ -33,6 +35,8 @@ class FakeResponse: if res['action'] == 'input_error': raise Exception(res['error']) self.final_url = res['final_url'] + self._status = res.get('http_code') + self._headers = res['headers'] if 'error' in res: ex = URLError(res['error']) ex.worth_retry = bool(res.get('worth_retry')) @@ -51,10 +55,35 @@ class FakeResponse: def tell(self, *a, **kw): return self.data.tell(*a, **kw) - def geturl(self): + @property + def url(self) -> str: self._wait() return self.final_url + @property + def status(self) -> int | None: + self._wait() + return self._status + code = status + + @property + def headers(self): + self._wait() + from email.message import EmailMessage + ans = EmailMessage() + for k, v in self._headers: + ans[k] = v + return ans + + def getcode(self) -> int | None: + return self.status + + def geturl(self): + return self.url + + def getinfo(self): + return self.headers + def close(self): self.data.close() @@ -206,9 +235,18 @@ class Browser: self.shutdown() -def run_worker(tdir: str, user_agent: str, verify_ssl_certificates: bool): +class WebEngineBrowser(Browser): + + def is_method_ok(self, method: str) -> bool: + return method.upper() in ('GET', 'POST') + + def run_worker(self) -> subprocess.Popen: + return run_worker(self.tdir, self.user_agent, self.verify_ssl_certificates, function='webengine_worker') + + +def run_worker(tdir: str, user_agent: str, verify_ssl_certificates: bool, function: str = 'worker'): from calibre.utils.ipc.simple_worker import start_pipe_worker - return start_pipe_worker(f'from calibre.scraper.qt import worker; worker({tdir!r}, {user_agent!r}, {verify_ssl_certificates!r})') + return start_pipe_worker(f'from calibre.scraper.qt import {function}; {function}({tdir!r}, {user_agent!r}, {verify_ssl_certificates!r})') def worker(*args): @@ -218,6 +256,13 @@ def worker(*args): worker(*args) +def webengine_worker(*args): + from calibre.gui2 import must_use_qt + must_use_qt() + from .webengine_backend import worker + worker(*args) + + def develop(): import sys br = Browser() diff --git a/src/calibre/scraper/qt_backend.py b/src/calibre/scraper/qt_backend.py index 0339005449..3dcefbbef9 100644 --- a/src/calibre/scraper/qt_backend.py +++ b/src/calibre/scraper/qt_backend.py @@ -104,8 +104,17 @@ class DownloadRequest(QObject): def as_result(self) -> dict[str, str]: self.save_data() e = self.reply.error() - result = {'action': 'finished', 'id': self.req_id, 'url': self.url, 'output': self.output_path, - 'final_url': qurl_to_string(self.reply.url())} + result = { + 'action': 'finished', 'id': self.req_id, 'url': self.url, 'output': self.output_path, + 'final_url': qurl_to_string(self.reply.url()), 'headers': [] + } + h = result['headers'] + for (k, v) in self.reply.rawHeaderPairs(): + h.append((bytes(k).decode('utf-8', 'replace'), bytes(v).decode('utf-8', 'replace'))) + if code := self.reply.attribute(QNetworkRequest.Attribute.HttpStatusCodeAttribute): + result['http_code'] = code + if msg := self.reply.attribute(QNetworkRequest.Attribute.HttpReasonPhraseAttribute): + result['http_status_message'] = msg if e != QNetworkReply.NetworkError.NoError: if e in ( @@ -299,10 +308,10 @@ def read_commands(backend: FetchBackend, tdir: str) -> None: backend.input_finished.emit(error_msg) -def worker(tdir: str, user_agent: str, verify_ssl_certificates: bool) -> None: +def worker(tdir: str, user_agent: str, verify_ssl_certificates: bool, backend_class: type = FetchBackend) -> None: app = QApplication.instance() sys.stdout = sys.stderr - backend = FetchBackend(parent=app, user_agent=user_agent, output_dir=tdir, verify_ssl_certificates=verify_ssl_certificates) + backend = backend_class(parent=app, user_agent=user_agent, output_dir=tdir, verify_ssl_certificates=verify_ssl_certificates) try: read_thread = Thread(target=read_commands, args=(backend, tdir), daemon=True) read_thread.start()