From d7404bcf1b75eec8878e6ee32c3beb147b12cee9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 9 Aug 2024 12:53:53 +0530 Subject: [PATCH] More tests including for timeouts --- src/calibre/scraper/fetch_backend.py | 56 +++++++++-------------- src/calibre/scraper/test_fetch_backend.py | 44 ++++++++++++++---- 2 files changed, 56 insertions(+), 44 deletions(-) diff --git a/src/calibre/scraper/fetch_backend.py b/src/calibre/scraper/fetch_backend.py index a720972f70..4f623c5ae2 100644 --- a/src/calibre/scraper/fetch_backend.py +++ b/src/calibre/scraper/fetch_backend.py @@ -26,10 +26,11 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor): def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None: fb: FetchBackend = self.parent() if fb: - key = qurl_to_key(req.requestUrl()) - if dr := fb.download_requests.get(key): - for (name, val) in dr.headers: - req.setHttpHeader(name.encode(), val.encode()) + url_key = qurl_to_key(req.requestUrl()) + for dr in fb.live_requests: + if dr.url_key == url_key: + for k, v in dr.headers: + req.setHttpHeader(k.encode(), v.encode()) def qurl_to_string(url: QUrl | str) -> str: @@ -47,21 +48,20 @@ class DownloadRequest: cancel_on_start: bool = False error: str = '' - finished: bool = False worth_retry: bool = False webengine_download_request: QWebEngineDownloadRequest | None = None - def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout): + def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0): self.url, self.filename = url, filename self.url_key = qurl_to_key(url) self.headers: Headers = headers or [] - self.responses_needed: list[int] = [] + self.req_id: int = req_id self.error_message = '' self.created_at = self.last_activity_at = monotonic() self.timeout = timeout - def as_result(self, req_id: int) -> dict[str, str]: - result = {'action': 'finished', 'id': req_id, 'url': self.url, 'output': os.path.join( + def as_result(self) -> dict[str, str]: + result = {'action': 'finished', 'id': self.req_id, 'url': self.url, 'output': os.path.join( self.webengine_download_request.downloadDirectory(), self.webengine_download_request.downloadFileName()), 'final_url': qurl_to_string(self.webengine_download_request.url()) } @@ -99,7 +99,6 @@ class FetchBackend(QWebEnginePage): profile.setUrlRequestInterceptor(self.interceptor) self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection) self.input_finished.connect(self.on_input_finished, type=Qt.ConnectionType.QueuedConnection) - self.download_requests: dict[str, DownloadRequest] = {} self.live_requests: set[DownloadRequest] = set() self.pending_download_requests: dict[int, DownloadRequest] = {} self.download_requests_by_id: dict[int, DownloadRequest] = {} @@ -133,24 +132,13 @@ class FetchBackend(QWebEnginePage): def download(self, url: str, filename: str, extra_headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0) -> None: filename = os.path.basename(filename) qurl = QUrl(url) - key = qurl_to_key(qurl) - dr = self.download_requests.get(key) - if dr and not dr.error: - if dr.finished: - result = dr.as_result(req_id) - self.download_finished.emit(result) - self.send_response(result) - else: - dr.responses_needed.append(req_id) - else: - self.download_requests[key] = dr = DownloadRequest(url, filename, extra_headers, timeout) - self.dr_identifier_count += 1 - self.pending_download_requests[self.dr_identifier_count] = dr - self.live_requests.add(dr) - dr.responses_needed.append(req_id) - if not self.timeout_timer.isActive(): - self.timeout_timer.start() - super().download(qurl, str(self.dr_identifier_count)) + dr = DownloadRequest(url, filename, extra_headers, timeout, req_id) + self.dr_identifier_count += 1 + self.pending_download_requests[self.dr_identifier_count] = dr + self.live_requests.add(dr) + if not self.timeout_timer.isActive(): + self.timeout_timer.start() + super().download(qurl, str(self.dr_identifier_count)) def _download_requested(self, wdr: QWebEngineDownloadRequest) -> None: try: @@ -161,7 +149,8 @@ class FetchBackend(QWebEnginePage): try: if dr.cancel_on_start: dr.error = 'Timed out trying to open URL' - dr.finished = True + dr.worth_retry = True + self.send_response(dr.as_result()) return dr.last_activity_at = monotonic() if dr.filename: @@ -189,7 +178,6 @@ class FetchBackend(QWebEnginePage): def report_finish(self, wdr: QWebEngineDownloadRequest, dr: DownloadRequest) -> None: s = wdr.state() dr.last_activity_at = monotonic() - dr.finished = True self.live_requests.discard(dr) has_result = False @@ -214,11 +202,9 @@ class FetchBackend(QWebEnginePage): has_result = True if has_result: - for req_id in dr.responses_needed: - result = dr.as_result(req_id) - self.download_finished.emit(result) - self.send_response(result) - del dr.responses_needed[:] + result = dr.as_result() + self.download_finished.emit(result) + self.send_response(result) def send_response(self, r: dict[str, str]) -> None: with suppress(OSError): diff --git a/src/calibre/scraper/test_fetch_backend.py b/src/calibre/scraper/test_fetch_backend.py index cec5cbd70b..f3b5d86f6c 100644 --- a/src/calibre/scraper/test_fetch_backend.py +++ b/src/calibre/scraper/test_fetch_backend.py @@ -49,27 +49,35 @@ class TestSimpleWebEngineScraper(unittest.TestCase): class Handler(http.server.BaseHTTPRequestHandler): - request_count = 0 + def __init__(self, test_obj, *a): + self.test_obj = test_obj + super().__init__(*a) def do_GET(self): + if self.test_obj.dont_send_response: + return h = {} for k, v in self.headers.items(): h.setdefault(k, []).append(v) + self.test_obj.request_count += 1 ans = { 'path': self.path, 'headers': h, - 'request_count': self.request_count, + 'request_count': self.test_obj.request_count, } data = json.dumps(ans).encode() self.send_response(http.HTTPStatus.OK) self.send_header('Content-type', 'application/json') self.send_header('Content-Length', str(len(data))) + self.send_header('Set-Cookie', 'sc=1') self.end_headers() self.flush_headers() + if self.test_obj.dont_send_body: + return self.wfile.write(data) def log_request(self, code='-', size='-'): - self.request_count += 1 + pass @unittest.skipIf(skip, skip) @@ -83,6 +91,7 @@ class TestFetchBackend(unittest.TestCase): self.server_thread.start() self.server_started.wait(5) self.request_count = 0 + self.dont_send_response = self.dont_send_body = False def tearDown(self): self.server.shutdown() @@ -91,17 +100,35 @@ class TestFetchBackend(unittest.TestCase): def test_recipe_browser(self): def u(path=''): return f'http://localhost:{self.port}{path}' - def get(path=''): - raw = br.open(u(path)).read() + def get(path='', timeout=None): + raw = br.open(u(path), timeout=timeout).read() return json.loads(raw) + def test_with_timeout(no_response=True): + from urllib.error import URLError + self.dont_send_body = True + if no_response: + self.dont_send_response = True + try: + get(timeout=0.02) + except URLError as e: + self.assertTrue(e.worth_retry) + else: + raise AssertionError('Expected timeout not raised') + self.dont_send_body = False + self.dont_send_response = False + br = Browser(user_agent='test-ua', headers=(('th', '1'),), start_worker=True) try: r = get() - self.ae(r['request_count'], 0) - print(r) + self.ae(r['request_count'], 1) self.ae(r['headers']['th'], ['1']) self.ae(r['headers']['User-Agent'], ['test-ua']) self.assertIn('Accept-Encoding', r['headers']) + r = get() + self.ae(r['request_count'], 2) + self.ae(r['headers']['Cookie'], ['sc=1']) + test_with_timeout(True) + test_with_timeout(False) finally: br.shutdown() @@ -109,8 +136,7 @@ class TestFetchBackend(unittest.TestCase): import socketserver def create_handler(*a): - ans = Handler(*a) - ans.backend = self + ans = Handler(self, *a) return ans with socketserver.TCPServer(("", 0), create_handler) as httpd: