More tests including for timeouts

This commit is contained in:
Kovid Goyal 2024-08-09 12:53:53 +05:30
parent d437f1b644
commit d7404bcf1b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 56 additions and 44 deletions

View File

@ -26,10 +26,11 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor):
def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None:
fb: FetchBackend = self.parent()
if fb:
key = qurl_to_key(req.requestUrl())
if dr := fb.download_requests.get(key):
for (name, val) in dr.headers:
req.setHttpHeader(name.encode(), val.encode())
url_key = qurl_to_key(req.requestUrl())
for dr in fb.live_requests:
if dr.url_key == url_key:
for k, v in dr.headers:
req.setHttpHeader(k.encode(), v.encode())
def qurl_to_string(url: QUrl | str) -> str:
@ -47,21 +48,20 @@ class DownloadRequest:
cancel_on_start: bool = False
error: str = ''
finished: bool = False
worth_retry: bool = False
webengine_download_request: QWebEngineDownloadRequest | None = None
def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout):
def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0):
self.url, self.filename = url, filename
self.url_key = qurl_to_key(url)
self.headers: Headers = headers or []
self.responses_needed: list[int] = []
self.req_id: int = req_id
self.error_message = ''
self.created_at = self.last_activity_at = monotonic()
self.timeout = timeout
def as_result(self, req_id: int) -> dict[str, str]:
result = {'action': 'finished', 'id': req_id, 'url': self.url, 'output': os.path.join(
def as_result(self) -> dict[str, str]:
result = {'action': 'finished', 'id': self.req_id, 'url': self.url, 'output': os.path.join(
self.webengine_download_request.downloadDirectory(), self.webengine_download_request.downloadFileName()),
'final_url': qurl_to_string(self.webengine_download_request.url())
}
@ -99,7 +99,6 @@ class FetchBackend(QWebEnginePage):
profile.setUrlRequestInterceptor(self.interceptor)
self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection)
self.input_finished.connect(self.on_input_finished, type=Qt.ConnectionType.QueuedConnection)
self.download_requests: dict[str, DownloadRequest] = {}
self.live_requests: set[DownloadRequest] = set()
self.pending_download_requests: dict[int, DownloadRequest] = {}
self.download_requests_by_id: dict[int, DownloadRequest] = {}
@ -133,21 +132,10 @@ class FetchBackend(QWebEnginePage):
def download(self, url: str, filename: str, extra_headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0) -> None:
filename = os.path.basename(filename)
qurl = QUrl(url)
key = qurl_to_key(qurl)
dr = self.download_requests.get(key)
if dr and not dr.error:
if dr.finished:
result = dr.as_result(req_id)
self.download_finished.emit(result)
self.send_response(result)
else:
dr.responses_needed.append(req_id)
else:
self.download_requests[key] = dr = DownloadRequest(url, filename, extra_headers, timeout)
dr = DownloadRequest(url, filename, extra_headers, timeout, req_id)
self.dr_identifier_count += 1
self.pending_download_requests[self.dr_identifier_count] = dr
self.live_requests.add(dr)
dr.responses_needed.append(req_id)
if not self.timeout_timer.isActive():
self.timeout_timer.start()
super().download(qurl, str(self.dr_identifier_count))
@ -161,7 +149,8 @@ class FetchBackend(QWebEnginePage):
try:
if dr.cancel_on_start:
dr.error = 'Timed out trying to open URL'
dr.finished = True
dr.worth_retry = True
self.send_response(dr.as_result())
return
dr.last_activity_at = monotonic()
if dr.filename:
@ -189,7 +178,6 @@ class FetchBackend(QWebEnginePage):
def report_finish(self, wdr: QWebEngineDownloadRequest, dr: DownloadRequest) -> None:
s = wdr.state()
dr.last_activity_at = monotonic()
dr.finished = True
self.live_requests.discard(dr)
has_result = False
@ -214,11 +202,9 @@ class FetchBackend(QWebEnginePage):
has_result = True
if has_result:
for req_id in dr.responses_needed:
result = dr.as_result(req_id)
result = dr.as_result()
self.download_finished.emit(result)
self.send_response(result)
del dr.responses_needed[:]
def send_response(self, r: dict[str, str]) -> None:
with suppress(OSError):

View File

@ -49,27 +49,35 @@ class TestSimpleWebEngineScraper(unittest.TestCase):
class Handler(http.server.BaseHTTPRequestHandler):
request_count = 0
def __init__(self, test_obj, *a):
self.test_obj = test_obj
super().__init__(*a)
def do_GET(self):
if self.test_obj.dont_send_response:
return
h = {}
for k, v in self.headers.items():
h.setdefault(k, []).append(v)
self.test_obj.request_count += 1
ans = {
'path': self.path,
'headers': h,
'request_count': self.request_count,
'request_count': self.test_obj.request_count,
}
data = json.dumps(ans).encode()
self.send_response(http.HTTPStatus.OK)
self.send_header('Content-type', 'application/json')
self.send_header('Content-Length', str(len(data)))
self.send_header('Set-Cookie', 'sc=1')
self.end_headers()
self.flush_headers()
if self.test_obj.dont_send_body:
return
self.wfile.write(data)
def log_request(self, code='-', size='-'):
self.request_count += 1
pass
@unittest.skipIf(skip, skip)
@ -83,6 +91,7 @@ class TestFetchBackend(unittest.TestCase):
self.server_thread.start()
self.server_started.wait(5)
self.request_count = 0
self.dont_send_response = self.dont_send_body = False
def tearDown(self):
self.server.shutdown()
@ -91,17 +100,35 @@ class TestFetchBackend(unittest.TestCase):
def test_recipe_browser(self):
def u(path=''):
return f'http://localhost:{self.port}{path}'
def get(path=''):
raw = br.open(u(path)).read()
def get(path='', timeout=None):
raw = br.open(u(path), timeout=timeout).read()
return json.loads(raw)
def test_with_timeout(no_response=True):
from urllib.error import URLError
self.dont_send_body = True
if no_response:
self.dont_send_response = True
try:
get(timeout=0.02)
except URLError as e:
self.assertTrue(e.worth_retry)
else:
raise AssertionError('Expected timeout not raised')
self.dont_send_body = False
self.dont_send_response = False
br = Browser(user_agent='test-ua', headers=(('th', '1'),), start_worker=True)
try:
r = get()
self.ae(r['request_count'], 0)
print(r)
self.ae(r['request_count'], 1)
self.ae(r['headers']['th'], ['1'])
self.ae(r['headers']['User-Agent'], ['test-ua'])
self.assertIn('Accept-Encoding', r['headers'])
r = get()
self.ae(r['request_count'], 2)
self.ae(r['headers']['Cookie'], ['sc=1'])
test_with_timeout(True)
test_with_timeout(False)
finally:
br.shutdown()
@ -109,8 +136,7 @@ class TestFetchBackend(unittest.TestCase):
import socketserver
def create_handler(*a):
ans = Handler(*a)
ans.backend = self
ans = Handler(self, *a)
return ans
with socketserver.TCPServer(("", 0), create_handler) as httpd: