More tests including for timeouts

Kovid Goyal 2024-08-09 12:53:53 +05:30
parent d437f1b644
commit d7404bcf1b
2 changed files with 56 additions and 44 deletions


@@ -26,10 +26,11 @@ class RequestInterceptor(QWebEngineUrlRequestInterceptor):
     def interceptRequest(self, req: QWebEngineUrlRequestInfo) -> None:
         fb: FetchBackend = self.parent()
         if fb:
-            key = qurl_to_key(req.requestUrl())
-            if dr := fb.download_requests.get(key):
-                for (name, val) in dr.headers:
-                    req.setHttpHeader(name.encode(), val.encode())
+            url_key = qurl_to_key(req.requestUrl())
+            for dr in fb.live_requests:
+                if dr.url_key == url_key:
+                    for k, v in dr.headers:
+                        req.setHttpHeader(k.encode(), v.encode())
 
 
 def qurl_to_string(url: QUrl | str) -> str:
@@ -47,21 +48,20 @@ class DownloadRequest:
     cancel_on_start: bool = False
     error: str = ''
-    finished: bool = False
     worth_retry: bool = False
     webengine_download_request: QWebEngineDownloadRequest | None = None
 
-    def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout):
+    def __init__(self, url: str, filename: str, headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0):
         self.url, self.filename = url, filename
         self.url_key = qurl_to_key(url)
         self.headers: Headers = headers or []
-        self.responses_needed: list[int] = []
+        self.req_id: int = req_id
         self.error_message = ''
         self.created_at = self.last_activity_at = monotonic()
         self.timeout = timeout
 
-    def as_result(self, req_id: int) -> dict[str, str]:
-        result = {'action': 'finished', 'id': req_id, 'url': self.url, 'output': os.path.join(
+    def as_result(self) -> dict[str, str]:
+        result = {'action': 'finished', 'id': self.req_id, 'url': self.url, 'output': os.path.join(
             self.webengine_download_request.downloadDirectory(), self.webengine_download_request.downloadFileName()),
             'final_url': qurl_to_string(self.webengine_download_request.url())
         }
@@ -99,7 +99,6 @@ class FetchBackend(QWebEnginePage):
         profile.setUrlRequestInterceptor(self.interceptor)
         self.request_download.connect(self.download, type=Qt.ConnectionType.QueuedConnection)
         self.input_finished.connect(self.on_input_finished, type=Qt.ConnectionType.QueuedConnection)
-        self.download_requests: dict[str, DownloadRequest] = {}
         self.live_requests: set[DownloadRequest] = set()
         self.pending_download_requests: dict[int, DownloadRequest] = {}
         self.download_requests_by_id: dict[int, DownloadRequest] = {}
@@ -133,24 +132,13 @@ class FetchBackend(QWebEnginePage):
     def download(self, url: str, filename: str, extra_headers: Headers | None = None, timeout: float = default_timeout, req_id: int = 0) -> None:
         filename = os.path.basename(filename)
         qurl = QUrl(url)
-        key = qurl_to_key(qurl)
-        dr = self.download_requests.get(key)
-        if dr and not dr.error:
-            if dr.finished:
-                result = dr.as_result(req_id)
-                self.download_finished.emit(result)
-                self.send_response(result)
-            else:
-                dr.responses_needed.append(req_id)
-        else:
-            self.download_requests[key] = dr = DownloadRequest(url, filename, extra_headers, timeout)
-            self.dr_identifier_count += 1
-            self.pending_download_requests[self.dr_identifier_count] = dr
-            self.live_requests.add(dr)
-            dr.responses_needed.append(req_id)
-            if not self.timeout_timer.isActive():
-                self.timeout_timer.start()
-            super().download(qurl, str(self.dr_identifier_count))
+        dr = DownloadRequest(url, filename, extra_headers, timeout, req_id)
+        self.dr_identifier_count += 1
+        self.pending_download_requests[self.dr_identifier_count] = dr
+        self.live_requests.add(dr)
+        if not self.timeout_timer.isActive():
+            self.timeout_timer.start()
+        super().download(qurl, str(self.dr_identifier_count))
 
     def _download_requested(self, wdr: QWebEngineDownloadRequest) -> None:
         try:
@@ -161,7 +149,8 @@ class FetchBackend(QWebEnginePage):
         try:
             if dr.cancel_on_start:
                 dr.error = 'Timed out trying to open URL'
-                dr.finished = True
+                dr.worth_retry = True
+                self.send_response(dr.as_result())
                 return
             dr.last_activity_at = monotonic()
             if dr.filename:
@@ -189,7 +178,6 @@ class FetchBackend(QWebEnginePage):
     def report_finish(self, wdr: QWebEngineDownloadRequest, dr: DownloadRequest) -> None:
         s = wdr.state()
         dr.last_activity_at = monotonic()
-        dr.finished = True
         self.live_requests.discard(dr)
 
         has_result = False
@@ -214,11 +202,9 @@ class FetchBackend(QWebEnginePage):
            has_result = True

        if has_result:
-            for req_id in dr.responses_needed:
-                result = dr.as_result(req_id)
-                self.download_finished.emit(result)
-                self.send_response(result)
-            del dr.responses_needed[:]
+            result = dr.as_result()
+            self.download_finished.emit(result)
+            self.send_response(result)
 
     def send_response(self, r: dict[str, str]) -> None:
         with suppress(OSError):
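That is the whole of the backend change: each download() call now creates its own DownloadRequest carrying its req_id, so the by-URL download_requests cache, the finished flag, and the responses_needed fan-out list all disappear, and a timed-out request is reported immediately with worth_retry set. The timer that drives those timeouts is outside this diff; the sketch below shows one way such a sweep could work, using only the fields visible above (live_requests, last_activity_at, timeout, error, worth_retry). The sweep_timeouts name and the cancel callback are hypothetical stand-ins, not calibre's actual code.

from time import monotonic

def sweep_timeouts(live_requests, cancel):
    # Hypothetical periodic sweep (the diff's timeout_timer would drive it):
    # abandon any live request that has been idle longer than its timeout.
    for dr in tuple(live_requests):  # copy: cancel() may discard from the set
        if monotonic() - dr.last_activity_at > dr.timeout:
            dr.error = 'Timed out trying to open URL'
            dr.worth_retry = True  # tell the client a retry may succeed
            cancel(dr)  # e.g. flag cancel_on_start or abort the download

The second changed file, whose diff follows, updates the tests to drive these paths.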


@@ -49,27 +49,35 @@ class TestSimpleWebEngineScraper(unittest.TestCase):
 class Handler(http.server.BaseHTTPRequestHandler):
 
-    request_count = 0
+    def __init__(self, test_obj, *a):
+        self.test_obj = test_obj
+        super().__init__(*a)
 
     def do_GET(self):
+        if self.test_obj.dont_send_response:
+            return
         h = {}
         for k, v in self.headers.items():
             h.setdefault(k, []).append(v)
+        self.test_obj.request_count += 1
         ans = {
             'path': self.path,
             'headers': h,
-            'request_count': self.request_count,
+            'request_count': self.test_obj.request_count,
         }
         data = json.dumps(ans).encode()
         self.send_response(http.HTTPStatus.OK)
         self.send_header('Content-type', 'application/json')
         self.send_header('Content-Length', str(len(data)))
+        self.send_header('Set-Cookie', 'sc=1')
         self.end_headers()
         self.flush_headers()
+        if self.test_obj.dont_send_body:
+            return
         self.wfile.write(data)
 
     def log_request(self, code='-', size='-'):
-        self.request_count += 1
+        pass
 
 
 @unittest.skipIf(skip, skip)
@@ -83,6 +91,7 @@ class TestFetchBackend(unittest.TestCase):
         self.server_thread.start()
         self.server_started.wait(5)
         self.request_count = 0
+        self.dont_send_response = self.dont_send_body = False
 
     def tearDown(self):
         self.server.shutdown()
@@ -91,17 +100,35 @@ class TestFetchBackend(unittest.TestCase):
     def test_recipe_browser(self):
         def u(path=''):
             return f'http://localhost:{self.port}{path}'
-        def get(path=''):
-            raw = br.open(u(path)).read()
+        def get(path='', timeout=None):
+            raw = br.open(u(path), timeout=timeout).read()
             return json.loads(raw)
 
+        def test_with_timeout(no_response=True):
+            from urllib.error import URLError
+            self.dont_send_body = True
+            if no_response:
+                self.dont_send_response = True
+            try:
+                get(timeout=0.02)
+            except URLError as e:
+                self.assertTrue(e.worth_retry)
+            else:
+                raise AssertionError('Expected timeout not raised')
+            self.dont_send_body = False
+            self.dont_send_response = False
+
         br = Browser(user_agent='test-ua', headers=(('th', '1'),), start_worker=True)
         try:
             r = get()
-            self.ae(r['request_count'], 0)
-            print(r)
+            self.ae(r['request_count'], 1)
             self.ae(r['headers']['th'], ['1'])
             self.ae(r['headers']['User-Agent'], ['test-ua'])
             self.assertIn('Accept-Encoding', r['headers'])
+            r = get()
+            self.ae(r['request_count'], 2)
+            self.ae(r['headers']['Cookie'], ['sc=1'])
+            test_with_timeout(True)
+            test_with_timeout(False)
         finally:
             br.shutdown()
@@ -109,8 +136,7 @@ class TestFetchBackend(unittest.TestCase):
         import socketserver
 
         def create_handler(*a):
-            ans = Handler(*a)
-            ans.backend = self
+            ans = Handler(self, *a)
             return ans
 
         with socketserver.TCPServer(("", 0), create_handler) as httpd:
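The new test_with_timeout covers both failure flavors: dont_send_response simulates a server that never answers at all, while dont_send_body sends headers and then stalls before the body. From the client side, the contract the test asserts looks roughly like the sketch below; it assumes Browser is importable from the module under test, and the URL is a placeholder.

from urllib.error import URLError

br = Browser(user_agent='test-ua', start_worker=True)
try:
    br.open('http://localhost:8000/', timeout=0.02)  # placeholder URL/port
except URLError as e:
    # A timeout surfaces as a URLError whose worth_retry attribute tells
    # the caller that the same fetch may well succeed if attempted again.
    if getattr(e, 'worth_retry', False):
        pass  # schedule a retry
finally:
    br.shutdown()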