From ca441f419ca2e3a4a6cf90ac853a3ff6d69df967 Mon Sep 17 00:00:00 2001
From: Markus Heiser
Date: Fri, 21 Nov 2025 08:16:24 +0100
Subject: [PATCH] [fix] engines - set hard timeouts in *sub-requests* (#5460)

The requests changed here all run outside of the network context, so the
engine's timeout is not applied to them: without an explicit hard timeout,
such a request can run longer than the engine is configured for.

Signed-off-by: Markus Heiser
---
 searx/engines/archlinux.py                   | 2 +-
 searx/engines/azure.py                       | 2 +-
 searx/engines/presearch.py                   | 2 +-
 searx/engines/public_domain_image_archive.py | 2 +-
 searx/engines/pubmed.py                      | 2 +-
 searx/engines/semantic_scholar.py            | 2 +-
 searx/engines/seznam.py                      | 2 +-
 searx/engines/soundcloud.py                  | 2 +-
 8 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index 9b8e39a05..9e3adf154 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -120,7 +120,7 @@ def fetch_traits(engine_traits: EngineTraits):
         'zh': 'Special:搜索',
     }
 
-    resp = get('https://wiki.archlinux.org/')
+    resp = get('https://wiki.archlinux.org/', timeout=3)
 
     if not resp.ok:  # type: ignore
         print("ERROR: response from wiki.archlinux.org is not OK.")
diff --git a/searx/engines/azure.py b/searx/engines/azure.py
index 35538b561..4c916bc3b 100644
--- a/searx/engines/azure.py
+++ b/searx/engines/azure.py
@@ -95,7 +95,7 @@ def authenticate(t_id: str, c_id: str, c_secret: str) -> str:
         "scope": "https://management.azure.com/.default",
     }
 
-    resp: SXNG_Response = http_post(url, body)
+    resp: SXNG_Response = http_post(url, body, timeout=5)
     if resp.status_code != 200:
         raise RuntimeError(f"Azure authentication failed (status {resp.status_code}): {resp.text}")
     return resp.json()["access_token"]
diff --git a/searx/engines/presearch.py b/searx/engines/presearch.py
index 69fbdfa91..c6abff115 100644
--- a/searx/engines/presearch.py
+++ b/searx/engines/presearch.py
@@ -140,7 +140,7 @@ def _get_request_id(query, params):
     if l.territory:
         headers['Accept-Language'] = f"{l.language}-{l.territory},{l.language};" "q=0.9,*;" "q=0.5"
 
-    resp = get(url, headers=headers)
+    resp = get(url, headers=headers, timeout=5)
 
     for line in resp.text.split("\n"):
         if "window.searchId = " in line:
diff --git a/searx/engines/public_domain_image_archive.py b/searx/engines/public_domain_image_archive.py
index e3f8edb59..81beb418d 100644
--- a/searx/engines/public_domain_image_archive.py
+++ b/searx/engines/public_domain_image_archive.py
@@ -64,7 +64,7 @@ def _get_algolia_api_url():
         return __CACHED_API_URL
 
     # fake request to extract api url
-    resp = get(f"{pdia_base_url}/search/?q=")
+    resp = get(f"{pdia_base_url}/search/?q=", timeout=3)
     if resp.status_code != 200:
         raise LookupError("Failed to fetch config location (and as such the API url) for PDImageArchive")
     pdia_config_filepart = extr(resp.text, pdia_config_start, pdia_config_end)
diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py
index 6fcfaa9a3..7e5ef2ce1 100644
--- a/searx/engines/pubmed.py
+++ b/searx/engines/pubmed.py
@@ -73,7 +73,7 @@ def request(query: str, params: "OnlineParams") -> None:
     )
     esearch_url = f"{eutils_api}/esearch.fcgi?{args}"
     # DTD: https://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060628/esearch.dtd
-    esearch_resp: "SXNG_Response" = get(esearch_url)
+    esearch_resp: "SXNG_Response" = get(esearch_url, timeout=3)
 
     pmids_results = etree.XML(esearch_resp.content)
     pmids: list[str] = [i.text for i in pmids_results.xpath("//eSearchResult/IdList/Id")]
diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py
index 985ebd8a3..390eccff3 100644
--- a/searx/engines/semantic_scholar.py
+++ b/searx/engines/semantic_scholar.py
@@ -66,7 +66,7 @@ def setup(engine_settings: dict[str, t.Any]) -> bool:
 
 def get_ui_version() -> str:
     ret_val: str = CACHE.get("X-S2-UI-Version")
     if not ret_val:
-        resp = get(base_url)
+        resp = get(base_url, timeout=3)
         if not resp.ok:
             raise RuntimeError("Can't determine Semantic Scholar UI version")
diff --git a/searx/engines/seznam.py b/searx/engines/seznam.py
index c7e0fe75f..713697709 100644
--- a/searx/engines/seznam.py
+++ b/searx/engines/seznam.py
@@ -27,7 +27,7 @@ base_url = 'https://search.seznam.cz/'
 
 
 def request(query, params):
-    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
+    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True, timeout=3)
     dom = html.fromstring(response_index.text)
 
     url_params = {
diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
index b1bb329e1..b3819807d 100644
--- a/searx/engines/soundcloud.py
+++ b/searx/engines/soundcloud.py
@@ -124,7 +124,7 @@ def get_client_id() -> str | None:
     client_id = ""
     url = "https://soundcloud.com"
 
-    resp = http_get(url, timeout=10)
+    resp = http_get(url, timeout=3)
 
     if not resp.ok:
         logger.error("init: GET %s failed", url)
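
Review note: every hunk applies the same change: sub-requests that are made
outside of the engine's network context (traits fetching, setup, authentication
and client-id lookups) now pass an explicit hard timeout to the searx.network
get()/post() helpers. Below is a minimal sketch of that pattern for a
hypothetical engine module; the URL, the function body and the 3-second value
are illustrative and not taken from this patch:

    # sketch: hard timeout for a sub-request made outside the network context
    from searx.network import get


    def fetch_traits(engine_traits):
        # fetch_traits() runs outside the engine's network context, so the
        # engine's configured timeout does not bound this request; pass an
        # explicit hard timeout instead
        resp = get("https://example.org/", timeout=3)
        if not resp.ok:
            print("ERROR: response from example.org is not OK.")
            return
        # ... parse resp.text and fill engine_traits ...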