From 54a2b553f46df6dc98a4b8410788e35165e0fa3e Mon Sep 17 00:00:00 2001 From: mggh0139 <223163273+mggh0139@users.noreply.github.com> Date: Mon, 28 Jul 2025 01:03:01 -0400 Subject: [PATCH] [fix] tracker pattern: let startup continue if url fetch fails (#5055) Catch the HTTP exception raised when fetching the ClearURL rule list fails, so that startup no longer crashes. Also add some log messages. Closes: https://github.com/searxng/searxng/issues/5054 --- searx/data/tracker_patterns.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py index 31b0af9b1..544031f4e 100644 --- a/searx/data/tracker_patterns.py +++ b/searx/data/tracker_patterns.py @@ -10,6 +10,8 @@ import re from collections.abc import Iterator from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode +from httpx import HTTPError + from searx.data.core import get_cache, log from searx.network import get as http_get @@ -70,10 +72,19 @@ class TrackerPatternsDB: def iter_clear_list(self) -> Iterator[RuleType]: resp = None for url in self.CLEAR_LIST_URL: - resp = http_get(url, timeout=3) - if resp.status_code == 200: - break - log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}") + log.debug("TRACKER_PATTERNS: Trying to fetch %s...", url) + try: + resp = http_get(url, timeout=3) + + except HTTPError as exc: + log.warning("TRACKER_PATTERNS: HTTPError (%s) occured while fetching %s", url, exc) + continue + + if resp.status_code != 200: + log.warning(f"TRACKER_PATTERNS: ClearURL ignore HTTP {resp.status_code} {url}") + continue + + break if resp is None: log.error("TRACKER_PATTERNS: failed fetching ClearURL rule lists")