From 313fda426cb39f9bda48da9288f7ca3cc93ce6ba Mon Sep 17 00:00:00 2001 From: Bnyro Date: Tue, 20 Jan 2026 13:33:36 +0100 Subject: [PATCH] [fix] annas archive: rotate between available backup domains - closes https://github.com/searxng/searxng/issues/5633 --- searx/engines/annas_archive.py | 29 +++++++++++++++++++++++------ searx/settings.yml | 3 +++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/searx/engines/annas_archive.py b/searx/engines/annas_archive.py index d3eda6800..e99a5cdb0 100644 --- a/searx/engines/annas_archive.py +++ b/searx/engines/annas_archive.py @@ -34,6 +34,9 @@ Implementations =============== """ + +import random + import typing as t from urllib.parse import urlencode @@ -66,7 +69,9 @@ categories = ["files", "books"] paging: bool = True # search-url -base_url: str = "https://annas-archive.org" +base_url: list[str] | str = [] +"""List of Anna's archive domains or a single domain (as string).""" + aa_content: str = "" """Anan's search form field **Content** / possible values:: @@ -98,6 +103,9 @@ def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused- """Check of engine's settings.""" traits = EngineTraits(**ENGINE_TRAITS["annas archive"]) + if not base_url: + raise ValueError("missing required config `base_url`") + if aa_content and aa_content not in traits.custom["content"]: raise ValueError(f"invalid setting content: {aa_content}") @@ -110,6 +118,13 @@ def setup(engine_settings: dict[str, t.Any]) -> bool: # pylint: disable=unused- return True +def get_base_url_choice() -> str: + if isinstance(base_url, list): + return random.choice(base_url) + + return base_url + + def request(query: str, params: "OnlineParams") -> None: lang = traits.get_language(params["searxng_locale"], traits.all_locale) args = { @@ -122,7 +137,9 @@ def request(query: str, params: "OnlineParams") -> None: } # filter out None and empty values filtered_args = dict((k, v) for k, v in args.items() if v) - params["url"] = f"{base_url}/search?{urlencode(filtered_args)}" + + params["base_url"] = get_base_url_choice() + params["url"] = f"{params['base_url']}/search?{urlencode(filtered_args)}" def response(resp: "SXNG_Response") -> EngineResults: @@ -136,16 +153,16 @@ def response(resp: "SXNG_Response") -> EngineResults: for item in eval_xpath_list(dom, "//main//div[contains(@class, 'js-aarecord-list-outer')]/div"): try: - kwargs: dict[str, t.Any] = _get_result(item) + kwargs: dict[str, t.Any] = _get_result(item, resp.search_params["base_url"]) except SearxEngineXPathException: continue res.add(res.types.Paper(**kwargs)) return res -def _get_result(item: ElementBase) -> dict[str, t.Any]: +def _get_result(item: ElementBase, base_url_choice) -> dict[str, t.Any]: return { - "url": base_url + eval_xpath_getindex(item, "./a/@href", 0), + "url": base_url_choice + eval_xpath_getindex(item, "./a/@href", 0), "title": extract_text(eval_xpath(item, "./div//a[starts-with(@href, '/md5')]")), "authors": [extract_text(eval_xpath_getindex(item, ".//a[starts-with(@href, '/search')]", 0))], "publisher": extract_text( @@ -169,7 +186,7 @@ def fetch_traits(engine_traits: EngineTraits): engine_traits.custom["ext"] = [] engine_traits.custom["sort"] = [] - resp = get(base_url + "/search") + resp = get(get_base_url_choice() + "/search") if not resp.ok: raise RuntimeError("Response from Anna's search page is not OK.") dom = html.fromstring(resp.text) diff --git a/searx/settings.yml b/searx/settings.yml index ff9ab34b2..9c7afda12 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -403,6 +403,9 @@ engines: - name: annas archive engine: annas_archive + base_url: + - https://annas-archive.li + - https://annas-archive.pm disabled: true shortcut: aa timeout: 5