From 848c8d0544eb0138da4414a750f79b22883f91b2 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 25 May 2025 10:40:57 +0200 Subject: [PATCH] [mod] data: implement a simple currencies (SQL) database (#4836) To reduce the memory footprint, this patch no longer loads the JSON data completely into memory. Instead, there is an SQL database based on `ExpireCacheSQLite`. The class CurrenciesDB is a simple DB application that encapsulates the DB (queries and initialization) and provides convenient methods like `name_to_iso4217` and `iso4217_to_name`. Related: - https://github.com/searxng/searxng/discussions/1892 - https://github.com/searxng/searxng/pull/3458#issuecomment-2900807671 - https://github.com/searxng/searxng/pull/4650 Signed-off-by: Markus Heiser --- searx/data/__init__.py | 16 +++---- searx/data/core.py | 29 ++++++++++++ searx/data/currencies.py | 55 ++++++++++++++++++++++ searx/engines/currency_convert.py | 11 +++-- searx/search/processors/online_currency.py | 24 +++------- searxng_extra/update/update_currencies.py | 6 +-- 6 files changed, 106 insertions(+), 35 deletions(-) create mode 100644 searx/data/core.py create mode 100644 searx/data/currencies.py diff --git a/searx/data/__init__.py b/searx/data/__init__.py index 48455ebe3..9be1cd67e 100644 --- a/searx/data/__init__.py +++ b/searx/data/__init__.py @@ -4,21 +4,17 @@ make data.all """ - +from __future__ import annotations __all__ = ["ahmia_blacklist_loader"] import json -from pathlib import Path import typing -from searx import logger +from .core import log, data_dir +from .currencies import CurrenciesDB -log = logger.getChild("data") - -data_dir = Path(__file__).parent - -CURRENCIES: dict[str, typing.Any] +CURRENCIES: CurrenciesDB USER_AGENTS: dict[str, typing.Any] EXTERNAL_URLS: dict[str, typing.Any] WIKIDATA_UNITS: dict[str, typing.Any] @@ -29,7 +25,7 @@ ENGINE_TRAITS: dict[str, typing.Any] LOCALES: dict[str, typing.Any] lazy_globals = { - "CURRENCIES": None, + "CURRENCIES": CurrenciesDB(), 
"USER_AGENTS": None, "EXTERNAL_URLS": None, "WIKIDATA_UNITS": None, @@ -41,7 +37,6 @@ lazy_globals = { } data_json_files = { - "CURRENCIES": "currencies.json", "USER_AGENTS": "useragents.json", "EXTERNAL_URLS": "external_urls.json", "WIKIDATA_UNITS": "wikidata_units.json", @@ -63,6 +58,7 @@ def __getattr__(name): return data log.debug("init searx.data.%s", name) + with open(data_dir / data_json_files[name], encoding='utf-8') as f: lazy_globals[name] = json.load(f) diff --git a/searx/data/core.py b/searx/data/core.py new file mode 100644 index 000000000..14cc77eb7 --- /dev/null +++ b/searx/data/core.py @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# pylint: disable=missing-module-docstring +from __future__ import annotations + +import pathlib + +from searx import logger +from searx.cache import ExpireCacheCfg, ExpireCacheSQLite + +log = logger.getChild("data") + +data_dir = pathlib.Path(__file__).parent + +_DATA_CACHE: ExpireCacheSQLite = None # type: ignore + + +def get_cache(): + + global _DATA_CACHE # pylint: disable=global-statement + + if _DATA_CACHE is None: + _DATA_CACHE = ExpireCacheSQLite.build_cache( + ExpireCacheCfg( + name="DATA_CACHE", + # MAX_VALUE_LEN=1024 * 200, # max. 200kB length for a *serialized* value. 
+                # MAXHOLD_TIME=60 * 60 * 24 * 7 * 4,  # 4 weeks
+            )
+        )
+    return _DATA_CACHE
diff --git a/searx/data/currencies.py b/searx/data/currencies.py
new file mode 100644
index 000000000..a4f2901f0
--- /dev/null
+++ b/searx/data/currencies.py
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+"""Simple implementation to store currencies data in a SQL database."""
+
+from __future__ import annotations
+
+__all__ = ["CurrenciesDB"]
+
+import json
+import pathlib
+
+from .core import get_cache, log
+
+
+class CurrenciesDB:
+    """Currency lookups served from the cache returned by ``get_cache()``.
+
+    :py:meth:`load` copies the ``names`` and ``iso4217`` mappings of
+    ``currencies.json`` into two separate cache contexts; :py:meth:`init`
+    triggers it when the cache is not yet marked as loaded.
+    """
+
+    ctx_names = "data_currencies_names"
+    ctx_iso4217 = "data_currencies_iso4217"
+
+    json_file = pathlib.Path(__file__).parent / "currencies.json"
+
+    def __init__(self):
+        self.cache = get_cache()
+
+    def init(self):
+        """Load the JSON data unless the property ``currencies loaded`` is ``OK``."""
+        if self.cache.properties("currencies loaded") != "OK":
+            self.load()
+            self.cache.properties.set("currencies loaded", "OK")
+        # F I X M E:
+        #     do we need a maintenance .. remember: database is stored
+        #     in /tmp and will be rebuilt during the reboot anyway
+
+    def load(self):
+        """Read :py:obj:`json_file` and store both of its mappings in the cache."""
+        log.debug("init searx.data.CURRENCIES")
+        with open(self.json_file, encoding="utf-8") as f:
+            data_dict = json.load(f)
+        for key, value in data_dict["names"].items():
+            self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
+        for key, value in data_dict["iso4217"].items():
+            self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)
+
+    def name_to_iso4217(self, name):
+        """Return the ISO 4217 code stored for ``name``, or ``name`` itself if
+        there is no entry.  Of several alternatives the last one is returned."""
+        self.init()
+
+        ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
+        if isinstance(ret_val, list):
+            # if more alternatives, use the last in the list
+            ret_val = ret_val[-1]
+        return ret_val
+
+    def iso4217_to_name(self, iso4217, language):
+        """Return the name of ``iso4217`` in ``language``, or ``iso4217``
+        itself if the code or the language is unknown."""
+        self.init()
+
+        # load() stores the per-language names under ctx_iso4217; ctx_names
+        # holds the reverse (name -> code) mapping and must not be queried here.
+        iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
+        return iso4217_languages.get(language, iso4217)
diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py
index cbb1d6cc4..c4c757e3f
100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -3,6 +3,7 @@ """ import json +from searx.result_types import EngineResults # about about = { @@ -28,13 +29,15 @@ def request(_query, params): return params -def response(resp): +def response(resp) -> EngineResults: + res = EngineResults() + # remove first and last lines to get only json json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2] try: conversion_rate = float(json.loads(json_resp)["to"][0]["mid"]) except IndexError: - return [] + return res answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format( resp.search_params['amount'], resp.search_params['from'], @@ -46,5 +49,5 @@ def response(resp): ) url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}" - - return [{"answer": answer, "url": url}] + res.add(res.types.Answer(answer=answer, url=url)) + return res diff --git a/searx/search/processors/online_currency.py b/searx/search/processors/online_currency.py index 197e0e061..0d7900616 100644 --- a/searx/search/processors/online_currency.py +++ b/searx/search/processors/online_currency.py @@ -12,24 +12,13 @@ from .online import OnlineProcessor parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) 
([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) -def normalize_name(name): +def normalize_name(name: str): + name = name.strip() name = name.lower().replace('-', ' ').rstrip('s') name = re.sub(' +', ' ', name) return unicodedata.normalize('NFKD', name).lower() -def name_to_iso4217(name): - name = normalize_name(name) - currency = CURRENCIES['names'].get(name, [name]) - if isinstance(currency, str): - return currency - return currency[-1] - - -def iso4217_to_name(iso4217, language): - return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217) - - class OnlineCurrencyProcessor(OnlineProcessor): """Processor class used by ``online_currency`` engines.""" @@ -52,14 +41,15 @@ class OnlineCurrencyProcessor(OnlineProcessor): amount = float(amount_str) except ValueError: return None - from_currency = name_to_iso4217(from_currency.strip()) - to_currency = name_to_iso4217(to_currency.strip()) + + from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency)) + to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency)) params['amount'] = amount params['from'] = from_currency params['to'] = to_currency - params['from_name'] = iso4217_to_name(from_currency, 'en') - params['to_name'] = iso4217_to_name(to_currency, 'en') + params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en") + params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en") return params def get_default_tests(self): diff --git a/searxng_extra/update/update_currencies.py b/searxng_extra/update/update_currencies.py index 903bdcb44..288f0994e 100755 --- a/searxng_extra/update/update_currencies.py +++ b/searxng_extra/update/update_currencies.py @@ -15,9 +15,7 @@ import json from searx.locales import LOCALE_NAMES, locales_initialize from searx.engines import wikidata, set_loggers -from searx.data import data_dir - -DATA_FILE = data_dir / 'currencies.json' +from searx.data.currencies import CurrenciesDB set_loggers(wikidata, 'wikidata') locales_initialize() @@ -149,7 +147,7 @@ 
def main(): if len(db['names'][name]) == 1: db['names'][name] = db['names'][name][0] - with DATA_FILE.open('w', encoding='utf8') as f: + with CurrenciesDB.json_file.open('w', encoding='utf8') as f: json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)