[mod] data: implement a simple currencies (SQL) database (#4836)

To reduce the memory footprint, this patch no longer loads the JSON data
completely into memory.  Instead, there is an SQL database based on
`ExpireCacheSQLite`.

The class CurrenciesDB is a simple DB application that encapsulates the
DB (queries and initialization) and provides convenient methods like
`name_to_iso4217` and `iso4217_to_name`.

Related:

- https://github.com/searxng/searxng/discussions/1892
- https://github.com/searxng/searxng/pull/3458#issuecomment-2900807671
- https://github.com/searxng/searxng/pull/4650

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-05-25 10:40:57 +02:00 committed by GitHub
parent e46187e3ce
commit 848c8d0544
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 106 additions and 35 deletions

View File

@ -4,21 +4,17 @@
make data.all
"""
from __future__ import annotations
__all__ = ["ahmia_blacklist_loader"]
import json
from pathlib import Path
import typing
from searx import logger
from .core import log, data_dir
from .currencies import CurrenciesDB
log = logger.getChild("data")
data_dir = Path(__file__).parent
CURRENCIES: dict[str, typing.Any]
CURRENCIES: CurrenciesDB
USER_AGENTS: dict[str, typing.Any]
EXTERNAL_URLS: dict[str, typing.Any]
WIKIDATA_UNITS: dict[str, typing.Any]
@ -29,7 +25,7 @@ ENGINE_TRAITS: dict[str, typing.Any]
LOCALES: dict[str, typing.Any]
lazy_globals = {
"CURRENCIES": None,
"CURRENCIES": CurrenciesDB(),
"USER_AGENTS": None,
"EXTERNAL_URLS": None,
"WIKIDATA_UNITS": None,
@ -41,7 +37,6 @@ lazy_globals = {
}
data_json_files = {
"CURRENCIES": "currencies.json",
"USER_AGENTS": "useragents.json",
"EXTERNAL_URLS": "external_urls.json",
"WIKIDATA_UNITS": "wikidata_units.json",
@ -63,6 +58,7 @@ def __getattr__(name):
return data
log.debug("init searx.data.%s", name)
with open(data_dir / data_json_files[name], encoding='utf-8') as f:
lazy_globals[name] = json.load(f)

29
searx/data/core.py Normal file
View File

@ -0,0 +1,29 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
# pylint: disable=missing-module-docstring
from __future__ import annotations
import pathlib
from searx import logger
from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
# Child logger named "data", derived from the searx logger.
log = logger.getChild("data")
# Directory that contains this module.
data_dir = pathlib.Path(__file__).parent
# Module-level cache handle; stays None until get_cache() builds it.
_DATA_CACHE: ExpireCacheSQLite = None # type: ignore
def get_cache():
    """Return the module-wide ``DATA_CACHE`` instance, building it on first use.

    The cache is created once via ``ExpireCacheSQLite.build_cache`` and kept in
    the module global ``_DATA_CACHE``; later calls return that same object.
    """
    global _DATA_CACHE  # pylint: disable=global-statement

    if _DATA_CACHE is not None:
        return _DATA_CACHE

    cache_cfg = ExpireCacheCfg(
        name="DATA_CACHE",
        # MAX_VALUE_LEN=1024 * 200,  # max. 200kB length for a *serialized* value.
        # MAXHOLD_TIME=60 * 60 * 24 * 7 * 4,  # 4 weeks
    )
    _DATA_CACHE = ExpireCacheSQLite.build_cache(cache_cfg)
    return _DATA_CACHE

55
searx/data/currencies.py Normal file
View File

@ -0,0 +1,55 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""Simple implementation to store currencies data in a SQL database."""
from __future__ import annotations
__all__ = ["CurrenciesDB"]
import json
import pathlib
from .core import get_cache, log
class CurrenciesDB:
    """Simple DB application around the currencies data.

    The content of :py:obj:`CurrenciesDB.json_file` is copied into the cache
    returned by ``get_cache()`` on first use, so the JSON data does not have to
    be kept in memory.  Two contexts are used:

    - ``ctx_names``: value of ``data["names"]`` -- currency name to ISO 4217
      code (a string, or a list of alternatives)
    - ``ctx_iso4217``: value of ``data["iso4217"]`` -- ISO 4217 code to a
      mapping ``{language: name}``
    """

    ctx_names = "data_currencies_names"
    ctx_iso4217 = "data_currencies_iso4217"

    json_file = pathlib.Path(__file__).parent / "currencies.json"

    def __init__(self):
        self.cache = get_cache()

    def init(self):
        """Load the JSON data into the cache unless it is flagged as loaded."""
        if self.cache.properties("currencies loaded") != "OK":
            self.load()
            self.cache.properties.set("currencies loaded", "OK")
        # F I X M E:
        #     do we need a maintenance .. remember: database is stored
        #     in /tmp and will be rebuilt during the reboot anyway

    def load(self):
        """Read :py:obj:`CurrenciesDB.json_file` and store its two tables
        (``names`` and ``iso4217``) in the cache, without expiration."""
        log.debug("init searx.data.CURRENCIES")
        with open(self.json_file, encoding="utf-8") as f:
            data_dict = json.load(f)
        for key, value in data_dict["names"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
        for key, value in data_dict["iso4217"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)

    def name_to_iso4217(self, name):
        """Return the ISO 4217 code stored for the currency ``name``.

        If nothing is stored for ``name``, ``name`` itself is returned.  If
        several alternatives are stored (a list), the last one is used.
        """
        self.init()

        ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
        if isinstance(ret_val, list):
            # if more alternatives, use the last in the list
            ret_val = ret_val[-1]
        return ret_val

    def iso4217_to_name(self, iso4217, language):
        """Return the name of the currency ``iso4217`` in ``language``.

        Falls back to the ``iso4217`` value itself when the code or the
        language is unknown.
        """
        self.init()

        # The per-language names are stored by load() in the iso4217 context;
        # querying the names context would never find them.
        iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
        return iso4217_languages.get(language, iso4217)

View File

@ -3,6 +3,7 @@
"""
import json
from searx.result_types import EngineResults
# about
about = {
@ -28,13 +29,15 @@ def request(_query, params):
return params
def response(resp):
def response(resp) -> EngineResults:
res = EngineResults()
# remove first and last lines to get only json
json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
try:
conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
except IndexError:
return []
return res
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
resp.search_params['amount'],
resp.search_params['from'],
@ -46,5 +49,5 @@ def response(resp):
)
url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}"
return [{"answer": answer, "url": url}]
res.add(res.types.Answer(answer=answer, url=url))
return res

View File

@ -12,24 +12,13 @@ from .online import OnlineProcessor
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
def normalize_name(name):
def normalize_name(name: str):
    """Normalize a currency name for lookups.

    Surrounding whitespace is stripped, the name is lower-cased, hyphens become
    spaces, trailing ``s`` characters are removed, runs of spaces collapse to a
    single space and the result is NFKD-normalized.
    """
    cleaned = name.strip().lower().replace('-', ' ').rstrip('s')
    single_spaced = re.sub(' +', ' ', cleaned)
    decomposed = unicodedata.normalize('NFKD', single_spaced)
    return decomposed.lower()
def name_to_iso4217(name):
name = normalize_name(name)
currency = CURRENCIES['names'].get(name, [name])
if isinstance(currency, str):
return currency
return currency[-1]
def iso4217_to_name(iso4217, language):
return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
class OnlineCurrencyProcessor(OnlineProcessor):
"""Processor class used by ``online_currency`` engines."""
@ -52,14 +41,15 @@ class OnlineCurrencyProcessor(OnlineProcessor):
amount = float(amount_str)
except ValueError:
return None
from_currency = name_to_iso4217(from_currency.strip())
to_currency = name_to_iso4217(to_currency.strip())
from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency))
to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency))
params['amount'] = amount
params['from'] = from_currency
params['to'] = to_currency
params['from_name'] = iso4217_to_name(from_currency, 'en')
params['to_name'] = iso4217_to_name(to_currency, 'en')
params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en")
params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en")
return params
def get_default_tests(self):

View File

@ -15,9 +15,7 @@ import json
from searx.locales import LOCALE_NAMES, locales_initialize
from searx.engines import wikidata, set_loggers
from searx.data import data_dir
DATA_FILE = data_dir / 'currencies.json'
from searx.data.currencies import CurrenciesDB
set_loggers(wikidata, 'wikidata')
locales_initialize()
@ -149,7 +147,7 @@ def main():
if len(db['names'][name]) == 1:
db['names'][name] = db['names'][name][0]
with DATA_FILE.open('w', encoding='utf8') as f:
with CurrenciesDB.json_file.open('w', encoding='utf8') as f:
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)