mirror of
https://github.com/searxng/searxng.git
synced 2025-06-23 15:30:35 -04:00
[mod] data: implement a simple currencies (SQL) database (#4836)
To reduce the memory footprint, this patch no longer loads the JSON data completely into memory. Instead, there is an SQL database based on `ExpireCacheSQLite`. The class CurrenciesDB is a simple DB application that encapsulates the DB (queries and initialization) and provides convenient methods like `name_to_iso4217` and `iso4217_to_name`. Related: - https://github.com/searxng/searxng/discussions/1892 - https://github.com/searxng/searxng/pull/3458#issuecomment-2900807671 - https://github.com/searxng/searxng/pull/4650 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
e46187e3ce
commit
848c8d0544
@ -4,21 +4,17 @@
|
|||||||
make data.all
|
make data.all
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
__all__ = ["ahmia_blacklist_loader"]
|
__all__ = ["ahmia_blacklist_loader"]
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
|
||||||
import typing
|
import typing
|
||||||
|
|
||||||
from searx import logger
|
from .core import log, data_dir
|
||||||
|
from .currencies import CurrenciesDB
|
||||||
|
|
||||||
log = logger.getChild("data")
|
CURRENCIES: CurrenciesDB
|
||||||
|
|
||||||
data_dir = Path(__file__).parent
|
|
||||||
|
|
||||||
CURRENCIES: dict[str, typing.Any]
|
|
||||||
USER_AGENTS: dict[str, typing.Any]
|
USER_AGENTS: dict[str, typing.Any]
|
||||||
EXTERNAL_URLS: dict[str, typing.Any]
|
EXTERNAL_URLS: dict[str, typing.Any]
|
||||||
WIKIDATA_UNITS: dict[str, typing.Any]
|
WIKIDATA_UNITS: dict[str, typing.Any]
|
||||||
@ -29,7 +25,7 @@ ENGINE_TRAITS: dict[str, typing.Any]
|
|||||||
LOCALES: dict[str, typing.Any]
|
LOCALES: dict[str, typing.Any]
|
||||||
|
|
||||||
lazy_globals = {
|
lazy_globals = {
|
||||||
"CURRENCIES": None,
|
"CURRENCIES": CurrenciesDB(),
|
||||||
"USER_AGENTS": None,
|
"USER_AGENTS": None,
|
||||||
"EXTERNAL_URLS": None,
|
"EXTERNAL_URLS": None,
|
||||||
"WIKIDATA_UNITS": None,
|
"WIKIDATA_UNITS": None,
|
||||||
@ -41,7 +37,6 @@ lazy_globals = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
data_json_files = {
|
data_json_files = {
|
||||||
"CURRENCIES": "currencies.json",
|
|
||||||
"USER_AGENTS": "useragents.json",
|
"USER_AGENTS": "useragents.json",
|
||||||
"EXTERNAL_URLS": "external_urls.json",
|
"EXTERNAL_URLS": "external_urls.json",
|
||||||
"WIKIDATA_UNITS": "wikidata_units.json",
|
"WIKIDATA_UNITS": "wikidata_units.json",
|
||||||
@ -63,6 +58,7 @@ def __getattr__(name):
|
|||||||
return data
|
return data
|
||||||
|
|
||||||
log.debug("init searx.data.%s", name)
|
log.debug("init searx.data.%s", name)
|
||||||
|
|
||||||
with open(data_dir / data_json_files[name], encoding='utf-8') as f:
|
with open(data_dir / data_json_files[name], encoding='utf-8') as f:
|
||||||
lazy_globals[name] = json.load(f)
|
lazy_globals[name] = json.load(f)
|
||||||
|
|
||||||
|
29
searx/data/core.py
Normal file
29
searx/data/core.py
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
# pylint: disable=missing-module-docstring
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from searx import logger
|
||||||
|
from searx.cache import ExpireCacheCfg, ExpireCacheSQLite
|
||||||
|
|
||||||
|
log = logger.getChild("data")
|
||||||
|
|
||||||
|
data_dir = pathlib.Path(__file__).parent
|
||||||
|
|
||||||
|
_DATA_CACHE: ExpireCacheSQLite = None # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def get_cache():
    """Return the shared ``DATA_CACHE`` instance, building it on first use.

    The instance is kept in the module global ``_DATA_CACHE`` so that every
    caller in this process gets the same object.
    """
    global _DATA_CACHE  # pylint: disable=global-statement

    # Fast path: the cache has already been built by an earlier call.
    if _DATA_CACHE is not None:
        return _DATA_CACHE

    cache_cfg = ExpireCacheCfg(
        name="DATA_CACHE",
        # Only the name is configured; the two settings below are left
        # commented out and therefore not passed:
        # MAX_VALUE_LEN=1024 * 200,  # max. 200kB length for a *serialized* value.
        # MAXHOLD_TIME=60 * 60 * 24 * 7 * 4,  # 4 weeks
    )
    _DATA_CACHE = ExpireCacheSQLite.build_cache(cache_cfg)
    return _DATA_CACHE
|
55
searx/data/currencies.py
Normal file
55
searx/data/currencies.py
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Simple implementation to store currencies data in a SQL database."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
__all__ = ["CurrenciesDB"]
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
from .core import get_cache, log
|
||||||
|
|
||||||
|
|
||||||
|
class CurrenciesDB:
    """Simple DB application that serves the currencies data from the cache.

    The content of :py:obj:`CurrenciesDB.json_file` is copied into the cache
    returned by ``get_cache()`` on first use.  Two cache contexts are used:

    - :py:obj:`CurrenciesDB.ctx_names`: the ``"names"`` table of the JSON file
      (currency name --> ISO 4217 code or list of codes).
    - :py:obj:`CurrenciesDB.ctx_iso4217`: the ``"iso4217"`` table of the JSON
      file (ISO 4217 code --> mapping of language to currency name).
    """

    # cache context holding the "names" table
    ctx_names = "data_currencies_names"
    # cache context holding the "iso4217" table
    ctx_iso4217 = "data_currencies_iso4217"

    # JSON file the tables are loaded from (located next to this module)
    json_file = pathlib.Path(__file__).parent / "currencies.json"

    def __init__(self):
        self.cache = get_cache()

    def init(self):
        """Load the JSON data into the cache, unless it is marked as loaded.

        The cache property ``"currencies loaded"`` is used as the marker: when
        its value is not ``"OK"``, :py:obj:`CurrenciesDB.load` is called and
        the property is set to ``"OK"`` afterwards.
        """
        if self.cache.properties("currencies loaded") != "OK":
            self.load()
            self.cache.properties.set("currencies loaded", "OK")
        # F I X M E:
        #    do we need a maintenance .. remember: database is stored
        #    in /tmp and will be rebuilt during the reboot anyway

    def load(self):
        """Read :py:obj:`CurrenciesDB.json_file` and store both tables in the
        cache, one cache entry per key of the ``"names"`` and ``"iso4217"``
        tables."""
        log.debug("init searx.data.CURRENCIES")
        with open(self.json_file, encoding="utf-8") as f:
            data_dict = json.load(f)

        # expire=None: presumably the entries never expire -- confirm against
        # the cache implementation.
        for key, value in data_dict["names"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None)
        for key, value in data_dict["iso4217"].items():
            self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None)

    def name_to_iso4217(self, name: str) -> str:
        """Map a currency name to its ISO 4217 code.

        :param name: currency name as used for the keys of the ``"names"``
            table (callers are expected to pass an already normalized name).
        :return: the stored code; when a list of alternatives is stored, the
            last item of the list; when the name is unknown, ``name`` itself.
        """
        self.init()

        ret_val = self.cache.get(key=name, default=name, ctx=self.ctx_names)
        if isinstance(ret_val, list):
            # if more alternatives, use the last in the list
            ret_val = ret_val[-1]
        return ret_val

    def iso4217_to_name(self, iso4217: str, language: str) -> str:
        """Map an ISO 4217 code to the currency name in the given language.

        :param iso4217: ISO 4217 code, a key of the ``"iso4217"`` table.
        :param language: language key to look up in the entry of the code.
        :return: the name in ``language``; when the code or the language is
            unknown, ``iso4217`` itself.
        """
        self.init()

        # The per-language names are stored in the iso4217 context (see
        # load()); the names context maps the other way around and must not
        # be queried here.
        iso4217_languages: dict = self.cache.get(key=iso4217, default={}, ctx=self.ctx_iso4217)
        return iso4217_languages.get(language, iso4217)
|
@ -3,6 +3,7 @@
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
from searx.result_types import EngineResults
|
||||||
|
|
||||||
# about
|
# about
|
||||||
about = {
|
about = {
|
||||||
@ -28,13 +29,15 @@ def request(_query, params):
|
|||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
||||||
def response(resp):
|
def response(resp) -> EngineResults:
|
||||||
|
res = EngineResults()
|
||||||
|
|
||||||
# remove first and last lines to get only json
|
# remove first and last lines to get only json
|
||||||
json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
|
json_resp = resp.text[resp.text.find('\n') + 1 : resp.text.rfind('\n') - 2]
|
||||||
try:
|
try:
|
||||||
conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
|
conversion_rate = float(json.loads(json_resp)["to"][0]["mid"])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return []
|
return res
|
||||||
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
|
answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
|
||||||
resp.search_params['amount'],
|
resp.search_params['amount'],
|
||||||
resp.search_params['from'],
|
resp.search_params['from'],
|
||||||
@ -46,5 +49,5 @@ def response(resp):
|
|||||||
)
|
)
|
||||||
|
|
||||||
url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}"
|
url = f"https://duckduckgo.com/?q={resp.search_params['from']}+to+{resp.search_params['to']}"
|
||||||
|
res.add(res.types.Answer(answer=answer, url=url))
|
||||||
return [{"answer": answer, "url": url}]
|
return res
|
||||||
|
@ -12,24 +12,13 @@ from .online import OnlineProcessor
|
|||||||
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
|
||||||
|
|
||||||
|
|
||||||
def normalize_name(name):
|
def normalize_name(name: str):
|
||||||
|
name = name.strip()
|
||||||
name = name.lower().replace('-', ' ').rstrip('s')
|
name = name.lower().replace('-', ' ').rstrip('s')
|
||||||
name = re.sub(' +', ' ', name)
|
name = re.sub(' +', ' ', name)
|
||||||
return unicodedata.normalize('NFKD', name).lower()
|
return unicodedata.normalize('NFKD', name).lower()
|
||||||
|
|
||||||
|
|
||||||
def name_to_iso4217(name):
|
|
||||||
name = normalize_name(name)
|
|
||||||
currency = CURRENCIES['names'].get(name, [name])
|
|
||||||
if isinstance(currency, str):
|
|
||||||
return currency
|
|
||||||
return currency[-1]
|
|
||||||
|
|
||||||
|
|
||||||
def iso4217_to_name(iso4217, language):
|
|
||||||
return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)
|
|
||||||
|
|
||||||
|
|
||||||
class OnlineCurrencyProcessor(OnlineProcessor):
|
class OnlineCurrencyProcessor(OnlineProcessor):
|
||||||
"""Processor class used by ``online_currency`` engines."""
|
"""Processor class used by ``online_currency`` engines."""
|
||||||
|
|
||||||
@ -52,14 +41,15 @@ class OnlineCurrencyProcessor(OnlineProcessor):
|
|||||||
amount = float(amount_str)
|
amount = float(amount_str)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
from_currency = name_to_iso4217(from_currency.strip())
|
|
||||||
to_currency = name_to_iso4217(to_currency.strip())
|
from_currency = CURRENCIES.name_to_iso4217(normalize_name(from_currency))
|
||||||
|
to_currency = CURRENCIES.name_to_iso4217(normalize_name(to_currency))
|
||||||
|
|
||||||
params['amount'] = amount
|
params['amount'] = amount
|
||||||
params['from'] = from_currency
|
params['from'] = from_currency
|
||||||
params['to'] = to_currency
|
params['to'] = to_currency
|
||||||
params['from_name'] = iso4217_to_name(from_currency, 'en')
|
params['from_name'] = CURRENCIES.iso4217_to_name(from_currency, "en")
|
||||||
params['to_name'] = iso4217_to_name(to_currency, 'en')
|
params['to_name'] = CURRENCIES.iso4217_to_name(to_currency, "en")
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def get_default_tests(self):
|
def get_default_tests(self):
|
||||||
|
@ -15,9 +15,7 @@ import json
|
|||||||
|
|
||||||
from searx.locales import LOCALE_NAMES, locales_initialize
|
from searx.locales import LOCALE_NAMES, locales_initialize
|
||||||
from searx.engines import wikidata, set_loggers
|
from searx.engines import wikidata, set_loggers
|
||||||
from searx.data import data_dir
|
from searx.data.currencies import CurrenciesDB
|
||||||
|
|
||||||
DATA_FILE = data_dir / 'currencies.json'
|
|
||||||
|
|
||||||
set_loggers(wikidata, 'wikidata')
|
set_loggers(wikidata, 'wikidata')
|
||||||
locales_initialize()
|
locales_initialize()
|
||||||
@ -149,7 +147,7 @@ def main():
|
|||||||
if len(db['names'][name]) == 1:
|
if len(db['names'][name]) == 1:
|
||||||
db['names'][name] = db['names'][name][0]
|
db['names'][name] = db['names'][name][0]
|
||||||
|
|
||||||
with DATA_FILE.open('w', encoding='utf8') as f:
|
with CurrenciesDB.json_file.open('w', encoding='utf8') as f:
|
||||||
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
|
json.dump(db, f, indent=4, sort_keys=True, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user