[mod] searx.data: lazy load of the data objects (databases) (#4834)

In the previous implementation, all databases were loaded into memory when
importing the searx.data package, regardless of whether they were ever needed.

Regardless of this, it is an antipattern to load entire databases into memory
when importing a package or module; databases should be loaded when needed.

Lazy loading is a first step toward improving memory usage and also improves
performance when setting up the runtime environment.  Building on this,
subsequent PRs will be able to further optimize memory behavior, e.g., by using
a real database application such as the one already available via

    searx.cache.ExpireCache

Related:

- https://github.com/searxng/searxng/discussions/1892
- https://github.com/searxng/searxng/pull/3458
- https://github.com/searxng/searxng/pull/4650

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2025-05-22 16:23:30 +02:00 committed by GitHub
parent 861f9c4be5
commit d29cf64ce4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -5,28 +5,68 @@
""" """
__all__ = [
'ENGINE_TRAITS', __all__ = ["ahmia_blacklist_loader"]
'CURRENCIES',
'USER_AGENTS',
'EXTERNAL_URLS',
'WIKIDATA_UNITS',
'EXTERNAL_BANGS',
'OSM_KEYS_TAGS',
'ENGINE_DESCRIPTIONS',
'LOCALES',
'ahmia_blacklist_loader',
]
import json import json
from pathlib import Path from pathlib import Path
import typing
from searx import logger
log = logger.getChild("data")
data_dir = Path(__file__).parent data_dir = Path(__file__).parent
CURRENCIES: dict[str, typing.Any]
USER_AGENTS: dict[str, typing.Any]
EXTERNAL_URLS: dict[str, typing.Any]
WIKIDATA_UNITS: dict[str, typing.Any]
EXTERNAL_BANGS: dict[str, typing.Any]
OSM_KEYS_TAGS: dict[str, typing.Any]
ENGINE_DESCRIPTIONS: dict[str, typing.Any]
ENGINE_TRAITS: dict[str, typing.Any]
LOCALES: dict[str, typing.Any]
def _load(filename): lazy_globals = {
with open(data_dir / filename, encoding='utf-8') as f: "CURRENCIES": None,
return json.load(f) "USER_AGENTS": None,
"EXTERNAL_URLS": None,
"WIKIDATA_UNITS": None,
"EXTERNAL_BANGS": None,
"OSM_KEYS_TAGS": None,
"ENGINE_DESCRIPTIONS": None,
"ENGINE_TRAITS": None,
"LOCALES": None,
}
data_json_files = {
"CURRENCIES": "currencies.json",
"USER_AGENTS": "useragents.json",
"EXTERNAL_URLS": "external_urls.json",
"WIKIDATA_UNITS": "wikidata_units.json",
"EXTERNAL_BANGS": "external_bangs.json",
"OSM_KEYS_TAGS": "osm_keys_tags.json",
"ENGINE_DESCRIPTIONS": "engine_descriptions.json",
"ENGINE_TRAITS": "engine_traits.json",
"LOCALES": "locales.json",
}
def __getattr__(name):
# lazy init of the global objects
if name not in lazy_globals:
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
data = lazy_globals[name]
if data is not None:
return data
log.debug("init searx.data.%s", name)
with open(data_dir / data_json_files[name], encoding='utf-8') as f:
lazy_globals[name] = json.load(f)
return lazy_globals[name]
def ahmia_blacklist_loader(): def ahmia_blacklist_loader():
@ -40,14 +80,3 @@ def ahmia_blacklist_loader():
""" """
with open(data_dir / 'ahmia_blacklist.txt', encoding='utf-8') as f: with open(data_dir / 'ahmia_blacklist.txt', encoding='utf-8') as f:
return f.read().split() return f.read().split()
CURRENCIES = _load('currencies.json')
USER_AGENTS = _load('useragents.json')
EXTERNAL_URLS = _load('external_urls.json')
WIKIDATA_UNITS = _load('wikidata_units.json')
EXTERNAL_BANGS = _load('external_bangs.json')
OSM_KEYS_TAGS = _load('osm_keys_tags.json')
ENGINE_DESCRIPTIONS = _load('engine_descriptions.json')
ENGINE_TRAITS = _load('engine_traits.json')
LOCALES = _load('locales.json')