mirror of
https://github.com/searxng/searxng.git
synced 2025-05-24 01:12:56 -04:00
In the previous implementation, all databases were loaded into memory when importing the searx.data package, whether or not they were ever needed. More generally, it is an antipattern to load entire databases into memory when importing a package or module; databases should be loaded when they are needed. Lazy loading is a first step toward improving memory usage, and it also improves performance when setting up the runtime environment. Building on this, subsequent PRs will be able to further optimize memory behavior, e.g., by using a real database application such as the one already available via searx.cache.ExpireCache. Related: - https://github.com/searxng/searxng/discussions/1892 - https://github.com/searxng/searxng/pull/3458 - https://github.com/searxng/searxng/pull/4650 Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
83 lines
2.1 KiB
Python
83 lines
2.1 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""This module holds the *data* created by::

  make data.all

"""
|
|
|
|
|
|
# Names exported by ``from searx.data import *``.  The JSON data sets
# (CURRENCIES, LOCALES, ...) are not listed here; they are resolved on
# attribute access by the module-level ``__getattr__`` function.
__all__ = ["ahmia_blacklist_loader"]
|
|
|
|
import json
|
|
from pathlib import Path
|
|
import typing
|
|
|
|
from searx import logger
|
|
|
|
# Directory that contains this package; the data files opened by this module
# are located relative to it.
data_dir = Path(__file__).parent

# Child logger named "data", derived from the ``logger`` object of ``searx``.
log = logger.getChild("data")

# Declarations only: no value is bound to these names at import time.  They
# tell type checkers which attributes exist; at runtime each name is resolved
# by the module-level ``__getattr__`` function on first access.
CURRENCIES: dict[str, typing.Any]
USER_AGENTS: dict[str, typing.Any]
EXTERNAL_URLS: dict[str, typing.Any]
WIKIDATA_UNITS: dict[str, typing.Any]
EXTERNAL_BANGS: dict[str, typing.Any]
OSM_KEYS_TAGS: dict[str, typing.Any]
ENGINE_DESCRIPTIONS: dict[str, typing.Any]
ENGINE_TRAITS: dict[str, typing.Any]
LOCALES: dict[str, typing.Any]
|
|
|
|
# Maps the name of each lazily loaded data set to the JSON file (relative to
# ``data_dir``) that holds its content.  This is the single place where a data
# set is registered.
data_json_files = {
    "CURRENCIES": "currencies.json",
    "USER_AGENTS": "useragents.json",
    "EXTERNAL_URLS": "external_urls.json",
    "WIKIDATA_UNITS": "wikidata_units.json",
    "EXTERNAL_BANGS": "external_bangs.json",
    "OSM_KEYS_TAGS": "osm_keys_tags.json",
    "ENGINE_DESCRIPTIONS": "engine_descriptions.json",
    "ENGINE_TRAITS": "engine_traits.json",
    "LOCALES": "locales.json",
}

# Cache of the decoded data sets; ``None`` means "not loaded yet".  The keys
# are derived from ``data_json_files`` so that both mappings can never drift
# apart (a cache key without a file entry would end in a ``KeyError`` when the
# data set is requested).
lazy_globals = dict.fromkeys(data_json_files)
|
|
|
|
|
|
def __getattr__(name):
    """Resolve a lazily loaded data set on first attribute access.

    Python calls this module-level hook (:pep:`562`) only when ``name`` is not
    found among the regular attributes of the module.  For a name registered
    in ``lazy_globals``, the matching JSON file from ``data_json_files`` is
    read from ``data_dir`` and the decoded object is cached in
    ``lazy_globals``; later lookups return the cached object.

    :param name: attribute requested on the module, e.g. ``"CURRENCIES"``.
    :return: the decoded JSON content of the data file registered for ``name``.
    :raises AttributeError: if ``name`` is not one of the lazy data sets.

    Errors raised while opening or decoding the file are not handled here and
    propagate to the caller.
    """
    if name in lazy_globals:
        cached = lazy_globals[name]
        if cached is None:
            # first access: nothing cached yet, read the file from disk
            log.debug("init searx.data.%s", name)
            json_path = data_dir / data_json_files[name]
            with json_path.open(encoding='utf-8') as stream:
                cached = lazy_globals[name] = json.load(stream)
        return cached

    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
|
|
|
|
|
def ahmia_blacklist_loader():
    """Read ``ahmia_blacklist.txt`` from the data directory and return its
    whitespace-separated entries as a list of strings.

    The entries are the MD5 values of onion names; the file is generated by::

      searxng_extra/update/update_ahmia_blacklist.py

    The file is read anew on every call.  This function is used by
    :py:mod:`searx.plugins.ahmia_filter`.

    """
    blacklist_file = data_dir / 'ahmia_blacklist.txt'
    content = blacklist_file.read_text(encoding='utf-8')
    return content.split()
|