diff --git a/searx/cache.py b/searx/cache.py index 0de482363..ea58c9328 100644 --- a/searx/cache.py +++ b/searx/cache.py @@ -29,6 +29,8 @@ from searx import get_setting log = logger.getChild("cache") +CacheRowType: typing.TypeAlias = tuple[str, typing.Any, int | None] + class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods """Configuration of a :py:obj:`ExpireCache` cache.""" @@ -81,7 +83,7 @@ class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods class ExpireCacheStats: """Dataclass which provides information on the status of the cache.""" - cached_items: dict[str, list[tuple[str, typing.Any, int]]] + cached_items: dict[str, list[CacheRowType]] """Values in the cache mapped by context name. .. code: python @@ -108,7 +110,9 @@ class ExpireCacheStats: continue for key, value, expire in kv_list: - valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S") + valid_until = "" + if expire: + valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S") c_kv += 1 lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ") @@ -339,38 +343,97 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): exists, it will be created (on demand) by :py:obj:`self.create_table `. """ + c, err_msg_list = self._setmany([(key, value, expire)], ctx=ctx) + if c: + log.debug("%s -- %s: key '%s' updated or inserted (%s errors)", self.cfg.name, ctx, key, len(err_msg_list)) + else: + for msg in err_msg_list: + log.error("%s -- %s: %s", self.cfg.name, ctx, msg) + return bool(c) + + def setmany( + self, + opt_list: list[CacheRowType], + ctx: str | None = None, + ) -> int: + """Efficient bootload of the cache from a list of options. The list + contains tuples with the arguments described in + :py:obj:`ExpireCacheSQLite.set`.""" + _start = time.time() + c, err_msg_list = self._setmany(opt_list=opt_list, ctx=ctx) + _end = time.time() + for msg in err_msg_list: + log.error("%s -- %s: %s", self.cfg.name, ctx, msg) + + log.debug( + "%s -- %s: %s/%s key/value pairs updated or inserted in %s sec (%s errors)", + self.cfg.name, + ctx, + c, + len(opt_list), + _end - _start, + len(err_msg_list), + ) + return c + + def _setmany( + self, + opt_list: list[CacheRowType], + ctx: str | None = None, + ) -> tuple[int, list[str]]: + table = ctx self.maintenance() - value = self.serialize(value=value) - if len(value) > self.cfg.MAX_VALUE_LEN: - log.warning("ExpireCache.set(): %s.key='%s' - value too big to cache (len: %s) ", table, value, len(value)) - return False - - if not expire: - expire = self.cfg.MAXHOLD_TIME - expire = int(time.time()) + expire - table_name = table if not table_name: table_name = self.normalize_name(self.cfg.name) self.create_table(table_name) - sql = ( + sql_str = ( f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)" f" ON CONFLICT DO " f"UPDATE SET value=?, expire=?" ) + sql_rows: list[ + tuple[ + str, # key + typing.Any, # value + int | None, # expire + typing.Any, # value + int | None, # expire + ] + ] = [] + + err_msg_list: list[str] = [] + for key, _val, expire in opt_list: + + value: bytes = self.serialize(value=_val) + if len(value) > self.cfg.MAX_VALUE_LEN: + err_msg_list.append(f"{table}.key='{key}' - serialized value too big to cache (len: {len(value)}) ") + continue + + if not expire: + expire = self.cfg.MAXHOLD_TIME + expire = int(time.time()) + expire + + # positional arguments of the INSERT INTO statement + sql_args = (key, value, expire, value, expire) + sql_rows.append(sql_args) + + if not sql_rows: + return 0, err_msg_list + if table: with self.DB: - self.DB.execute(sql, (key, value, expire, value, expire)) + self.DB.executemany(sql_str, sql_rows) else: with self.connect() as conn: - conn.execute(sql, (key, value, expire, value, expire)) + conn.executemany(sql_str, sql_rows) conn.close() - return True + return len(sql_rows), err_msg_list def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any: """Get value of ``key`` from table given by argument ``ctx``. If @@ -410,7 +473,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache): yield row[0], self.deserialize(row[1]) def state(self) -> ExpireCacheStats: - cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {} + cached_items: dict[str, list[CacheRowType]] = {} for table in self.table_names: cached_items[table] = [] for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"): diff --git a/searx/data/__main__.py b/searx/data/__main__.py new file mode 100644 index 000000000..8e7852751 --- /dev/null +++ b/searx/data/__main__.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Command line implementation""" + +import typer + +from .core import get_cache + +app = typer.Typer() + + +@app.command() +def state(): + """show state of the cache""" + cache = get_cache() + for table in cache.table_names: + for row in cache.DB.execute(f"SELECT count(*) FROM {table}"): + print(f"cache table {table} holds {row[0]} key/value pairs") + + +app() diff --git a/searx/data/currencies.py b/searx/data/currencies.py index 33aa9530c..538900762 100644 --- a/searx/data/currencies.py +++ b/searx/data/currencies.py @@ -6,10 +6,12 @@ __all__ = ["CurrenciesDB"] import typing as t import json import pathlib -import time from .core import get_cache, log +if t.TYPE_CHECKING: + from searx.cache import CacheRowType + @t.final class CurrenciesDB: @@ -33,19 +35,14 @@ class CurrenciesDB: # in /tmp and will be rebuild during the reboot anyway def load(self): - _start = time.time() log.debug("init searx.data.CURRENCIES") with open(self.json_file, encoding="utf-8") as f: data_dict: dict[str, dict[str, str]] = json.load(f) - for key, value in data_dict["names"].items(): - self.cache.set(key=key, value=value, ctx=self.ctx_names, expire=None) - for key, value in data_dict["iso4217"].items(): - self.cache.set(key=key, value=value, ctx=self.ctx_iso4217, expire=None) - log.debug( - "init searx.data.CURRENCIES added %s items in %s sec.", - len(data_dict["names"]) + len(data_dict["iso4217"]), - time.time() - _start, - ) + + rows: "list[CacheRowType]" = [(k, v, None) for k, v in data_dict["names"].items()] + self.cache.setmany(rows, ctx=self.ctx_names) + rows = [(k, v, None) for k, v in data_dict["iso4217"].items()] + self.cache.setmany(rows, ctx=self.ctx_iso4217) def name_to_iso4217(self, name: str) -> str | None: self.init() diff --git a/searx/data/tracker_patterns.py b/searx/data/tracker_patterns.py index 2966c0f31..fd4746e5c 100644 --- a/searx/data/tracker_patterns.py +++ b/searx/data/tracker_patterns.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: AGPL-3.0-or-later """Simple implementation to store TrackerPatterns data in a SQL database.""" -import typing +import typing as t __all__ = ["TrackerPatternsDB"] @@ -14,9 +14,14 @@ from httpx import HTTPError from searx.data.core import get_cache, log from searx.network import get as http_get +if t.TYPE_CHECKING: + from searx.cache import CacheRowType + + RuleType = tuple[str, list[str], list[str]] +@t.final class TrackerPatternsDB: # pylint: disable=missing-class-docstring @@ -31,9 +36,9 @@ class TrackerPatternsDB: class Fields: # pylint: disable=too-few-public-methods, invalid-name - url_regexp: typing.Final = 0 # URL (regular expression) match condition of the link - url_ignore: typing.Final = 1 # URL (regular expression) to ignore - del_args: typing.Final = 2 # list of URL arguments (regular expression) to delete + url_regexp: t.Final = 0 # URL (regular expression) match condition of the link + url_ignore: t.Final = 1 # URL (regular expression) to ignore + del_args: t.Final = 2 # list of URL arguments (regular expression) to delete def __init__(self): self.cache = get_cache() @@ -49,19 +54,25 @@ class TrackerPatternsDB: def load(self): log.debug("init searx.data.TRACKER_PATTERNS") - for rule in self.iter_clear_list(): - self.add(rule) + rows: "list[CacheRowType]" = [] - def add(self, rule: RuleType): - self.cache.set( - key=rule[self.Fields.url_regexp], - value=( + for rule in self.iter_clear_list(): + key = rule[self.Fields.url_regexp] + value = ( rule[self.Fields.url_ignore], rule[self.Fields.del_args], - ), - ctx=self.ctx_name, - expire=None, + ) + rows.append((key, value, None)) + + self.cache.setmany(rows, ctx=self.ctx_name) + + def add(self, rule: RuleType): + key = rule[self.Fields.url_regexp] + value = ( + rule[self.Fields.url_ignore], + rule[self.Fields.del_args], ) + self.cache.set(key=key, value=value, ctx=self.ctx_name, expire=None) def rules(self) -> Iterator[RuleType]: self.init()