mirror of
https://github.com/searxng/searxng.git
synced 2025-11-22 22:43:08 -05:00
[mod] ExpireCacheSQLite - implement .setmany() for bulk loading
[1] https://github.com/searxng/searxng/issues/5223#issuecomment-3328597147

Suggested-by: Ivan G <igabaldon@inetol.net> [1]
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
4f4de3fc87
commit
18a58943cc
@ -29,6 +29,8 @@ from searx import get_setting
|
|||||||
|
|
||||||
log = logger.getChild("cache")
|
log = logger.getChild("cache")
|
||||||
|
|
||||||
|
CacheRowType: typing.TypeAlias = tuple[str, typing.Any, int | None]
|
||||||
|
|
||||||
|
|
||||||
class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
|
class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
|
||||||
"""Configuration of a :py:obj:`ExpireCache` cache."""
|
"""Configuration of a :py:obj:`ExpireCache` cache."""
|
||||||
@ -81,7 +83,7 @@ class ExpireCacheCfg(msgspec.Struct): # pylint: disable=too-few-public-methods
|
|||||||
class ExpireCacheStats:
|
class ExpireCacheStats:
|
||||||
"""Dataclass which provides information on the status of the cache."""
|
"""Dataclass which provides information on the status of the cache."""
|
||||||
|
|
||||||
cached_items: dict[str, list[tuple[str, typing.Any, int]]]
|
cached_items: dict[str, list[CacheRowType]]
|
||||||
"""Values in the cache mapped by context name.
|
"""Values in the cache mapped by context name.
|
||||||
|
|
||||||
.. code: python
|
.. code: python
|
||||||
@ -108,7 +110,9 @@ class ExpireCacheStats:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
for key, value, expire in kv_list:
|
for key, value, expire in kv_list:
|
||||||
valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
|
valid_until = ""
|
||||||
|
if expire:
|
||||||
|
valid_until = datetime.datetime.fromtimestamp(expire).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
c_kv += 1
|
c_kv += 1
|
||||||
lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")
|
lines.append(f"[{ctx_name:20s}] {valid_until} {key:12}" f" --> ({type(value).__name__}) {value} ")
|
||||||
|
|
||||||
@ -339,38 +343,97 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
|||||||
exists, it will be created (on demand) by :py:obj:`self.create_table
|
exists, it will be created (on demand) by :py:obj:`self.create_table
|
||||||
<ExpireCacheSQLite.create_table>`.
|
<ExpireCacheSQLite.create_table>`.
|
||||||
"""
|
"""
|
||||||
|
c, err_msg_list = self._setmany([(key, value, expire)], ctx=ctx)
|
||||||
|
if c:
|
||||||
|
log.debug("%s -- %s: key '%s' updated or inserted (%s errors)", self.cfg.name, ctx, key, len(err_msg_list))
|
||||||
|
else:
|
||||||
|
for msg in err_msg_list:
|
||||||
|
log.error("%s -- %s: %s", self.cfg.name, ctx, msg)
|
||||||
|
return bool(c)
|
||||||
|
|
||||||
|
def setmany(
    self,
    opt_list: list[CacheRowType],
    ctx: str | None = None,
) -> int:
    """Bulk-load a list of key/value pairs into the cache.

    :param opt_list: Rows to store.  Each row is a ``(key, value, expire)``
        tuple; the items have the meaning of the arguments described in
        :py:obj:`ExpireCacheSQLite.set`.
    :param ctx: Context the rows are written to; passed on unchanged to the
        bulk writer ``_setmany``.
    :return: Number of key/value pairs that were updated or inserted.  Every
        error message returned by the bulk writer is emitted on the error
        log.
    """
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments (NTP, DST, manual changes).
    _start = time.perf_counter()
    c, err_msg_list = self._setmany(opt_list=opt_list, ctx=ctx)
    elapsed = time.perf_counter() - _start

    for msg in err_msg_list:
        log.error("%s -- %s: %s", self.cfg.name, ctx, msg)

    log.debug(
        "%s -- %s: %s/%s key/value pairs updated or inserted in %s sec (%s errors)",
        self.cfg.name,
        ctx,
        c,
        len(opt_list),
        elapsed,
        len(err_msg_list),
    )
    return c
|
||||||
|
|
||||||
|
def _setmany(
    self,
    opt_list: list[CacheRowType],
    ctx: str | None = None,
) -> tuple[int, list[str]]:
    """Serialize the rows of ``opt_list`` and upsert them with one ``executemany``.

    :param opt_list: Rows as ``(key, value, expire)`` tuples.  A falsy
        ``expire`` is replaced by ``self.cfg.MAXHOLD_TIME``; the stored
        expire value is the current time (in seconds) plus ``expire``.
    :param ctx: Name of the table to write to.  Without a ``ctx`` the table
        name is derived from ``self.cfg.name``.
    :return: Tuple ``(count, err_msg_list)``: the number of rows handed to
        the database and one message for every row that was skipped because
        its serialized value exceeds ``self.cfg.MAX_VALUE_LEN``.
    """
    table = ctx
    self.maintenance()

    table_name = table
    if not table_name:
        table_name = self.normalize_name(self.cfg.name)
    self.create_table(table_name)

    sql_str = (
        f"INSERT INTO {table_name} (key, value, expire) VALUES (?, ?, ?)"
        f" ON CONFLICT DO "
        f"UPDATE SET value=?, expire=?"
    )

    sql_rows: list[
        tuple[
            str,  # key
            typing.Any,  # value
            int | None,  # expire
            typing.Any,  # value
            int | None,  # expire
        ]
    ] = []

    err_msg_list: list[str] = []

    # One timestamp for the whole batch: saves a clock call per row and gives
    # all rows of the batch the same base for their expire time.
    now = int(time.time())

    for key, _val, expire in opt_list:

        value: bytes = self.serialize(value=_val)
        if len(value) > self.cfg.MAX_VALUE_LEN:
            # Report the resolved table name; ``table`` is None when the
            # caller did not pass a ctx.
            err_msg_list.append(
                f"{table_name}.key='{key}' - serialized value too big to cache (len: {len(value)}) "
            )
            continue

        if not expire:
            expire = self.cfg.MAXHOLD_TIME
        expire = now + expire

        # positional arguments of the INSERT INTO statement: value and expire
        # are repeated for the UPDATE part of the upsert
        sql_rows.append((key, value, expire, value, expire))

    if not sql_rows:
        return 0, err_msg_list

    if table:
        with self.DB:
            self.DB.executemany(sql_str, sql_rows)
    else:
        # No ctx given: write through a one-off connection and release it
        # once the ``with`` block is done.
        with self.connect() as conn:
            conn.executemany(sql_str, sql_rows)
        conn.close()

    return len(sql_rows), err_msg_list
|
||||||
|
|
||||||
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
|
def get(self, key: str, default: typing.Any = None, ctx: str | None = None) -> typing.Any:
|
||||||
"""Get value of ``key`` from table given by argument ``ctx``. If
|
"""Get value of ``key`` from table given by argument ``ctx``. If
|
||||||
@ -410,7 +473,7 @@ class ExpireCacheSQLite(sqlitedb.SQLiteAppl, ExpireCache):
|
|||||||
yield row[0], self.deserialize(row[1])
|
yield row[0], self.deserialize(row[1])
|
||||||
|
|
||||||
def state(self) -> ExpireCacheStats:
|
def state(self) -> ExpireCacheStats:
|
||||||
cached_items: dict[str, list[tuple[str, typing.Any, int]]] = {}
|
cached_items: dict[str, list[CacheRowType]] = {}
|
||||||
for table in self.table_names:
|
for table in self.table_names:
|
||||||
cached_items[table] = []
|
cached_items[table] = []
|
||||||
for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
|
for row in self.DB.execute(f"SELECT key, value, expire FROM {table}"):
|
||||||
|
|||||||
20
searx/data/__main__.py
Normal file
20
searx/data/__main__.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
|
"""Command line implementation"""
|
||||||
|
|
||||||
|
import typer
|
||||||
|
|
||||||
|
from .core import get_cache
|
||||||
|
|
||||||
|
app = typer.Typer()
|
||||||
|
|
||||||
|
|
||||||
|
@app.command()
def state():
    """show state of the cache"""
    # The docstring above is kept verbatim: typer exposes a command's
    # docstring as its CLI help text.
    storage = get_cache()
    for tbl in storage.table_names:
        # one count(*) query per table; the count is the first column
        count_query = f"SELECT count(*) FROM {tbl}"
        for record in storage.DB.execute(count_query):
            print(f"cache table {tbl} holds {record[0]} key/value pairs")
|
||||||
|
|
||||||
|
|
||||||
|
app()
|
||||||
@ -6,10 +6,12 @@ __all__ = ["CurrenciesDB"]
|
|||||||
import typing as t
|
import typing as t
|
||||||
import json
|
import json
|
||||||
import pathlib
|
import pathlib
|
||||||
import time
|
|
||||||
|
|
||||||
from .core import get_cache, log
|
from .core import get_cache, log
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.cache import CacheRowType
|
||||||
|
|
||||||
|
|
||||||
@t.final
|
@t.final
|
||||||
class CurrenciesDB:
|
class CurrenciesDB:
|
||||||
@ -33,19 +35,14 @@ class CurrenciesDB:
|
|||||||
# in /tmp and will be rebuild during the reboot anyway
|
# in /tmp and will be rebuild during the reboot anyway
|
||||||
|
|
||||||
def load(self):
    """Read the currencies JSON file and bulk-load its ``names`` and
    ``iso4217`` mappings into the cache, one ``setmany`` call per context."""
    log.debug("init searx.data.CURRENCIES")
    with open(self.json_file, encoding="utf-8") as f:
        data_dict: dict[str, dict[str, str]] = json.load(f)

    def as_rows(mapping: dict[str, str]) -> "list[CacheRowType]":
        # every item becomes a (key, value, expire) row without an explicit
        # expire time
        return [(k, v, None) for k, v in mapping.items()]

    self.cache.setmany(as_rows(data_dict["names"]), ctx=self.ctx_names)
    self.cache.setmany(as_rows(data_dict["iso4217"]), ctx=self.ctx_iso4217)
|
|
||||||
|
|
||||||
def name_to_iso4217(self, name: str) -> str | None:
|
def name_to_iso4217(self, name: str) -> str | None:
|
||||||
self.init()
|
self.init()
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
"""Simple implementation to store TrackerPatterns data in a SQL database."""
|
"""Simple implementation to store TrackerPatterns data in a SQL database."""
|
||||||
|
|
||||||
import typing
|
import typing as t
|
||||||
|
|
||||||
__all__ = ["TrackerPatternsDB"]
|
__all__ = ["TrackerPatternsDB"]
|
||||||
|
|
||||||
@ -14,9 +14,14 @@ from httpx import HTTPError
|
|||||||
from searx.data.core import get_cache, log
|
from searx.data.core import get_cache, log
|
||||||
from searx.network import get as http_get
|
from searx.network import get as http_get
|
||||||
|
|
||||||
|
if t.TYPE_CHECKING:
|
||||||
|
from searx.cache import CacheRowType
|
||||||
|
|
||||||
|
|
||||||
RuleType = tuple[str, list[str], list[str]]
|
RuleType = tuple[str, list[str], list[str]]
|
||||||
|
|
||||||
|
|
||||||
|
@t.final
|
||||||
class TrackerPatternsDB:
|
class TrackerPatternsDB:
|
||||||
# pylint: disable=missing-class-docstring
|
# pylint: disable=missing-class-docstring
|
||||||
|
|
||||||
@ -31,9 +36,9 @@ class TrackerPatternsDB:
|
|||||||
|
|
||||||
class Fields:
|
class Fields:
|
||||||
# pylint: disable=too-few-public-methods, invalid-name
|
# pylint: disable=too-few-public-methods, invalid-name
|
||||||
url_regexp: typing.Final = 0 # URL (regular expression) match condition of the link
|
url_regexp: t.Final = 0 # URL (regular expression) match condition of the link
|
||||||
url_ignore: typing.Final = 1 # URL (regular expression) to ignore
|
url_ignore: t.Final = 1 # URL (regular expression) to ignore
|
||||||
del_args: typing.Final = 2 # list of URL arguments (regular expression) to delete
|
del_args: t.Final = 2 # list of URL arguments (regular expression) to delete
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.cache = get_cache()
|
self.cache = get_cache()
|
||||||
@ -49,19 +54,25 @@ class TrackerPatternsDB:
|
|||||||
|
|
||||||
def load(self):
    """Collect all rules from ``iter_clear_list`` and store them in the
    cache with a single ``setmany`` call."""
    log.debug("init searx.data.TRACKER_PATTERNS")
    fields = self.Fields
    # row layout: (key=url_regexp, value=(url_ignore, del_args), expire=None)
    rows: "list[CacheRowType]" = [
        (
            rule[fields.url_regexp],
            (rule[fields.url_ignore], rule[fields.del_args]),
            None,
        )
        for rule in self.iter_clear_list()
    ]
    self.cache.setmany(rows, ctx=self.ctx_name)
|
||||||
|
|
||||||
|
def add(self, rule: RuleType):
    """Store a single rule in the cache: the rule's URL regexp is the key,
    the pair ``(url_ignore, del_args)`` is the value."""
    fields = self.Fields
    self.cache.set(
        key=rule[fields.url_regexp],
        value=(rule[fields.url_ignore], rule[fields.del_args]),
        ctx=self.ctx_name,
        expire=None,
    )
|
||||||
|
|
||||||
def rules(self) -> Iterator[RuleType]:
|
def rules(self) -> Iterator[RuleType]:
|
||||||
self.init()
|
self.init()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user