Create a really good async cache

2025-07-09 03:04:20 -04:00 · 2024-01-05 14:45:52 +01:00 · 2024-01-05 14:45:52 +01:00 · 5f787bedfe
commit 5f787bedfe
parent b552ca7c51
3 changed files with 124 additions and 4 deletions
--- a/scanner/Dockerfile
+++ b/scanner/Dockerfile
@ -1,4 +1,4 @@
-FROM python:3.10
+FROM python:3.12
 WORKDIR /app

 COPY ./requirements.txt .
--- a/scanner/scanner/cache.py
+++ b/scanner/scanner/cache.py
@ -0,0 +1,119 @@
+import asyncio
+from datetime import datetime, timedelta
+from functools import wraps
+from typing import Any, Optional, Tuple
+from providers.utils import ProviderError
+
+# A cache maps a call key to a 3-tuple (event, stored_at, value):
+#  - while the call is in flight: (asyncio.Event, None, None);
+#  - once the call has completed: (None, completion datetime, result).
+type Cache = dict[Any, Tuple[Optional[asyncio.Event], Optional[datetime], Any]]
+
+def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False):
+	"""
+	A cache decorator for async methods. If the same method is called twice with
+	the same args, the underlying method will only be called once and the first
+	result will be cached. Concurrent calls with the same args wait for the call
+	already in flight instead of starting a second one.
+
+	Args:
+		ttl: how long should the cached value be considered valid?
+		cache: the dict used as storage. When omitted, each decorator gets its
+			own fresh dict so that two decorated functions never share entries
+			(keys are built from the arguments only, not from the function).
+		typed: same as functools.lru_cache
+
+	Returns:
+		A decorator that wraps an async callable with the caching behavior.
+
+	Notes:
+		A result of None is treated as "nothing cached" and is never served
+		from the cache. If the wrapped call raises, the exception propagates to
+		the caller that started it; callers that were waiting run the call again.
+	"""
+	# Never use a shared mutable default: create the storage per decorator.
+	store: Cache = {} if cache is None else cache
+
+	def wrap(f):
+		@wraps(f)
+		async def wrapper(*args, **kwargs):
+			key = make_key(args, kwargs, typed)
+
+			running, stored_at, value = store.get(key, (None, None, None))
+			# First check if the same method is already running and wait for it.
+			if running is not None:
+				await running.wait()
+				_, _, value = store.get(key, (None, None, None))
+				if value is None:
+					# The value can be None if the cached method failed. If that
+					# is the case, run again (and actually await the retry).
+					return await wrapper(*args, **kwargs)
+				return value
+			# Return the cached result if it exists and is not expired. The age
+			# of the entry is "now - stored_at"; the reverse subtraction would
+			# always be negative and the entry would never expire.
+			if (
+				value is not None
+				and stored_at is not None
+				and datetime.now() - stored_at < ttl
+			):
+				return value
+
+			return await exec_as_cache(store, key, lambda: f(*args, **kwargs))
+
+		return wrapper
+
+	return wrap
+
+async def exec_as_cache(cache: Cache, key, f):
+	"""
+	Run the zero-argument async callable `f` and record its outcome in `cache`
+	under `key`.
+
+	While `f` is running, the entry holds an asyncio.Event that other callers
+	can wait on. On success the entry is replaced by (None, completion time,
+	result) and the result is returned. On failure the entry is removed, the
+	event is still set so waiters wake up, and the exception is re-raised.
+	"""
+	done = asyncio.Event()
+	# In-flight marker: an event, no timestamp and no value yet.
+	cache[key] = (done, None, None)
+	try:
+		value = await f()
+	except BaseException:
+		# Any failure: drop the marker, wake the waiters, then propagate.
+		del cache[key]
+		done.set()
+		raise
+	else:
+		# There is no await between these two statements, so woken waiters
+		# only run after the final entry has been stored.
+		done.set()
+		cache[key] = (None, datetime.now(), value)
+		return value
+
+
+# Code below was stolen from https://github.com/python/cpython/blob/3.12/Lib/functools.py#L432
+
+
+class _HashedSeq(list):
+	"""A list whose hash is computed once, at construction time.
+
+	The hash of the source tuple is stored in `hashvalue` and returned by every
+	later hash() call, so the elements are not re-hashed each time the object
+	is used as a dictionary key.
+
+	"""
+
+	__slots__ = ("hashvalue",)
+
+	def __init__(self, tup, hash=hash):
+		# Copy the elements into the list, then remember the tuple's hash.
+		super().__init__(tup)
+		self.hashvalue = hash(tup)
+
+	def __hash__(self):
+		return self.hashvalue
+
+
+def make_key(
+	args,
+	kwds={},
+	typed=False,
+	kwd_mark=(object(),),
+	fasttypes={int, str},
+	tuple=tuple,
+	type=type,
+	len=len,
+):
+	"""Build a hashable cache key from positional and keyword arguments.
+
+	The key is one flat sequence: the positional arguments, then (if there are
+	keyword arguments) a marker followed by each keyword name and value. When
+	`typed` is true, the types of all argument values are appended as well.
+
+	When the untyped key is a single element whose type is in `fasttypes`,
+	that element itself is returned instead of a wrapper.
+
+	"""
+	# Keyword order is kept as given by the caller, so f(x=1, y=2) and
+	# f(y=2, x=1) produce different keys and are cached separately.
+	key = args
+	if kwds:
+		key += kwd_mark
+		for name, value in kwds.items():
+			key += (name, value)
+	if typed:
+		key += tuple(map(type, args))
+		if kwds:
+			key += tuple(map(type, kwds.values()))
+		return _HashedSeq(key)
+	if len(key) == 1 and type(key[0]) in fasttypes:
+		return key[0]
+	return _HashedSeq(key)
--- a/shell.nix
+++ b/shell.nix
@ -6,6 +6,7 @@
      sdk_7_0
      aspnetcore_7_0
    ];
+  python = pkgs.python312;
 in
  pkgs.mkShell {
    packages = with pkgs; [
@ -14,8 +15,8 @@ in
      nodePackages.eas-cli
      nodePackages.expo-cli
      dotnet
-      python3
-      python3Packages.pip
+      python
+      python312Packages.pip
      cargo
      cargo-watch
      rustfmt
@ -37,7 +38,7 @@ in
      # Install python modules
      SOURCE_DATE_EPOCH=$(date +%s)
      if [ ! -d "${venvDir}" ]; then
-          ${pkgs.python3}/bin/python3 -m venv ${toString ./.}/${venvDir}
+          ${python}/bin/python3 -m venv ${toString ./.}/${venvDir}
          source ${venvDir}/bin/activate
          export PIP_DISABLE_PIP_VERSION_CHECK=1
          pip install -r ${pythonPkgs} >&2