Mirror of https://github.com/zoriya/Kyoo.git (synced 2025-05-31 20:24:27 -04:00)

Create a really good async cache
commit 5f787bedfe, parent b552ca7c51
@@ -1,4 +1,4 @@
-FROM python:3.10
+FROM python:3.12
 WORKDIR /app

 COPY ./requirements.txt .
scanner/scanner/cache.py (new file, 119 lines)
@@ -0,0 +1,119 @@
import asyncio
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Optional, Tuple

from providers.utils import ProviderError

type Cache = dict[Any, Tuple[Optional[asyncio.Event], Optional[datetime], Any]]


def cache(ttl: timedelta, cache: Cache = {}, typed=False):
    """
    A cache decorator for async methods. If the same method is called twice with
    the same args, the underlying method will only be called once and the first
    result will be cached.

    Args:
        typed: same as functools.lru_cache
        ttl: how long should the cached value be considered valid?

    """

    def wrap(f):
        @wraps(f)
        async def wrapper(*args, **kwargs):
            key = make_key(args, kwargs, typed)

            ret = cache.get(key, (None, None, None))
            # First check if the same method is already running and wait for it.
            if ret[0] is not None:
                await ret[0].wait()
                ret = cache.get(key, (None, None, None))
                if ret[2] is None:
                    # ret[2] can be None if the cached method failed. If that is the case, run it again.
                    return await wrapper(*args, **kwargs)
                return ret[2]
            # Return the cached result if it exists and is not expired.
            if (
                ret[2] is not None
                and ret[1] is not None
                and datetime.now() - ret[1] < ttl
            ):
                return ret[2]

            return await exec_as_cache(cache, key, lambda: f(*args, **kwargs))

        return wrapper

    return wrap


async def exec_as_cache(cache: Cache, key, f):
    # Register an event so concurrent callers wait for this run instead of re-running f.
    event = asyncio.Event()
    cache[key] = (event, None, None)
    try:
        result = await f()
    except:
        # On failure, drop the placeholder entry and wake up any waiters.
        del cache[key]
        event.set()
        raise

    event.set()
    cache[key] = (None, datetime.now(), result)
    return result


# Code below was stolen from https://github.com/python/cpython/blob/3.12/Lib/functools.py#L432


class _HashedSeq(list):
    """This class guarantees that hash() will be called no more than once
    per element. This is important because the lru_cache() will hash
    the key multiple times on a cache miss.

    """

    __slots__ = "hashvalue"

    def __init__(self, tup, hash=hash):
        self[:] = tup
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue


def make_key(
    args,
    kwds={},
    typed=False,
    kwd_mark=(object(),),
    fasttypes={int, str},
    tuple=tuple,
    type=type,
    len=len,
):
    """Make a cache key from optionally typed positional and keyword arguments

    The key is constructed in a way that is flat as possible rather than
    as a nested structure that would take more memory.

    If there is only a single argument and its data type is known to cache
    its hash value, then that argument is returned without a wrapper. This
    saves space and improves lookup speed.

    """
    # All of code below relies on kwds preserving the order input by the user.
    # Formerly, we sorted() the kwds before looping.  The new way is *much*
    # faster; however, it means that f(x=1, y=2) will now be treated as a
    # distinct call from f(y=2, x=1) which will be cached separately.
    key = args
    if kwds:
        key += kwd_mark
        for item in kwds.items():
            key += item
    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for v in kwds.values())
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedSeq(key)
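A minimal usage sketch of the new decorator (not part of the commit), assuming the file above is importable as scanner.cache; fetch_show and its one-second sleep are hypothetical stand-ins for a real provider request:

import asyncio
from datetime import timedelta

from scanner.cache import cache  # assumed import path for the file above


@cache(ttl=timedelta(hours=1))
async def fetch_show(name: str):
    # Hypothetical slow provider call; it should only run once per name
    # while a cached value is still valid.
    await asyncio.sleep(1)
    return {"name": name}


async def main():
    # Both calls share the same cache key ("breaking bad"), so the second
    # caller waits on the asyncio.Event stored in the cache and reuses the
    # first result instead of issuing a second request.
    a, b = await asyncio.gather(
        fetch_show("breaking bad"),
        fetch_show("breaking bad"),
    )
    assert a is b


asyncio.run(main())

Once the ttl has elapsed, the next call falls through to exec_as_cache and refreshes the cached entry.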
@@ -6,6 +6,7 @@
 sdk_7_0
 aspnetcore_7_0
 ];
+python = pkgs.python312;
 in
 pkgs.mkShell {
 packages = with pkgs; [
@@ -14,8 +15,8 @@ in
 nodePackages.eas-cli
 nodePackages.expo-cli
 dotnet
-python3
-python3Packages.pip
+python
+python312Packages.pip
 cargo
 cargo-watch
 rustfmt
@@ -37,7 +38,7 @@ in
 # Install python modules
 SOURCE_DATE_EPOCH=$(date +%s)
 if [ ! -d "${venvDir}" ]; then
-${pkgs.python3}/bin/python3 -m venv ${toString ./.}/${venvDir}
+${python}/bin/python3 -m venv ${toString ./.}/${venvDir}
 source ${venvDir}/bin/activate
 export PIP_DISABLE_PIP_VERSION_CHECK=1
 pip install -r ${pythonPkgs} >&2