Create a really good async cache

This commit is contained in:
Zoe Roux 2024-01-05 14:45:52 +01:00
parent b552ca7c51
commit 5f787bedfe
3 changed files with 124 additions and 4 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.10
FROM python:3.12
WORKDIR /app
COPY ./requirements.txt .

119
scanner/scanner/cache.py Normal file
View File

@ -0,0 +1,119 @@
import asyncio
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Optional, Tuple
from providers.utils import ProviderError
type Cache = dict[Any, Tuple[Optional[asyncio.Event], Optional[datetime], Any]]
def cache(ttl: timedelta, cache: Optional[Cache] = None, typed=False):
    """
    A cache decorator for async methods. If the same method is called twice with
    the same args, the underlying method will only be called once and the first
    result will be cached.

    Calls made while an identical call is still running wait for that call
    instead of starting a second one. A result of None is never served from
    the cache: it is indistinguishable from "no entry", so the method is run
    again.

    Args:
        ttl: how long should the cached value be considered valid?
        cache: dict used as storage. When omitted, every decorated function
            gets its own private dict; keys are built from the call arguments
            only, so sharing one dict between functions would mix up their
            results.
        typed: same as functools.lru_cache
    """
    # `cache` is also the name of this decorator, so bind the storage to a
    # distinct local. `is None` (not truthiness) keeps an explicitly passed
    # empty dict in use.
    store: Cache = {} if cache is None else cache

    def wrap(f):
        @wraps(f)
        async def wrapper(*args, **kwargs):
            key = make_key(args, kwargs, typed)
            pending, stored_at, value = store.get(key, (None, None, None))

            # First check if the same method is already running and wait for it.
            if pending is not None:
                await pending.wait()
                _, _, value = store.get(key, (None, None, None))
                if value is None:
                    # The value is None when the awaited call failed (its entry
                    # was removed) or returned None. In both cases run again;
                    # the recursive call must be awaited, otherwise the caller
                    # would receive a coroutine object instead of a result.
                    return await wrapper(*args, **kwargs)
                return value

            # Return the cached result if it exists and is not expired, i.e.
            # the time elapsed since it was stored is still below the ttl.
            if (
                value is not None
                and stored_at is not None
                and datetime.now() - stored_at < ttl
            ):
                return value

            return await exec_as_cache(store, key, lambda: f(*args, **kwargs))

        return wrapper

    return wrap
async def exec_as_cache(cache: "Cache", key, f):
    """
    Run the coroutine function `f` and record its outcome in `cache[key]`.

    While `f` is running, `cache[key]` holds `(event, None, None)` so that
    other callers can wait on the event. On success the entry becomes
    `(None, completion time, result)`. On failure the entry is removed. In
    every case the event is set once the cache is in its final state, so
    waiters are always released.

    Args:
        cache: the dict to store the pending marker and the result in.
        key: the key of the entry to fill.
        f: zero-argument callable returning the awaitable to run.

    Returns:
        The value produced by awaiting `f()`.

    Raises:
        Whatever awaiting `f()` raises; the exception is propagated unchanged.
    """
    event = asyncio.Event()
    cache[key] = (event, None, None)
    try:
        result = await f()
    except BaseException:
        # Also covers cancellation. `pop` rather than `del`: if the entry was
        # evicted while `f` was running, `del` would raise KeyError, hiding
        # the real error and leaving waiters blocked on an event never set.
        cache.pop(key, None)
        raise
    else:
        cache[key] = (None, datetime.now(), result)
        return result
    finally:
        # Release waiters only after the cache entry is final.
        event.set()
# Code below was taken from https://github.com/python/cpython/blob/3.12/Lib/functools.py#L432
class _HashedSeq(list):
    """A list of key parts whose hash is computed once, at construction.

    The hash of the source tuple is stored in `hashvalue` and returned by
    every later `hash()` call, so the elements themselves are hashed no more
    than once even when the key is looked up several times.
    """

    __slots__ = "hashvalue"

    def __init__(self, tup, hash=hash):
        # Fill the list with the key parts, then remember the tuple's hash.
        super().__init__(tup)
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue
def make_key(
    args,
    kwds={},
    typed=False,
    kwd_mark=(object(),),
    fasttypes={int, str},
    tuple=tuple,
    type=type,
    len=len,
):
    """Build a flat cache key from positional and keyword arguments.

    The key is the positional arguments, followed (when keyword arguments are
    present) by `kwd_mark` and every keyword name and value in call order.
    With `typed` set, the type of each positional and keyword value is
    appended as well.

    When the key is a single value whose exact type is in `fasttypes` and
    `typed` is not set, that value is returned as is, without a wrapper.
    Otherwise the key is wrapped in a `_HashedSeq`.

    Keyword order is not normalized: f(x=1, y=2) and f(y=2, x=1) produce
    different keys and are cached separately.
    """
    has_kwds = bool(kwds)

    parts = args
    if has_kwds:
        # The marker separates positional values from keyword pairs.
        parts += kwd_mark
        for pair in kwds.items():
            parts += pair

    if not typed:
        # Only an untyped key can be returned as a bare value.
        if len(parts) == 1 and type(parts[0]) in fasttypes:
            return parts[0]
        return _HashedSeq(parts)

    parts += tuple(type(value) for value in args)
    if has_kwds:
        parts += tuple(type(value) for value in kwds.values())
    return _HashedSeq(parts)

View File

@ -6,6 +6,7 @@
sdk_7_0
aspnetcore_7_0
];
python = pkgs.python312;
in
pkgs.mkShell {
packages = with pkgs; [
@ -14,8 +15,8 @@ in
nodePackages.eas-cli
nodePackages.expo-cli
dotnet
python3
python3Packages.pip
python
python312Packages.pip
cargo
cargo-watch
rustfmt
@ -37,7 +38,7 @@ in
# Install python modules
SOURCE_DATE_EPOCH=$(date +%s)
if [ ! -d "${venvDir}" ]; then
${pkgs.python3}/bin/python3 -m venv ${toString ./.}/${venvDir}
${python}/bin/python3 -m venv ${toString ./.}/${venvDir}
source ${venvDir}/bin/activate
export PIP_DISABLE_PIP_VERSION_CHECK=1
pip install -r ${pythonPkgs} >&2