mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Nicer fix for lru_cache() that does not require meta_path trickery
This commit is contained in:
parent
b968cbdb5d
commit
b177f0a109
0
src/backports/__init__.py
Normal file
0
src/backports/__init__.py
Normal file
199
src/backports/functools_lru_cache.py
Normal file
199
src/backports/functools_lru_cache.py
Normal file
@ -0,0 +1,199 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
import functools
|
||||||
|
from collections import namedtuple
|
||||||
|
from threading import RLock
|
||||||
|
|
||||||
|
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
|
||||||
|
|
||||||
|
|
||||||
|
@functools.wraps(functools.update_wrapper)
def update_wrapper(
    wrapper,
    wrapped,
    assigned=functools.WRAPPER_ASSIGNMENTS,
    updated=functools.WRAPPER_UPDATES
):
    """
    Patch two bugs in functools.update_wrapper.

    Works around http://bugs.python.org/issue3445 (copying attributes the
    wrapped callable does not have raises AttributeError) and
    https://bugs.python.org/issue17482 (``__wrapped__`` is not set on
    older Pythons).
    """
    # Only copy attributes that actually exist on the wrapped callable
    # (workaround for http://bugs.python.org/issue3445).
    present = tuple(name for name in assigned if hasattr(wrapped, name))
    result = functools.update_wrapper(wrapper, wrapped, present, updated)
    # Always expose the original callable (workaround for
    # https://bugs.python.org/issue17482).
    result.__wrapped__ = wrapped
    return result
|
||||||
|
|
||||||
|
|
||||||
|
class _HashedSeq(list):
|
||||||
|
__slots__ = 'hashvalue'
|
||||||
|
|
||||||
|
def __init__(self, tup, hash=hash):
|
||||||
|
self[:] = tup
|
||||||
|
self.hashvalue = hash(tup)
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return self.hashvalue
|
||||||
|
|
||||||
|
|
||||||
|
def _make_key(
|
||||||
|
args,
|
||||||
|
kwds,
|
||||||
|
typed,
|
||||||
|
kwd_mark=(object(), ),
|
||||||
|
fasttypes=set([int, str, frozenset, type(None)]),
|
||||||
|
sorted=sorted,
|
||||||
|
tuple=tuple,
|
||||||
|
type=type,
|
||||||
|
len=len
|
||||||
|
):
|
||||||
|
'Make a cache key from optionally typed positional and keyword arguments'
|
||||||
|
key = args
|
||||||
|
if kwds:
|
||||||
|
sorted_items = sorted(kwds.items())
|
||||||
|
key += kwd_mark
|
||||||
|
for item in sorted_items:
|
||||||
|
key += item
|
||||||
|
if typed:
|
||||||
|
key += tuple(type(v) for v in args)
|
||||||
|
if kwds:
|
||||||
|
key += tuple(type(v) for k, v in sorted_items)
|
||||||
|
elif len(key) == 1 and type(key[0]) in fasttypes:
|
||||||
|
return key[0]
|
||||||
|
return _HashedSeq(key)
|
||||||
|
|
||||||
|
|
||||||
|
def lru_cache(maxsize=100, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize) with
    f.cache_info(). Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used

    """

    # Users should only access the lru_cache through its public API:
    # cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    def decorating_function(user_function):

        cache = dict()
        stats = [0, 0]  # make statistics updateable non-locally
        HITS, MISSES = 0, 1  # names for the stats fields
        make_key = _make_key
        cache_get = cache.get  # bound method to lookup key or return None
        _len = len  # localize the global len() function
        lock = RLock()  # because linkedlist updates aren't threadsafe
        root = []  # root of the circular doubly linked list
        root[:] = [root, root, None, None]  # initialize by pointing to self
        nonlocal_root = [root]  # make updateable non-locally
        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields

        if maxsize == 0:

            def wrapper(*args, **kwds):
                # no caching, just do a statistics update after a successful call
                result = user_function(*args, **kwds)
                stats[MISSES] += 1
                return result

        elif maxsize is None:

            def wrapper(*args, **kwds):
                # simple caching without ordering or size limit
                key = make_key(args, kwds, typed)
                result = cache_get(
                    key, root
                )  # root used here as a unique not-found sentinel
                if result is not root:
                    stats[HITS] += 1
                    return result
                result = user_function(*args, **kwds)
                cache[key] = result
                stats[MISSES] += 1
                return result

        else:

            def wrapper(*args, **kwds):
                # size limited caching that tracks accesses by recency
                # (a plain args tuple is a valid key when there are no
                # keyword arguments and typing is off — skips _make_key)
                key = make_key(args, kwds, typed) if kwds or typed else args
                with lock:
                    link = cache_get(key)
                    if link is not None:
                        # record recent use of the key by moving it to the front of the list
                        root, = nonlocal_root
                        link_prev, link_next, key, result = link
                        link_prev[NEXT] = link_next
                        link_next[PREV] = link_prev
                        last = root[PREV]
                        last[NEXT] = root[PREV] = link
                        link[PREV] = last
                        link[NEXT] = root
                        stats[HITS] += 1
                        return result
                # NOTE: the user function runs outside the lock, so another
                # thread may insert the same key concurrently; re-checked below.
                result = user_function(*args, **kwds)
                with lock:
                    root, = nonlocal_root
                    if key in cache:
                        # getting here means that this same key was added to the
                        # cache while the lock was released. since the link
                        # update is already done, we need only return the
                        # computed result and update the count of misses.
                        pass
                    elif _len(cache) >= maxsize:
                        # use the old root to store the new key and result
                        oldroot = root
                        oldroot[KEY] = key
                        oldroot[RESULT] = result
                        # empty the oldest link and make it the new root
                        root = nonlocal_root[0] = oldroot[NEXT]
                        oldkey = root[KEY]
                        root[KEY] = root[RESULT] = None
                        # now update the cache dictionary for the new links
                        del cache[oldkey]
                        cache[key] = oldroot
                    else:
                        # put result in a new link at the front of the list
                        last = root[PREV]
                        link = [last, root, key, result]
                        last[NEXT] = root[PREV] = cache[key] = link
                    stats[MISSES] += 1
                return result

        def cache_info():
            """Report cache statistics"""
            with lock:
                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))

        def cache_clear():
            """Clear the cache and cache statistics"""
            with lock:
                cache.clear()
                root = nonlocal_root[0]
                root[:] = [root, root, None, None]
                stats[:] = [0, 0]

        wrapper.__wrapped__ = user_function
        wrapper.cache_info = cache_info
        wrapper.cache_clear = cache_clear
        return update_wrapper(wrapper, user_function)

    return decorating_function
|
@ -56,14 +56,12 @@ if not _run_once:
|
|||||||
else:
|
else:
|
||||||
|
|
||||||
def find_module(self, fullname, path=None):
|
def find_module(self, fullname, path=None):
|
||||||
if fullname in ('calibre.web.feeds.feedparser', 'backports.functools_lru_cache') or fullname.startswith('calibre.ebooks.markdown'):
|
if fullname == 'calibre.web.feeds.feedparser' or fullname.startswith('calibre.ebooks.markdown'):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def load_module(self, fullname):
|
def load_module(self, fullname):
|
||||||
if fullname == 'calibre.web.feeds.feedparser':
|
if fullname == 'calibre.web.feeds.feedparser':
|
||||||
return import_module('feedparser')
|
return import_module('feedparser')
|
||||||
if fullname == 'backports.functools_lru_cache':
|
|
||||||
return import_module('polyglot.functools')
|
|
||||||
return import_module(fullname[len('calibre.ebooks.'):])
|
return import_module(fullname[len('calibre.ebooks.'):])
|
||||||
|
|
||||||
sys.meta_path.insert(0, DeVendor())
|
sys.meta_path.insert(0, DeVendor())
|
||||||
|
@ -74,9 +74,8 @@ class BuildTest(unittest.TestCase):
|
|||||||
parse('<p>xxx')
|
parse('<p>xxx')
|
||||||
|
|
||||||
def test_soupsieve(self):
|
def test_soupsieve(self):
|
||||||
import calibre.startup as s
|
|
||||||
import soupsieve
|
import soupsieve
|
||||||
del soupsieve, s
|
del soupsieve
|
||||||
|
|
||||||
def test_plugins(self):
|
def test_plugins(self):
|
||||||
exclusions = set()
|
exclusions = set()
|
||||||
|
@ -8,194 +8,6 @@ from polyglot.builtins import is_py3
|
|||||||
if is_py3:
|
if is_py3:
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
else:
|
else:
|
||||||
import functools
|
from backports.functools_lru_cache import lru_cache
|
||||||
from collections import namedtuple
|
|
||||||
from threading import RLock
|
|
||||||
|
|
||||||
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
|
lru_cache
|
||||||
|
|
||||||
@functools.wraps(functools.update_wrapper)
|
|
||||||
def update_wrapper(
|
|
||||||
wrapper,
|
|
||||||
wrapped,
|
|
||||||
assigned=functools.WRAPPER_ASSIGNMENTS,
|
|
||||||
updated=functools.WRAPPER_UPDATES
|
|
||||||
):
|
|
||||||
"""
|
|
||||||
Patch two bugs in functools.update_wrapper.
|
|
||||||
"""
|
|
||||||
# workaround for http://bugs.python.org/issue3445
|
|
||||||
assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
|
|
||||||
wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
|
|
||||||
# workaround for https://bugs.python.org/issue17482
|
|
||||||
wrapper.__wrapped__ = wrapped
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
class _HashedSeq(list):
|
|
||||||
__slots__ = 'hashvalue'
|
|
||||||
|
|
||||||
def __init__(self, tup, hash=hash):
|
|
||||||
self[:] = tup
|
|
||||||
self.hashvalue = hash(tup)
|
|
||||||
|
|
||||||
def __hash__(self):
|
|
||||||
return self.hashvalue
|
|
||||||
|
|
||||||
def _make_key(
|
|
||||||
args,
|
|
||||||
kwds,
|
|
||||||
typed,
|
|
||||||
kwd_mark=(object(), ),
|
|
||||||
fasttypes=set([int, str, frozenset, type(None)]),
|
|
||||||
sorted=sorted,
|
|
||||||
tuple=tuple,
|
|
||||||
type=type,
|
|
||||||
len=len
|
|
||||||
):
|
|
||||||
'Make a cache key from optionally typed positional and keyword arguments'
|
|
||||||
key = args
|
|
||||||
if kwds:
|
|
||||||
sorted_items = sorted(kwds.items())
|
|
||||||
key += kwd_mark
|
|
||||||
for item in sorted_items:
|
|
||||||
key += item
|
|
||||||
if typed:
|
|
||||||
key += tuple(type(v) for v in args)
|
|
||||||
if kwds:
|
|
||||||
key += tuple(type(v) for k, v in sorted_items)
|
|
||||||
elif len(key) == 1 and type(key[0]) in fasttypes:
|
|
||||||
return key[0]
|
|
||||||
return _HashedSeq(key)
|
|
||||||
|
|
||||||
def lru_cache(maxsize=100, typed=False):
|
|
||||||
"""Least-recently-used cache decorator.
|
|
||||||
|
|
||||||
If *maxsize* is set to None, the LRU features are disabled and the cache
|
|
||||||
can grow without bound.
|
|
||||||
|
|
||||||
If *typed* is True, arguments of different types will be cached separately.
|
|
||||||
For example, f(3.0) and f(3) will be treated as distinct calls with
|
|
||||||
distinct results.
|
|
||||||
|
|
||||||
Arguments to the cached function must be hashable.
|
|
||||||
|
|
||||||
View the cache statistics named tuple (hits, misses, maxsize, currsize) with
|
|
||||||
f.cache_info(). Clear the cache and statistics with f.cache_clear().
|
|
||||||
Access the underlying function with f.__wrapped__.
|
|
||||||
|
|
||||||
See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Users should only access the lru_cache through its public API:
|
|
||||||
# cache_info, cache_clear, and f.__wrapped__
|
|
||||||
# The internals of the lru_cache are encapsulated for thread safety and
|
|
||||||
# to allow the implementation to change (including a possible C version).
|
|
||||||
|
|
||||||
def decorating_function(user_function):
|
|
||||||
|
|
||||||
cache = dict()
|
|
||||||
stats = [0, 0] # make statistics updateable non-locally
|
|
||||||
HITS, MISSES = 0, 1 # names for the stats fields
|
|
||||||
make_key = _make_key
|
|
||||||
cache_get = cache.get # bound method to lookup key or return None
|
|
||||||
_len = len # localize the global len() function
|
|
||||||
lock = RLock() # because linkedlist updates aren't threadsafe
|
|
||||||
root = [] # root of the circular doubly linked list
|
|
||||||
root[:] = [root, root, None, None] # initialize by pointing to self
|
|
||||||
nonlocal_root = [root] # make updateable non-locally
|
|
||||||
PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields
|
|
||||||
|
|
||||||
if maxsize == 0:
|
|
||||||
|
|
||||||
def wrapper(*args, **kwds):
|
|
||||||
# no caching, just do a statistics update after a successful call
|
|
||||||
result = user_function(*args, **kwds)
|
|
||||||
stats[MISSES] += 1
|
|
||||||
return result
|
|
||||||
|
|
||||||
elif maxsize is None:
|
|
||||||
|
|
||||||
def wrapper(*args, **kwds):
|
|
||||||
# simple caching without ordering or size limit
|
|
||||||
key = make_key(args, kwds, typed)
|
|
||||||
result = cache_get(
|
|
||||||
key, root
|
|
||||||
) # root used here as a unique not-found sentinel
|
|
||||||
if result is not root:
|
|
||||||
stats[HITS] += 1
|
|
||||||
return result
|
|
||||||
result = user_function(*args, **kwds)
|
|
||||||
cache[key] = result
|
|
||||||
stats[MISSES] += 1
|
|
||||||
return result
|
|
||||||
|
|
||||||
else:
|
|
||||||
|
|
||||||
def wrapper(*args, **kwds):
|
|
||||||
# size limited caching that tracks accesses by recency
|
|
||||||
key = make_key(args, kwds, typed) if kwds or typed else args
|
|
||||||
with lock:
|
|
||||||
link = cache_get(key)
|
|
||||||
if link is not None:
|
|
||||||
# record recent use of the key by moving it to the front of the list
|
|
||||||
root, = nonlocal_root
|
|
||||||
link_prev, link_next, key, result = link
|
|
||||||
link_prev[NEXT] = link_next
|
|
||||||
link_next[PREV] = link_prev
|
|
||||||
last = root[PREV]
|
|
||||||
last[NEXT] = root[PREV] = link
|
|
||||||
link[PREV] = last
|
|
||||||
link[NEXT] = root
|
|
||||||
stats[HITS] += 1
|
|
||||||
return result
|
|
||||||
result = user_function(*args, **kwds)
|
|
||||||
with lock:
|
|
||||||
root, = nonlocal_root
|
|
||||||
if key in cache:
|
|
||||||
# getting here means that this same key was added to the
|
|
||||||
# cache while the lock was released. since the link
|
|
||||||
# update is already done, we need only return the
|
|
||||||
# computed result and update the count of misses.
|
|
||||||
pass
|
|
||||||
elif _len(cache) >= maxsize:
|
|
||||||
# use the old root to store the new key and result
|
|
||||||
oldroot = root
|
|
||||||
oldroot[KEY] = key
|
|
||||||
oldroot[RESULT] = result
|
|
||||||
# empty the oldest link and make it the new root
|
|
||||||
root = nonlocal_root[0] = oldroot[NEXT]
|
|
||||||
oldkey = root[KEY]
|
|
||||||
root[KEY] = root[RESULT] = None
|
|
||||||
# now update the cache dictionary for the new links
|
|
||||||
del cache[oldkey]
|
|
||||||
cache[key] = oldroot
|
|
||||||
else:
|
|
||||||
# put result in a new link at the front of the list
|
|
||||||
last = root[PREV]
|
|
||||||
link = [last, root, key, result]
|
|
||||||
last[NEXT] = root[PREV] = cache[key] = link
|
|
||||||
stats[MISSES] += 1
|
|
||||||
return result
|
|
||||||
|
|
||||||
def cache_info():
|
|
||||||
"""Report cache statistics"""
|
|
||||||
with lock:
|
|
||||||
return _CacheInfo(
|
|
||||||
stats[HITS], stats[MISSES], maxsize, len(cache)
|
|
||||||
)
|
|
||||||
|
|
||||||
def cache_clear():
|
|
||||||
"""Clear the cache and cache statistics"""
|
|
||||||
with lock:
|
|
||||||
cache.clear()
|
|
||||||
root = nonlocal_root[0]
|
|
||||||
root[:] = [root, root, None, None]
|
|
||||||
stats[:] = [0, 0]
|
|
||||||
|
|
||||||
wrapper.__wrapped__ = user_function
|
|
||||||
wrapper.cache_info = cache_info
|
|
||||||
wrapper.cache_clear = cache_clear
|
|
||||||
return update_wrapper(wrapper, user_function)
|
|
||||||
|
|
||||||
return decorating_function
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user