From b177f0a1096b4fdabd8772dd9edc66662a69e683 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 13 Apr 2019 10:01:46 +0530
Subject: [PATCH] Nicer fix for lru_cache() that does not require meta_path
 trickery

---
 src/backports/__init__.py            |   0
 src/backports/functools_lru_cache.py | 199 +++++++++++++++++++++++++++
 src/calibre/startup.py               |   4 +-
 src/calibre/test_build.py            |   3 +-
 src/polyglot/functools.py            | 192 +-------------------------
 5 files changed, 203 insertions(+), 195 deletions(-)
 create mode 100644 src/backports/__init__.py
 create mode 100644 src/backports/functools_lru_cache.py

diff --git a/src/backports/__init__.py b/src/backports/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/backports/functools_lru_cache.py b/src/backports/functools_lru_cache.py
new file mode 100644
index 0000000000..dc9ccb3866
--- /dev/null
+++ b/src/backports/functools_lru_cache.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import functools
+from collections import namedtuple
+from threading import RLock
+
+_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
+
+
+@functools.wraps(functools.update_wrapper)
+def update_wrapper(
+    wrapper,
+    wrapped,
+    assigned=functools.WRAPPER_ASSIGNMENTS,
+    updated=functools.WRAPPER_UPDATES
+):
+    """
+    Patch two bugs in functools.update_wrapper.
+    """
+    # workaround for http://bugs.python.org/issue3445
+    assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
+    wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
+    # workaround for https://bugs.python.org/issue17482
+    wrapper.__wrapped__ = wrapped
+    return wrapper
+
+
+class _HashedSeq(list):
+    __slots__ = 'hashvalue'
+
+    def __init__(self, tup, hash=hash):
+        self[:] = tup
+        self.hashvalue = hash(tup)
+
+    def __hash__(self):
+        return self.hashvalue
+
+
+def _make_key(
+    args,
+    kwds,
+    typed,
+    kwd_mark=(object(), ),
+    fasttypes=set([int, str, frozenset, type(None)]),
+    sorted=sorted,
+    tuple=tuple,
+    type=type,
+    len=len
+):
+    'Make a cache key from optionally typed positional and keyword arguments'
+    key = args
+    if kwds:
+        sorted_items = sorted(kwds.items())
+        key += kwd_mark
+        for item in sorted_items:
+            key += item
+    if typed:
+        key += tuple(type(v) for v in args)
+        if kwds:
+            key += tuple(type(v) for k, v in sorted_items)
+    elif len(key) == 1 and type(key[0]) in fasttypes:
+        return key[0]
+    return _HashedSeq(key)
+
+
+def lru_cache(maxsize=100, typed=False):
+    """Least-recently-used cache decorator.
+
+    If *maxsize* is set to None, the LRU features are disabled and the cache
+    can grow without bound.
+
+    If *typed* is True, arguments of different types will be cached separately.
+    For example, f(3.0) and f(3) will be treated as distinct calls with
+    distinct results.
+
+    Arguments to the cached function must be hashable.
+
+    View the cache statistics named tuple (hits, misses, maxsize, currsize)
+    with f.cache_info(). Clear the cache and statistics with f.cache_clear().
+    Access the underlying function with f.__wrapped__.
+
+    See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
+
+    """
+
+    # Users should only access the lru_cache through its public API:
+    #     cache_info, cache_clear, and f.__wrapped__
+    # The internals of the lru_cache are encapsulated for thread safety and
+    # to allow the implementation to change (including a possible C version).
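+    #
+    # A minimal usage sketch (editor's illustration, not part of the module;
+    # the toy function `add` is invented for the example):
+    #
+    #     @lru_cache(maxsize=32)
+    #     def add(x, y):
+    #         return x + y
+    #
+    #     add(1, 2); add(1, 2)  # the second call is a cache hit
+    #     add.cache_info()   # CacheInfo(hits=1, misses=1, maxsize=32, currsize=1)
+    #     add.cache_clear()  # drop cached values and reset the statistics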
+
+    def decorating_function(user_function):
+
+        cache = dict()
+        stats = [0, 0]  # make statistics updateable non-locally
+        HITS, MISSES = 0, 1  # names for the stats fields
+        make_key = _make_key
+        cache_get = cache.get  # bound method to lookup key or return None
+        _len = len  # localize the global len() function
+        lock = RLock()  # because linkedlist updates aren't threadsafe
+        root = []  # root of the circular doubly linked list
+        root[:] = [root, root, None, None]  # initialize by pointing to self
+        nonlocal_root = [root]  # make updateable non-locally
+        PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
+
+        if maxsize == 0:
+
+            def wrapper(*args, **kwds):
+                # no caching, just do a statistics update after a successful call
+                result = user_function(*args, **kwds)
+                stats[MISSES] += 1
+                return result
+
+        elif maxsize is None:
+
+            def wrapper(*args, **kwds):
+                # simple caching without ordering or size limit
+                key = make_key(args, kwds, typed)
+                result = cache_get(
+                    key, root
+                )  # root used here as a unique not-found sentinel
+                if result is not root:
+                    stats[HITS] += 1
+                    return result
+                result = user_function(*args, **kwds)
+                cache[key] = result
+                stats[MISSES] += 1
+                return result
+
+        else:
+
+            def wrapper(*args, **kwds):
+                # size limited caching that tracks accesses by recency
+                key = make_key(args, kwds, typed) if kwds or typed else args
+                with lock:
+                    link = cache_get(key)
+                    if link is not None:
+                        # record recent use of the key by moving it to the front of the list
+                        root, = nonlocal_root
+                        link_prev, link_next, key, result = link
+                        link_prev[NEXT] = link_next
+                        link_next[PREV] = link_prev
+                        last = root[PREV]
+                        last[NEXT] = root[PREV] = link
+                        link[PREV] = last
+                        link[NEXT] = root
+                        stats[HITS] += 1
+                        return result
+                result = user_function(*args, **kwds)
+                with lock:
+                    root, = nonlocal_root
+                    if key in cache:
+                        # getting here means that this same key was added to the
+                        # cache while the lock was released. since the link
+                        # update is already done, we need only return the
+                        # computed result and update the count of misses.
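+                        # (editor's gloss, an addition to the original
+                        # comment: with the lock released, two threads may
+                        # compute the value for the same key; the thread that
+                        # re-acquires the lock second takes this branch and
+                        # returns its own freshly computed result, so the
+                        # linked list needs no further update)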
+                        pass
+                    elif _len(cache) >= maxsize:
+                        # use the old root to store the new key and result
+                        oldroot = root
+                        oldroot[KEY] = key
+                        oldroot[RESULT] = result
+                        # empty the oldest link and make it the new root
+                        root = nonlocal_root[0] = oldroot[NEXT]
+                        oldkey = root[KEY]
+                        root[KEY] = root[RESULT] = None
+                        # now update the cache dictionary for the new links
+                        del cache[oldkey]
+                        cache[key] = oldroot
+                    else:
+                        # put result in a new link at the front of the list
+                        last = root[PREV]
+                        link = [last, root, key, result]
+                        last[NEXT] = root[PREV] = cache[key] = link
+                    stats[MISSES] += 1
+                return result
+
+        def cache_info():
+            """Report cache statistics"""
+            with lock:
+                return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache))
+
+        def cache_clear():
+            """Clear the cache and cache statistics"""
+            with lock:
+                cache.clear()
+                root = nonlocal_root[0]
+                root[:] = [root, root, None, None]
+                stats[:] = [0, 0]
+
+        wrapper.__wrapped__ = user_function
+        wrapper.cache_info = cache_info
+        wrapper.cache_clear = cache_clear
+        return update_wrapper(wrapper, user_function)
+
+    return decorating_function
diff --git a/src/calibre/startup.py b/src/calibre/startup.py
index c4d9b1c759..02ef35deee 100644
--- a/src/calibre/startup.py
+++ b/src/calibre/startup.py
@@ -56,14 +56,12 @@ if not _run_once:
         else:
 
             def find_module(self, fullname, path=None):
-                if fullname in ('calibre.web.feeds.feedparser', 'backports.functools_lru_cache') or fullname.startswith('calibre.ebooks.markdown'):
+                if fullname == 'calibre.web.feeds.feedparser' or fullname.startswith('calibre.ebooks.markdown'):
                     return self
 
             def load_module(self, fullname):
                 if fullname == 'calibre.web.feeds.feedparser':
                     return import_module('feedparser')
-                if fullname == 'backports.functools_lru_cache':
-                    return import_module('polyglot.functools')
                 return import_module(fullname[len('calibre.ebooks.'):])
 
         sys.meta_path.insert(0, DeVendor())
diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py
index e7d4950b32..0ba6e07b9a 100644
--- a/src/calibre/test_build.py
+++ b/src/calibre/test_build.py
@@ -74,9 +74,8 @@ class BuildTest(unittest.TestCase):
         parse('<p>xxx')
 
     def test_soupsieve(self):
-        import calibre.startup as s
         import soupsieve
-        del soupsieve, s
+        del soupsieve
 
     def test_plugins(self):
         exclusions = set()
diff --git a/src/polyglot/functools.py b/src/polyglot/functools.py
index 0c1bb593d8..f0cdf08ca7 100644
--- a/src/polyglot/functools.py
+++ b/src/polyglot/functools.py
@@ -8,194 +8,6 @@ from polyglot.builtins import is_py3
 if is_py3:
     from functools import lru_cache
 else:
-    import functools
-    from collections import namedtuple
-    from threading import RLock
+    from backports.functools_lru_cache import lru_cache
 
-    _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])
-
-    @functools.wraps(functools.update_wrapper)
-    def update_wrapper(
-        wrapper,
-        wrapped,
-        assigned=functools.WRAPPER_ASSIGNMENTS,
-        updated=functools.WRAPPER_UPDATES
-    ):
-        """
-        Patch two bugs in functools.update_wrapper.
-        """
-        # workaround for http://bugs.python.org/issue3445
-        assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr))
-        wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated)
-        # workaround for https://bugs.python.org/issue17482
-        wrapper.__wrapped__ = wrapped
-        return wrapper
-
-    class _HashedSeq(list):
-        __slots__ = 'hashvalue'
-
-        def __init__(self, tup, hash=hash):
-            self[:] = tup
-            self.hashvalue = hash(tup)
-
-        def __hash__(self):
-            return self.hashvalue
-
-    def _make_key(
-        args,
-        kwds,
-        typed,
-        kwd_mark=(object(), ),
-        fasttypes=set([int, str, frozenset, type(None)]),
-        sorted=sorted,
-        tuple=tuple,
-        type=type,
-        len=len
-    ):
-        'Make a cache key from optionally typed positional and keyword arguments'
-        key = args
-        if kwds:
-            sorted_items = sorted(kwds.items())
-            key += kwd_mark
-            for item in sorted_items:
-                key += item
-        if typed:
-            key += tuple(type(v) for v in args)
-            if kwds:
-                key += tuple(type(v) for k, v in sorted_items)
-        elif len(key) == 1 and type(key[0]) in fasttypes:
-            return key[0]
-        return _HashedSeq(key)
-
-    def lru_cache(maxsize=100, typed=False):
-        """Least-recently-used cache decorator.
-
-        If *maxsize* is set to None, the LRU features are disabled and the cache
-        can grow without bound.
-
-        If *typed* is True, arguments of different types will be cached separately.
-        For example, f(3.0) and f(3) will be treated as distinct calls with
-        distinct results.
-
-        Arguments to the cached function must be hashable.
-
-        View the cache statistics named tuple (hits, misses, maxsize, currsize)
-        with f.cache_info(). Clear the cache and statistics with f.cache_clear().
-        Access the underlying function with f.__wrapped__.
-
-        See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
-
-        """
-
-        # Users should only access the lru_cache through its public API:
-        #     cache_info, cache_clear, and f.__wrapped__
-        # The internals of the lru_cache are encapsulated for thread safety and
-        # to allow the implementation to change (including a possible C version).
-
-        def decorating_function(user_function):
-
-            cache = dict()
-            stats = [0, 0]  # make statistics updateable non-locally
-            HITS, MISSES = 0, 1  # names for the stats fields
-            make_key = _make_key
-            cache_get = cache.get  # bound method to lookup key or return None
-            _len = len  # localize the global len() function
-            lock = RLock()  # because linkedlist updates aren't threadsafe
-            root = []  # root of the circular doubly linked list
-            root[:] = [root, root, None, None]  # initialize by pointing to self
-            nonlocal_root = [root]  # make updateable non-locally
-            PREV, NEXT, KEY, RESULT = 0, 1, 2, 3  # names for the link fields
-
-            if maxsize == 0:
-
-                def wrapper(*args, **kwds):
-                    # no caching, just do a statistics update after a successful call
-                    result = user_function(*args, **kwds)
-                    stats[MISSES] += 1
-                    return result
-
-            elif maxsize is None:
-
-                def wrapper(*args, **kwds):
-                    # simple caching without ordering or size limit
-                    key = make_key(args, kwds, typed)
-                    result = cache_get(
-                        key, root
-                    )  # root used here as a unique not-found sentinel
-                    if result is not root:
-                        stats[HITS] += 1
-                        return result
-                    result = user_function(*args, **kwds)
-                    cache[key] = result
-                    stats[MISSES] += 1
-                    return result
-
-            else:
-
-                def wrapper(*args, **kwds):
-                    # size limited caching that tracks accesses by recency
-                    key = make_key(args, kwds, typed) if kwds or typed else args
-                    with lock:
-                        link = cache_get(key)
-                        if link is not None:
-                            # record recent use of the key by moving it to the front of the list
-                            root, = nonlocal_root
-                            link_prev, link_next, key, result = link
-                            link_prev[NEXT] = link_next
-                            link_next[PREV] = link_prev
-                            last = root[PREV]
-                            last[NEXT] = root[PREV] = link
-                            link[PREV] = last
-                            link[NEXT] = root
-                            stats[HITS] += 1
-                            return result
-                    result = user_function(*args, **kwds)
-                    with lock:
-                        root, = nonlocal_root
-                        if key in cache:
-                            # getting here means that this same key was added to the
-                            # cache while the lock was released. since the link
-                            # update is already done, we need only return the
-                            # computed result and update the count of misses.
-                            pass
-                        elif _len(cache) >= maxsize:
-                            # use the old root to store the new key and result
-                            oldroot = root
-                            oldroot[KEY] = key
-                            oldroot[RESULT] = result
-                            # empty the oldest link and make it the new root
-                            root = nonlocal_root[0] = oldroot[NEXT]
-                            oldkey = root[KEY]
-                            root[KEY] = root[RESULT] = None
-                            # now update the cache dictionary for the new links
-                            del cache[oldkey]
-                            cache[key] = oldroot
-                        else:
-                            # put result in a new link at the front of the list
-                            last = root[PREV]
-                            link = [last, root, key, result]
-                            last[NEXT] = root[PREV] = cache[key] = link
-                        stats[MISSES] += 1
-                    return result
-
-            def cache_info():
-                """Report cache statistics"""
-                with lock:
-                    return _CacheInfo(
-                        stats[HITS], stats[MISSES], maxsize, len(cache)
-                    )
-
-            def cache_clear():
-                """Clear the cache and cache statistics"""
-                with lock:
-                    cache.clear()
-                    root = nonlocal_root[0]
-                    root[:] = [root, root, None, None]
-                    stats[:] = [0, 0]
-
-            wrapper.__wrapped__ = user_function
-            wrapper.cache_info = cache_info
-            wrapper.cache_clear = cache_clear
-            return update_wrapper(wrapper, user_function)
-
-        return decorating_function
+lru_cache
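
A minimal sketch of how calling code uses the shim once this patch is applied
(`polyglot.functools` and `lru_cache` come from the diff above; the
`tokenize` function is a made-up example, and the bare `lru_cache` expression
at the end of src/polyglot/functools.py presumably only marks the imported
name as used):

    from polyglot.functools import lru_cache

    # On Python 3 this resolves to the stdlib functools.lru_cache; on Python 2
    # it is the vendored backport, now imported as a regular package instead
    # of through the sys.meta_path DeVendor hook.
    @lru_cache(maxsize=None)
    def tokenize(text):
        return text.split()

    tokenize('a b c')  # miss: computed and cached
    tokenize('a b c')  # hit: returned from the cache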