Search caching for the new backend with a simple LRU cache

Kovid Goyal 2013-07-23 13:40:35 +05:30
parent ef48091c23
commit bfa4c67dc9
3 changed files with 166 additions and 23 deletions

src/calibre/db/cache.py

@@ -150,12 +150,22 @@ class Cache(object):
             field.clear_caches(book_ids=book_ids)
 
     @write_api
-    def clear_caches(self, book_ids=None):
-        self._initialize_template_cache()  # Clear the formatter template cache
+    def clear_search_caches(self):
+        self._search_api.clear_caches()
+
+    @write_api
+    def clear_caches(self, book_ids=None, template_cache=True):
+        if template_cache:
+            self._initialize_template_cache()  # Clear the formatter template cache
         for field in self.fields.itervalues():
             if hasattr(field, 'clear_caches'):
                 field.clear_caches(book_ids=book_ids)  # Clear the composite cache and ondevice caches
+        if book_ids:
+            for book_id in book_ids:
+                self.format_metadata_cache.pop(book_id, None)
+        else:
+            self.format_metadata_cache.clear()
+        self._clear_search_caches()
 
     @write_api
     def reload_from_db(self, clear_caches=True):
@@ -497,6 +507,8 @@ class Cache(object):
     @write_api
     def set_pref(self, name, val):
         self.backend.prefs.set(name, val)
+        if name == 'grouped_search_terms':
+            self._clear_search_caches()
 
     @api
     def get_metadata(self, book_id,
@@ -812,6 +824,7 @@ class Cache(object):
             f.writer.set_books({book_id:now for book_id in book_ids}, self.backend)
         if self.composites:
             self._clear_composite_caches(book_ids)
+        self._clear_search_caches()
 
     @write_api
     def mark_as_dirty(self, book_ids):
@@ -1286,6 +1299,7 @@ class Cache(object):
                 continue  # Some fields like ondevice do not have tables
             else:
                 table.remove_books(book_ids, self.backend)
+        self._clear_caches(book_ids=book_ids, template_cache=False)
 
     @read_api
     def author_sort_strings_for_books(self, book_ids):
@@ -1563,10 +1577,12 @@ class Cache(object):
     @write_api
     def saved_search_set_all(self, smap):
         self._search_api.saved_searches.set_all(smap)
+        self._clear_search_caches()
 
     @write_api
     def saved_search_delete(self, name):
         self._search_api.saved_searches.delete(name)
+        self._clear_search_caches()
 
     @write_api
     def saved_search_add(self, name, val):
@@ -1575,6 +1591,7 @@ class Cache(object):
     @write_api
     def saved_search_rename(self, old_name, new_name):
         self._search_api.saved_searches.rename(old_name, new_name)
+        self._clear_search_caches()
 
     @write_api
     def change_search_locations(self, newlocs):
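
A quick usage sketch (illustrative, not part of the commit): with the new template_cache parameter, callers such as remove_books() above can invalidate per-book caches without rebuilding the formatter template cache, and cached search results can be dropped on their own:

    # removed_ids is a hypothetical set of book ids
    cache.clear_caches(book_ids=removed_ids, template_cache=False)
    cache.clear_search_caches()  # invalidate only cached search results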

src/calibre/db/search.py

@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
 import re, weakref
 from functools import partial
 from datetime import timedelta
+from collections import deque
 
 from calibre.constants import preferred_encoding
 from calibre.utils.config_base import prefs
@@ -711,6 +712,47 @@ class Parser(SearchQueryParser):
             return candidates - matches
         return matches
 
+class LRUCache(object):
+
+    'A simple Least-Recently-Used cache'
+
+    def __init__(self, limit=30):
+        self.item_map = {}
+        self.age_map = deque()
+        self.limit = limit
+
+    def _move_up(self, key):
+        if key != self.age_map[-1]:
+            self.age_map.remove(key)
+            self.age_map.append(key)
+
+    def add(self, key, val):
+        if key in self.item_map:
+            self._move_up(key)
+            return
+
+        if len(self.age_map) >= self.limit:
+            self.item_map.pop(self.age_map.popleft())
+
+        self.item_map[key] = val
+        self.age_map.append(key)
+
+    def get(self, key, default=None):
+        ans = self.item_map.get(key, default)
+        if ans is not default:
+            self._move_up(key)
+        return ans
+
+    def clear(self):
+        self.item_map.clear()
+        self.age_map.clear()
+
+    def __contains__(self, key):
+        return key in self.item_map
+
+    def __len__(self):
+        return len(self.age_map)
+
 class Search(object):
 
     def __init__(self, db, opt_name, all_search_locations=()):
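
A minimal sketch (illustrative, not part of the commit) of the eviction order the class above implements: get() refreshes a key's age via _move_up(), so when the limit is reached add() evicts the least recently used key:

    c = LRUCache(limit=2)
    c.add('a', 1)
    c.add('b', 2)
    c.get('a')     # 'a' becomes the most recently used key
    c.add('c', 3)  # evicts 'b', the least recently used
    assert 'a' in c and 'c' in c and 'b' not in c

Note that add() on an existing key only refreshes its age; the stored value is not replaced.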
@@ -720,46 +762,73 @@ class Search(object):
         self.bool_search = BooleanSearch()
         self.keypair_search = KeyPairSearch()
         self.saved_searches = SavedSearchQueries(db, opt_name)
+        self.cache = LRUCache()
 
     def get_saved_searches(self):
         return self.saved_searches
 
     def change_locations(self, newlocs):
+        if frozenset(newlocs) != frozenset(self.all_search_locations):
+            self.clear_caches()
         self.all_search_locations = newlocs
 
+    def clear_caches(self):
+        self.cache.clear()
+
     def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None):
         '''
         Return the set of ids of all records that match the specified
         query and restriction
         '''
-        q = ''
-        if not query or not query.strip():
-            q = search_restriction
-        else:
-            q = query
-            if search_restriction:
-                q = u'(%s) and (%s)' % (search_restriction, query)
-
-        all_book_ids = dbcache._all_book_ids(type=set) if book_ids is None else set(book_ids)
-        if not q:
-            return all_book_ids
-
-        if not isinstance(q, type(u'')):
-            q = q.decode('utf-8')
-
         # We construct a new parser instance per search as the parse is not
         # thread safe.
         sqp = Parser(
-            dbcache, all_book_ids, dbcache._pref('grouped_search_terms'),
+            dbcache, set(), dbcache._pref('grouped_search_terms'),
             self.date_search, self.num_search, self.bool_search,
             self.keypair_search,
             prefs['limit_search_columns'],
             prefs['limit_search_columns_to'], self.all_search_locations,
             virtual_fields, self.saved_searches.lookup)
         try:
-            ret = sqp.parse(q)
+            return self._do_search(sqp, query, search_restriction, dbcache, book_ids=book_ids)
         finally:
             sqp.dbcache = sqp.lookup_saved_search = None
-        return ret
+
+    def _do_search(self, sqp, query, search_restriction, dbcache, book_ids=None):
+        if isinstance(search_restriction, bytes):
+            search_restriction = search_restriction.decode('utf-8')
+
+        restricted_ids = all_book_ids = dbcache._all_book_ids(type=set)
+        if search_restriction and search_restriction.strip():
+            cached = self.cache.get(search_restriction.strip())
+            if cached is None:
+                sqp.all_book_ids = all_book_ids if book_ids is None else book_ids
+                restricted_ids = sqp.parse(search_restriction)
+                if sqp.all_book_ids is all_book_ids:
+                    self.cache.add(search_restriction.strip(), restricted_ids)
+            else:
+                restricted_ids = cached
+                if book_ids is not None:
+                    restricted_ids = book_ids.intersection(restricted_ids)
+        elif book_ids is not None:
+            restricted_ids = book_ids
+
+        if isinstance(query, bytes):
+            query = query.decode('utf-8')
+
+        if not query or not query.strip():
+            return restricted_ids
+
+        if restricted_ids is all_book_ids:
+            cached = self.cache.get(query.strip())
+            if cached is not None:
+                return cached
+
+        sqp.all_book_ids = restricted_ids
+        result = sqp.parse(query)
+        if sqp.all_book_ids is all_book_ids:
+            self.cache.add(query.strip(), result)
+        return result
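
To make the caching rules above concrete, here is a sketch of how successive calls behave (illustrative, not part of the commit; db and dbcache are hypothetical handles). The stripped restriction and query strings are cached as separate keys, and a result is cached only when it was computed against the full book set, since a result computed under a restriction or an explicit book_ids subset is not reusable in general:

    s = Search(db, 'saved_searches')
    s(dbcache, 'tags:fiction', 'not authors:=Unknown')
    # miss: parses and caches the restriction; the query result is computed
    # against the restricted set, so it is not cached
    s(dbcache, 'tags:fiction', 'not authors:=Unknown')
    # hit on the restriction; the query itself is parsed again
    s(dbcache, 'tags:fiction', '')
    # miss, then the result is cached: no restriction, full book set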

src/calibre/db/tests/reading.py

@@ -386,7 +386,7 @@ class ReadingTest(BaseTest):
         db.close()
     # }}}
 
-    def test_datetime(self):
+    def test_datetime(self):  # {{{
         ' Test the reading of datetimes stored in the db '
         from calibre.utils.date import parse_date
         from calibre.db.tables import c_parse, UNDEFINED_DATE, _c_speedup
@@ -401,5 +401,62 @@ class ReadingTest(BaseTest):
         self.assertEqual(c_parse(2003).year, 2003)
         for x in (None, '', 'abc'):
             self.assertEqual(UNDEFINED_DATE, c_parse(x))
+    # }}}
+
+    def test_restrictions(self):  # {{{
+        ' Test searching with and without restrictions '
+        cache = self.init_cache()
+        self.assertSetEqual(cache.all_book_ids(), cache.search(''))
+        self.assertSetEqual({1, 2}, cache.search('', 'not authors:=Unknown'))
+        self.assertSetEqual(set(), cache.search('authors:=Unknown', 'not authors:=Unknown'))
+        self.assertSetEqual({2}, cache.search('not authors:"=Author Two"', 'not authors:=Unknown'))
+        self.assertSetEqual({2}, cache.search('not authors:"=Author Two"', book_ids={1, 2}))
+        self.assertSetEqual({2}, cache.search('not authors:"=Author Two"', 'not authors:=Unknown', book_ids={1,2,3}))
+        self.assertSetEqual(set(), cache.search('authors:=Unknown', 'not authors:=Unknown', book_ids={1,2,3}))
+    # }}}
+
+    def test_search_caching(self):  # {{{
+        ' Test caching of searches '
+        from calibre.db.search import LRUCache
+
+        class TestCache(LRUCache):
+            hit_counter = 0
+            miss_counter = 0
+
+            def get(self, key, default=None):
+                ans = LRUCache.get(self, key, default=default)
+                if ans is not None:
+                    self.hit_counter += 1
+                else:
+                    self.miss_counter += 1
+                return ans
+
+            @property
+            def cc(self):
+                self.hit_counter = self.miss_counter = 0
+
+            @property
+            def counts(self):
+                return self.hit_counter, self.miss_counter
+
+        cache = self.init_cache()
+        cache._search_api.cache = c = TestCache()
+        ae, at = self.assertEqual, self.assertTrue
+
+        def test(hit, result, *args):
+            c.cc
+            ae(cache.search(*args), result)
+            ae(c.counts, (1, 0) if hit else (0, 1))
+            c.cc
+
+        test(False, {3}, 'Unknown')
+        test(True, {3}, 'Unknown')
+        test(True, {3}, 'Unknown')
+        cache.set_field('title', {3:'xxx'})
+        test(False, {3}, 'Unknown')  # cache cleared
+        test(True, {3}, 'Unknown')
+        c.limit = 5
+        for i in range(6):
+            test(False, set(), 'nomatch_%s' % i)
+        test(False, {3}, 'Unknown')  # cached search expired
+        test(False, {3}, '', 'unknown')
+        test(True, {3}, '', 'unknown')
+        test(True, {3}, 'Unknown', 'unknown')
+    # }}}