Search caching for the new backend with a simple LRU cache

This commit is contained in:
Kovid Goyal 2013-07-23 13:40:35 +05:30
parent ef48091c23
commit bfa4c67dc9
3 changed files with 166 additions and 23 deletions

View File

@ -150,12 +150,22 @@ class Cache(object):
field.clear_caches(book_ids=book_ids) field.clear_caches(book_ids=book_ids)
@write_api @write_api
def clear_caches(self, book_ids=None): def clear_search_caches(self):
self._initialize_template_cache() # Clear the formatter template cache self._search_api.clear_caches()
@write_api
def clear_caches(self, book_ids=None, template_cache=True):
if template_cache:
self._initialize_template_cache() # Clear the formatter template cache
for field in self.fields.itervalues(): for field in self.fields.itervalues():
if hasattr(field, 'clear_caches'): if hasattr(field, 'clear_caches'):
field.clear_caches(book_ids=book_ids) # Clear the composite cache and ondevice caches field.clear_caches(book_ids=book_ids) # Clear the composite cache and ondevice caches
self.format_metadata_cache.clear() if book_ids:
for book_id in book_ids:
self.format_metadata_cache.pop(book_id, None)
else:
self.format_metadata_cache.clear()
self._clear_search_caches()
@write_api @write_api
def reload_from_db(self, clear_caches=True): def reload_from_db(self, clear_caches=True):
@ -497,6 +507,8 @@ class Cache(object):
@write_api @write_api
def set_pref(self, name, val): def set_pref(self, name, val):
self.backend.prefs.set(name, val) self.backend.prefs.set(name, val)
if name == 'grouped_search_terms':
self._clear_search_caches()
@api @api
def get_metadata(self, book_id, def get_metadata(self, book_id,
@ -812,6 +824,7 @@ class Cache(object):
f.writer.set_books({book_id:now for book_id in book_ids}, self.backend) f.writer.set_books({book_id:now for book_id in book_ids}, self.backend)
if self.composites: if self.composites:
self._clear_composite_caches(book_ids) self._clear_composite_caches(book_ids)
self._clear_search_caches()
@write_api @write_api
def mark_as_dirty(self, book_ids): def mark_as_dirty(self, book_ids):
@ -1286,6 +1299,7 @@ class Cache(object):
continue # Some fields like ondevice do not have tables continue # Some fields like ondevice do not have tables
else: else:
table.remove_books(book_ids, self.backend) table.remove_books(book_ids, self.backend)
self._clear_caches(book_ids=book_ids, template_cache=False)
@read_api @read_api
def author_sort_strings_for_books(self, book_ids): def author_sort_strings_for_books(self, book_ids):
@ -1563,10 +1577,12 @@ class Cache(object):
@write_api @write_api
def saved_search_set_all(self, smap): def saved_search_set_all(self, smap):
self._search_api.saved_searches.set_all(smap) self._search_api.saved_searches.set_all(smap)
self._clear_search_caches()
@write_api @write_api
def saved_search_delete(self, name): def saved_search_delete(self, name):
self._search_api.saved_searches.delete(name) self._search_api.saved_searches.delete(name)
self._clear_search_caches()
@write_api @write_api
def saved_search_add(self, name, val): def saved_search_add(self, name, val):
@ -1575,6 +1591,7 @@ class Cache(object):
@write_api @write_api
def saved_search_rename(self, old_name, new_name): def saved_search_rename(self, old_name, new_name):
self._search_api.saved_searches.rename(old_name, new_name) self._search_api.saved_searches.rename(old_name, new_name)
self._clear_search_caches()
@write_api @write_api
def change_search_locations(self, newlocs): def change_search_locations(self, newlocs):

View File

@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
import re, weakref import re, weakref
from functools import partial from functools import partial
from datetime import timedelta from datetime import timedelta
from collections import deque
from calibre.constants import preferred_encoding from calibre.constants import preferred_encoding
from calibre.utils.config_base import prefs from calibre.utils.config_base import prefs
@ -711,6 +712,47 @@ class Parser(SearchQueryParser):
return candidates - matches return candidates - matches
return matches return matches
class LRUCache(object):

    '''A simple Least-Recently-Used cache.

    item_map holds key -> value; age_map orders keys from least recently
    used (left end) to most recently used (right end). When the cache is
    full, the key at the left end of age_map is evicted.
    '''

    def __init__(self, limit=30):
        # Maximum number of entries before the oldest entry is evicted
        self.item_map = {}
        self.age_map = deque()
        self.limit = limit

    def _move_up(self, key):
        # Mark key as the most recently used entry
        if key != self.age_map[-1]:
            self.age_map.remove(key)
            self.age_map.append(key)

    def add(self, key, val):
        if key in self.item_map:
            # BUG FIX: the original returned without storing val, so adding
            # an existing key silently discarded the new value. Store it and
            # refresh the key's recency.
            self.item_map[key] = val
            self._move_up(key)
            return
        if len(self.age_map) >= self.limit:
            # Evict the least recently used entry
            self.item_map.pop(self.age_map.popleft())
        self.item_map[key] = val
        self.age_map.append(key)

    def get(self, key, default=None):
        ans = self.item_map.get(key, default)
        if ans is not default:
            # A successful lookup counts as a use
            self._move_up(key)
        return ans

    def clear(self):
        self.item_map.clear()
        self.age_map.clear()

    def __contains__(self, key):
        return key in self.item_map

    def __len__(self):
        return len(self.age_map)
class Search(object): class Search(object):
def __init__(self, db, opt_name, all_search_locations=()): def __init__(self, db, opt_name, all_search_locations=()):
@ -720,46 +762,73 @@ class Search(object):
self.bool_search = BooleanSearch() self.bool_search = BooleanSearch()
self.keypair_search = KeyPairSearch() self.keypair_search = KeyPairSearch()
self.saved_searches = SavedSearchQueries(db, opt_name) self.saved_searches = SavedSearchQueries(db, opt_name)
self.cache = LRUCache()
def get_saved_searches(self): def get_saved_searches(self):
return self.saved_searches return self.saved_searches
def change_locations(self, newlocs): def change_locations(self, newlocs):
if frozenset(newlocs) != frozenset(self.all_search_locations):
self.clear_caches()
self.all_search_locations = newlocs self.all_search_locations = newlocs
def clear_caches(self):
self.cache.clear()
def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None): def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None):
''' '''
Return the set of ids of all records that match the specified Return the set of ids of all records that match the specified
query and restriction query and restriction
''' '''
q = ''
if not query or not query.strip():
q = search_restriction
else:
q = query
if search_restriction:
q = u'(%s) and (%s)' % (search_restriction, query)
all_book_ids = dbcache._all_book_ids(type=set) if book_ids is None else set(book_ids)
if not q:
return all_book_ids
if not isinstance(q, type(u'')):
q = q.decode('utf-8')
# We construct a new parser instance per search as the parse is not # We construct a new parser instance per search as the parse is not
# thread safe. # thread safe.
sqp = Parser( sqp = Parser(
dbcache, all_book_ids, dbcache._pref('grouped_search_terms'), dbcache, set(), dbcache._pref('grouped_search_terms'),
self.date_search, self.num_search, self.bool_search, self.date_search, self.num_search, self.bool_search,
self.keypair_search, self.keypair_search,
prefs['limit_search_columns'], prefs['limit_search_columns'],
prefs['limit_search_columns_to'], self.all_search_locations, prefs['limit_search_columns_to'], self.all_search_locations,
virtual_fields, self.saved_searches.lookup) virtual_fields, self.saved_searches.lookup)
try: try:
ret = sqp.parse(q) return self._do_search(sqp, query, search_restriction, dbcache, book_ids=book_ids)
finally: finally:
sqp.dbcache = sqp.lookup_saved_search = None sqp.dbcache = sqp.lookup_saved_search = None
return ret
def _do_search(self, sqp, query, search_restriction, dbcache, book_ids=None):
    '''Evaluate query, pre-filtered by search_restriction and/or an explicit
    book_ids set, with LRU caching of results.

    Results (for both the restriction and the query) are cached only when
    they were computed over the full library, detected via the identity
    check ``sqp.all_book_ids is all_book_ids`` — a result computed over a
    narrower candidate set would be wrong if served for a wider search.
    '''
    if isinstance(search_restriction, bytes):
        search_restriction = search_restriction.decode('utf-8')
    # Default candidate set: every book in the library
    restricted_ids = all_book_ids = dbcache._all_book_ids(type=set)
    if search_restriction and search_restriction.strip():
        cached = self.cache.get(search_restriction.strip())
        if cached is None:
            # Cache miss: evaluate the restriction. If the caller supplied
            # an explicit candidate set, parse over that set instead of the
            # whole library.
            sqp.all_book_ids = all_book_ids if book_ids is None else book_ids
            restricted_ids = sqp.parse(search_restriction)
            if sqp.all_book_ids is all_book_ids:
                # Only cache results computed over the full library
                self.cache.add(search_restriction.strip(), restricted_ids)
        else:
            restricted_ids = cached
            if book_ids is not None:
                # Cached restriction covers the whole library; narrow it to
                # the caller's candidate set
                restricted_ids = book_ids.intersection(restricted_ids)
    elif book_ids is not None:
        # No restriction: the candidate set is just the explicit book ids
        restricted_ids = book_ids
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    if not query or not query.strip():
        # Empty query: the restriction (or candidate set) is the answer
        return restricted_ids
    if restricted_ids is all_book_ids:
        # Unrestricted search over the whole library: consult the cache
        cached = self.cache.get(query.strip())
        if cached is not None:
            return cached
    sqp.all_book_ids = restricted_ids
    result = sqp.parse(query)
    if sqp.all_book_ids is all_book_ids:
        # Searched the full library, so the result is safe to cache
        self.cache.add(query.strip(), result)
    return result

View File

@ -386,7 +386,7 @@ class ReadingTest(BaseTest):
db.close() db.close()
# }}} # }}}
def test_datetime(self): def test_datetime(self): # {{{
' Test the reading of datetimes stored in the db ' ' Test the reading of datetimes stored in the db '
from calibre.utils.date import parse_date from calibre.utils.date import parse_date
from calibre.db.tables import c_parse, UNDEFINED_DATE, _c_speedup from calibre.db.tables import c_parse, UNDEFINED_DATE, _c_speedup
@ -401,5 +401,62 @@ class ReadingTest(BaseTest):
self.assertEqual(c_parse(2003).year, 2003) self.assertEqual(c_parse(2003).year, 2003)
for x in (None, '', 'abc'): for x in (None, '', 'abc'):
self.assertEqual(UNDEFINED_DATE, c_parse(x)) self.assertEqual(UNDEFINED_DATE, c_parse(x))
# }}}
def test_restrictions(self):  # {{{
    ' Test searching with and without restrictions '
    cache = self.init_cache()
    eq = self.assertSetEqual
    # An empty query with no restriction matches every book
    eq(cache.all_book_ids(), cache.search(''))
    # Each case: (expected ids, query, restriction-or-None, book_ids-or-None)
    cases = (
        ({1, 2}, '', 'not authors:=Unknown', None),
        (set(), 'authors:=Unknown', 'not authors:=Unknown', None),
        ({2}, 'not authors:"=Author Two"', 'not authors:=Unknown', None),
        ({2}, 'not authors:"=Author Two"', None, {1, 2}),
        ({2}, 'not authors:"=Author Two"', 'not authors:=Unknown', {1, 2, 3}),
        (set(), 'authors:=Unknown', 'not authors:=Unknown', {1, 2, 3}),
    )
    for expected, query, restriction, book_ids in cases:
        args = [query]
        if restriction is not None:
            args.append(restriction)
        kwargs = {} if book_ids is None else {'book_ids': book_ids}
        eq(expected, cache.search(*args, **kwargs))
    # }}}
def test_search_caching(self):  # {{{
    ' Test caching of searches '
    from calibre.db.search import LRUCache

    class TestCache(LRUCache):
        # Instrumented cache that counts hits and misses

        hit_counter = 0
        miss_counter = 0

        def get(self, key, default=None):
            ans = LRUCache.get(self, key, default=default)
            if ans is not None:
                self.hit_counter += 1
            else:
                self.miss_counter += 1
            # BUG FIX: the original forgot to return ans, so this wrapper
            # always returned None and cached values were never actually
            # served to the search layer
            return ans

        @property
        def cc(self):
            # Reading this property resets both counters
            self.hit_counter = self.miss_counter = 0

        @property
        def counts(self):
            return self.hit_counter, self.miss_counter

    cache = self.init_cache()
    # Replace the search cache with the instrumented one
    cache._search_api.cache = c = TestCache()
    ae = self.assertEqual

    def test(hit, result, *args):
        # Run a search, asserting both its result and whether it was
        # served from the cache (exactly one hit) or not (one miss)
        c.cc
        ae(cache.search(*args), result)
        ae(c.counts, (1, 0) if hit else (0, 1))
        c.cc

    test(False, {3}, 'Unknown')
    test(True, {3}, 'Unknown')
    test(True, {3}, 'Unknown')
    cache.set_field('title', {3:'xxx'})
    test(False, {3}, 'Unknown')  # writing a field cleared the cache
    test(True, {3}, 'Unknown')
    c.limit = 5
    for i in range(6):
        # Six distinct misses overflow the 5-entry cache
        test(False, set(), 'nomatch_%s' % i)
    test(False, {3}, 'Unknown')  # cached search was evicted (LRU)
    test(False, {3}, '', 'unknown')
    test(True, {3}, '', 'unknown')
    test(True, {3}, 'Unknown', 'unknown')
    # }}}