diff --git a/src/calibre/db/cache.py b/src/calibre/db/cache.py index 6cdb9419c6..52227a5b84 100644 --- a/src/calibre/db/cache.py +++ b/src/calibre/db/cache.py @@ -150,12 +150,22 @@ class Cache(object): field.clear_caches(book_ids=book_ids) @write_api - def clear_caches(self, book_ids=None): - self._initialize_template_cache() # Clear the formatter template cache + def clear_search_caches(self): + self._search_api.clear_caches() + + @write_api + def clear_caches(self, book_ids=None, template_cache=True): + if template_cache: + self._initialize_template_cache() # Clear the formatter template cache for field in self.fields.itervalues(): if hasattr(field, 'clear_caches'): field.clear_caches(book_ids=book_ids) # Clear the composite cache and ondevice caches - self.format_metadata_cache.clear() + if book_ids: + for book_id in book_ids: + self.format_metadata_cache.pop(book_id, None) + else: + self.format_metadata_cache.clear() + self._clear_search_caches() @write_api def reload_from_db(self, clear_caches=True): @@ -497,6 +507,8 @@ class Cache(object): @write_api def set_pref(self, name, val): self.backend.prefs.set(name, val) + if name == 'grouped_search_terms': + self._clear_search_caches() @api def get_metadata(self, book_id, @@ -812,6 +824,7 @@ class Cache(object): f.writer.set_books({book_id:now for book_id in book_ids}, self.backend) if self.composites: self._clear_composite_caches(book_ids) + self._clear_search_caches() @write_api def mark_as_dirty(self, book_ids): @@ -1286,6 +1299,7 @@ class Cache(object): continue # Some fields like ondevice do not have tables else: table.remove_books(book_ids, self.backend) + self._clear_caches(book_ids=book_ids, template_cache=False) @read_api def author_sort_strings_for_books(self, book_ids): @@ -1563,10 +1577,12 @@ class Cache(object): @write_api def saved_search_set_all(self, smap): self._search_api.saved_searches.set_all(smap) + self._clear_search_caches() @write_api def saved_search_delete(self, name): 
from collections import deque


class LRUCache(object):

    '''
    A simple Least-Recently-Used cache.

    At most ``limit`` entries are kept. When a new key is added to a full
    cache the entry that was least recently added or fetched is discarded.
    A ``limit`` of zero or less disables storage altogether.
    '''

    def __init__(self, limit=30):
        self.item_map = {}  # key -> cached value
        self.age_map = deque()  # keys, least recently used first
        self.limit = limit

    def _move_up(self, key):
        ' Mark key (which must already be cached) as most recently used '
        if key != self.age_map[-1]:
            self.age_map.remove(key)
            self.age_map.append(key)

    def add(self, key, val):
        ' Store val under key, evicting the oldest entries if needed '
        if key in self.item_map:
            # Refresh the stored value as well as the age, otherwise a
            # re-added key would keep serving its stale value.
            self.item_map[key] = val
            self._move_up(key)
            return

        # Loop rather than evict a single entry so the cache also shrinks
        # when limit has been lowered after entries were added. The
        # emptiness check keeps a non-positive limit from popping an empty
        # deque.
        while self.age_map and len(self.age_map) >= self.limit:
            self.item_map.pop(self.age_map.popleft(), None)

        if self.limit > 0:
            self.item_map[key] = val
            self.age_map.append(key)

    def get(self, key, default=None):
        ' Return the value cached for key (refreshing its age) or default '
        try:
            ans = self.item_map[key]
        except KeyError:
            return default
        # Membership decides a hit, not comparison against default, so a
        # stored value that happens to be the default is still refreshed.
        self._move_up(key)
        return ans

    def clear(self):
        ' Remove every entry '
        self.item_map.clear()
        self.age_map.clear()

    def __contains__(self, key):
        return key in self.item_map

    def __len__(self):
        return len(self.age_map)
def get_saved_searches(self):
    ' Return the object stored in self.saved_searches '
    return self.saved_searches


def change_locations(self, newlocs):
    ' Set the search locations, dropping cached results if they changed '
    if frozenset(newlocs) != frozenset(self.all_search_locations):
        self.clear_caches()
    self.all_search_locations = newlocs


def clear_caches(self):
    ' Forget all cached search results '
    self.cache.clear()


def __call__(self, dbcache, query, search_restriction, virtual_fields=None, book_ids=None):
    '''
    Return the set of ids of all records that match the specified
    query and restriction
    '''
    # We construct a new parser instance per search as the parse is not
    # thread safe.
    sqp = Parser(
        dbcache, set(), dbcache._pref('grouped_search_terms'),
        self.date_search, self.num_search, self.bool_search,
        self.keypair_search,
        prefs['limit_search_columns'],
        prefs['limit_search_columns_to'], self.all_search_locations,
        virtual_fields, self.saved_searches.lookup)
    try:
        return self._do_search(sqp, query, search_restriction, dbcache, book_ids=book_ids)
    finally:
        # Break the references the parser holds once the search is done
        sqp.dbcache = sqp.lookup_saved_search = None


def _do_search(self, sqp, query, search_restriction, dbcache, book_ids=None):
    '''
    Run search_restriction and then query through the parser sqp, using
    self.cache for results computed over the full set of book ids.

    :param sqp: Parser whose all_book_ids attribute is set here before
        each parse() call.
    :param query: Search expression (text or UTF-8 encoded bytes), may
        be empty.
    :param search_restriction: Expression limiting the candidate books
        (text or UTF-8 encoded bytes), may be empty.
    :param dbcache: Object providing _all_book_ids(type=set).
    :param book_ids: Optional iterable of book ids to limit the search
        to. It is copied into a set and never modified.
    :return: The set of matching book ids.
    '''
    # NOTE(review): cached results are keyed on the stripped expression
    # text only. Whatever virtual_fields the parser was built with is not
    # part of the key -- confirm that this is intended.
    if isinstance(search_restriction, bytes):
        search_restriction = search_restriction.decode('utf-8')
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    if book_ids is not None:
        # Always work on a private set: callers may pass a list or tuple
        # (which has no intersection()), and the caller's own container
        # must never be handed back as the result.
        book_ids = set(book_ids)

    restricted_ids = all_book_ids = dbcache._all_book_ids(type=set)
    if search_restriction and search_restriction.strip():
        key = search_restriction.strip()
        cached = self.cache.get(key)
        if cached is None:
            sqp.all_book_ids = all_book_ids if book_ids is None else book_ids
            restricted_ids = sqp.parse(search_restriction)
            # Only results computed over the whole library are reusable
            if sqp.all_book_ids is all_book_ids:
                self.cache.add(key, restricted_ids)
        else:
            restricted_ids = cached
            if book_ids is not None:
                restricted_ids = book_ids.intersection(restricted_ids)
    elif book_ids is not None:
        restricted_ids = book_ids

    if not query or not query.strip():
        return restricted_ids

    # The identity test is deliberate: the query result may be served from
    # or stored in the cache only when nothing narrowed the candidates.
    if restricted_ids is all_book_ids:
        cached = self.cache.get(query.strip())
        if cached is not None:
            return cached

    sqp.all_book_ids = restricted_ids
    result = sqp.parse(query)

    if sqp.all_book_ids is all_book_ids:
        self.cache.add(query.strip(), result)

    return result
def test_search_caching(self):  # {{{
    ' Test caching of searches '
    from calibre.db.search import LRUCache

    class TestCache(LRUCache):
        ' LRUCache that counts the hits and misses of get() '
        hit_counter = 0
        miss_counter = 0

        def get(self, key, default=None):
            ans = LRUCache.get(self, key, default=default)
            if ans is not None:
                self.hit_counter += 1
            else:
                self.miss_counter += 1
            # Must hand the value back, otherwise the search code sees
            # every lookup as a miss and cached results are never used.
            return ans

        def reset_counts(self):
            self.hit_counter = self.miss_counter = 0

        @property
        def counts(self):
            return self.hit_counter, self.miss_counter

    cache = self.init_cache()
    cache._search_api.cache = c = TestCache()

    ae = self.assertEqual

    def test(hit, result, *args):
        # Each search is expected to do exactly one cache lookup
        c.reset_counts()
        ae(cache.search(*args), result)
        ae(c.counts, (1, 0) if hit else (0, 1))
        c.reset_counts()

    test(False, {3}, 'Unknown')
    test(True, {3}, 'Unknown')
    test(True, {3}, 'Unknown')
    cache.set_field('title', {3:'xxx'})
    test(False, {3}, 'Unknown')  # cache cleared
    test(True, {3}, 'Unknown')
    c.limit = 5
    for i in range(6):
        test(False, set(), 'nomatch_%s' % i)
    test(False, {3}, 'Unknown')  # cached search expired
    test(False, {3}, '', 'unknown')
    test(True, {3}, '', 'unknown')
    test(True, {3}, 'Unknown', 'unknown')
# }}}