mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Start work on implementing search in the new backend. Searching for date columns working.
This commit is contained in:
parent
aff8f66fa1
commit
63b164241a
@ -13,6 +13,7 @@ from functools import wraps, partial
|
|||||||
|
|
||||||
from calibre.db.locking import create_locks, RecordLock
|
from calibre.db.locking import create_locks, RecordLock
|
||||||
from calibre.db.fields import create_field
|
from calibre.db.fields import create_field
|
||||||
|
from calibre.db.search import Search
|
||||||
from calibre.db.tables import VirtualTable
|
from calibre.db.tables import VirtualTable
|
||||||
from calibre.db.lazy import FormatMetadata, FormatsList
|
from calibre.db.lazy import FormatMetadata, FormatsList
|
||||||
from calibre.ebooks.metadata.book.base import Metadata
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
@ -50,6 +51,7 @@ class Cache(object):
|
|||||||
self.record_lock = RecordLock(self.read_lock)
|
self.record_lock = RecordLock(self.read_lock)
|
||||||
self.format_metadata_cache = defaultdict(dict)
|
self.format_metadata_cache = defaultdict(dict)
|
||||||
self.formatter_template_cache = {}
|
self.formatter_template_cache = {}
|
||||||
|
self._search_api = Search(self.field_metadata.get_search_terms())
|
||||||
|
|
||||||
# Implement locking for all simple read/write API methods
|
# Implement locking for all simple read/write API methods
|
||||||
# An unlocked version of the method is stored with the name starting
|
# An unlocked version of the method is stored with the name starting
|
||||||
@ -409,6 +411,10 @@ class Cache(object):
|
|||||||
else:
|
else:
|
||||||
return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
|
return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
|
||||||
|
|
||||||
|
@read_api
|
||||||
|
def search(self, query, restriction):
    '''
    Run ``query`` limited by ``restriction`` through this cache's search
    object and return whatever that object returns.

    The search object receives this cache as its first argument so it can
    read field data from it.
    '''
    run_search = self._search_api
    return run_search(self, query, restriction)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class SortKey(object):
|
class SortKey(object):
|
||||||
|
@ -9,6 +9,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
|
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
|
||||||
from calibre.ebooks.metadata import title_sort
|
from calibre.ebooks.metadata import title_sort
|
||||||
@ -83,6 +84,15 @@ class Field(object):
|
|||||||
'''
|
'''
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Abstract hook: subclasses return a generator of
    ``(value, book_ids)`` pairs, where ``value`` is a searchable value and
    ``book_ids`` is the set of ids of the books that have that value.

    Only book ids present in ``candidates`` are reported. A OneToOneField
    yields sets containing a single id; other field types will generally
    yield sets with several ids.
    '''
    raise NotImplementedError()
|
||||||
|
|
||||||
class OneToOneField(Field):
|
class OneToOneField(Field):
|
||||||
|
|
||||||
@ -102,6 +112,11 @@ class OneToOneField(Field):
|
|||||||
return {id_ : self._sort_key(self.table.book_col_map.get(id_,
|
return {id_ : self._sort_key(self.table.book_col_map.get(id_,
|
||||||
self._default_sort_key)) for id_ in all_book_ids}
|
self._default_sort_key)) for id_ in all_book_ids}
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, {book_id})`` for every book id in ``candidates``.

    The value is looked up in the table's book -> value map; books that
    have no entry yield ``default_value`` instead. ``get_metadata`` is not
    used here.
    '''
    value_for = self.table.book_col_map.get
    for book_id in candidates:
        yield value_for(book_id, default_value), {book_id}
|
||||||
|
|
||||||
class CompositeField(OneToOneField):
|
class CompositeField(OneToOneField):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -139,6 +154,9 @@ class CompositeField(OneToOneField):
|
|||||||
return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
|
return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
|
||||||
all_book_ids}
|
all_book_ids}
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, {book_id})`` for every book id in ``candidates``, where
    the value is obtained from get_value_with_cache().

    ``default_value`` is accepted for interface compatibility but is not
    used here.
    '''
    compute = self.get_value_with_cache
    for book_id in candidates:
        yield compute(book_id, get_metadata), {book_id}
|
||||||
|
|
||||||
class OnDeviceField(OneToOneField):
|
class OnDeviceField(OneToOneField):
|
||||||
|
|
||||||
@ -176,6 +194,10 @@ class OnDeviceField(OneToOneField):
|
|||||||
return {id_ : self.for_book(id_) for id_ in
|
return {id_ : self.for_book(id_) for id_ in
|
||||||
all_book_ids}
|
all_book_ids}
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, {book_id})`` for every book id in ``candidates``, where
    the value is whatever for_book() reports for that book (falling back to
    ``default_value``). ``get_metadata`` is not used here.
    '''
    lookup = self.for_book
    for book_id in candidates:
        yield lookup(book_id, default_value=default_value), {book_id}
|
||||||
|
|
||||||
class ManyToOneField(Field):
|
class ManyToOneField(Field):
|
||||||
|
|
||||||
def for_book(self, book_id, default_value=None):
|
def for_book(self, book_id, default_value=None):
|
||||||
@ -206,6 +228,13 @@ class ManyToOneField(Field):
|
|||||||
for cid in ans.itervalues()}
|
for cid in ans.itervalues()}
|
||||||
return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
|
return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, book_ids)`` for every item in the table's id map that is
    attached to at least one book in ``candidates``.

    ``book_ids`` is a fresh set holding only the candidate books that have
    the item. ``get_metadata`` and ``default_value`` are not used here.
    '''
    books_for_item = self.table.col_book_map
    for item_id, value in self.table.id_map.iteritems():
        # Copy first so the table's own collection is never modified
        hits = set(books_for_item.get(item_id, ()))
        hits.intersection_update(candidates)
        if not hits:
            continue
        yield value, hits
|
||||||
|
|
||||||
class ManyToManyField(Field):
|
class ManyToManyField(Field):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -241,6 +270,12 @@ class ManyToManyField(Field):
|
|||||||
(self._default_sort_key,))
|
(self._default_sort_key,))
|
||||||
for id_, cids in ans.iteritems()}
|
for id_, cids in ans.iteritems()}
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, book_ids)`` for every item in the table's id map that is
    attached to at least one book in ``candidates``.

    ``book_ids`` is a new set restricted to ``candidates``; items with no
    candidate books are skipped. ``get_metadata`` and ``default_value`` are
    not used here.
    '''
    item_books = self.table.col_book_map
    for item_id, value in self.table.id_map.iteritems():
        # Work on a copy so the table's mapping stays untouched
        matching = set(item_books.get(item_id, ()))
        matching.intersection_update(candidates)
        if matching:
            yield value, matching
|
||||||
|
|
||||||
class IdentifiersField(ManyToManyField):
|
class IdentifiersField(ManyToManyField):
|
||||||
|
|
||||||
@ -276,6 +311,17 @@ class FormatsField(ManyToManyField):
|
|||||||
def format_fname(self, book_id, fmt):
|
def format_fname(self, book_id, fmt):
|
||||||
return self.table.fname_map[book_id][fmt.upper()]
|
return self.table.fname_map[book_id][fmt.upper()]
|
||||||
|
|
||||||
|
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
    '''
    Yield ``(value, book_ids)`` pairs built by inverting the table's
    book -> values map for the books in ``candidates``.

    Each distinct value (presumably a format name, given the class this
    belongs to) is yielded once, together with the set of candidate books
    that have it. Books without an entry contribute nothing.
    ``get_metadata`` and ``default_value`` are not used here.
    '''
    books_by_value = defaultdict(set)
    values_for = self.table.book_col_map.get
    for book_id in candidates:
        for value in values_for(book_id, ()):
            books_by_value[value].add(book_id)

    for pair in books_by_value.iteritems():
        yield pair
|
||||||
|
|
||||||
class SeriesField(ManyToOneField):
|
class SeriesField(ManyToOneField):
|
||||||
|
|
||||||
def sort_key_for_series(self, book_id, get_lang, series_sort_order):
|
def sort_key_for_series(self, book_id, get_lang, series_sort_order):
|
||||||
|
284
src/calibre/db/search.py
Normal file
284
src/calibre/db/search.py
Normal file
@ -0,0 +1,284 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import re
|
||||||
|
from functools import partial
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
|
from calibre.utils.config_base import prefs
|
||||||
|
from calibre.utils.date import parse_date, UNDEFINED_DATE, now
|
||||||
|
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
|
||||||
|
|
||||||
|
# TODO: Thread safety of saved searches
|
||||||
|
|
||||||
|
class DateSearch(object):  # {{{

    '''
    Callable that matches books by comparing date values against a query.

    A query is either the literal ``true`` / ``false`` (date is set / not
    set) or an optional relational operator (``=``, ``!=``, ``>``, ``>=``,
    ``<``, ``<=``; ``=`` when omitted) followed by one of:

        * a today / yesterday / thismonth keyword (plain, underscore
          prefixed or translated)
        * ``<N>daysago``
        * a date string understood by parse_date()

    Comparisons only look at as many fields (year, month, day) as the query
    specifies; see eq() for the meaning of ``field_count``.
    '''

    def __init__(self):
        # operator prefix -> (number of characters to strip, comparison)
        self.operators = {
            '=' : (1, self.eq),
            '!=' : (2, self.ne),
            '>' : (1, self.gt),
            '>=' : (2, self.ge),
            '<' : (1, self.lt),
            '<=' : (2, self.le),
        }
        self.local_today = { '_today', 'today', icu_lower(_('today')) }
        self.local_yesterday = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) }
        self.local_thismonth = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) }
        self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))

    def eq(self, dbdate, query, field_count):
        '''
        Return True if ``dbdate`` equals ``query`` on the first
        ``field_count`` fields: 1 compares only the year, 2 the year and
        month, anything else the year, month and day.
        '''
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        ' Logical negation of eq() '
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        '''
        Return True if ``dbdate`` is strictly later than ``query`` when only
        the first ``field_count`` fields (year, month, day) are considered.
        '''
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        ' Logical negation of gt() '
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        '''
        Return True if ``dbdate`` is strictly earlier than ``query`` when
        only the first ``field_count`` fields (year, month, day) are
        considered.
        '''
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        ' Logical negation of lt() '
        return not self.lt(*args)

    def _split_operator(self, query):
        '''
        Split a leading relational operator off ``query``.

        Returns ``(comparison, remaining_query)``. When no operator is
        present the comparison defaults to eq() and the query is returned
        unchanged.
        '''
        # Try the two character operators first and stop at the first hit.
        # Relying on dict iteration order (and continuing after a match)
        # could read '>=2011' as '>' followed by '=', silently turning the
        # query into an equality test.
        for prefix in sorted(self.operators, key=len, reverse=True):
            if query.startswith(prefix):
                strip, relop = self.operators[prefix]
                return relop, query[strip:]
        return self.operators['='][-1], query

    def __call__(self, query, field_iter):
        '''
        Return the set of book ids whose date value matches ``query``.

        :param query: The query text. It is compared against lower case
                      keywords, so it should already be lower cased (the
                      caller in this file passes it through icu_lower()).
        :param field_iter: A callable taking no arguments that returns an
                      iterable of ``(value, set of book ids)``. String
                      values are converted with parse_date().

        Raises ParseException if the number in an ``<N>daysago`` query or
        the date in the query cannot be converted.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        if query == 'false':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is None or v <= UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        if query == 'true':
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is not None and v > UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        relop, query = self._split_operator(query)

        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                # Everything before the daysago suffix must be the number
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except Exception:
                    raise ParseException(query, len(query), 'Number conversion error')
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except Exception:
                    raise ParseException(query, len(query), 'Date conversion error')
                # The number of separators tells us how precise the query is
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            if isinstance(v, (str, unicode)):
                v = parse_date(v)
            if v is not None and relop(v, qd, field_count):
                matches |= book_ids

        return matches
# }}}
|
||||||
|
|
||||||
|
class Parser(SearchQueryParser):

    '''
    SearchQueryParser subclass that answers individual ``location:query``
    terms from a db cache. Currently only date based fields are actually
    searched; every other field yields no matches.
    '''

    def __init__(self, dbcache, all_book_ids, gst, date_search,
                 limit_search_columns, limit_search_columns_to, locations):
        '''
        :param dbcache: The cache object that provides ``field_metadata``,
                        ``fields`` and ``_get_metadata``
        :param all_book_ids: The set of all book ids, used as the universal set
        :param gst: The grouped search terms
        :param date_search: Callable used to match date fields
        :param limit_search_columns: If true, searches over ``all`` are
                        restricted to ``limit_search_columns_to``
        :param limit_search_columns_to: Iterable of location names
        :param locations: All valid search locations
        '''
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search = date_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        super(Parser, self).__init__(locations, optimize=True)

    @property
    def field_metadata(self):
        return self.dbcache.field_metadata

    def universal_set(self):
        return self.all_book_ids

    def field_iter(self, name, candidates):
        ' Iterate over the searchable values of the field ``name``, restricted to ``candidates`` '
        get_metadata = partial(self.dbcache._get_metadata, get_user_categories=False)
        return self.dbcache.fields[name].iter_searchable_values(get_metadata,
                                                                candidates)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        '''
        Return the set of book ids that match ``query`` in ``location``.

        :param candidates: The book ids to search in, or None to search all
                           books. It is never modified.
        :param allow_recursion: If False, a location that resolves to a
                           group of locations raises a ParseException
                           instead of being expanded.
        '''
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        if (len(location) > 2 and location.startswith('@') and
                location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        # original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                # Copy once, outside the loop, so that books already matched
                # by one location are not searched again in the next one
                # (same approach as the restricted 'all' search below)
                c = candidates.copy()
                for loc in location:
                    m = self.get_matches(loc, query,
                            candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(query, len(query), 'Recursive query group detected')

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
                self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                            candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except Exception:
                        # Best effort: a query that is invalid for one
                        # column must not abort the search in the others
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            # take care of dates special case
            if (fm['datatype'] == 'datetime' or
                    (fm['datatype'] == 'composite' and
                     fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

        return matches
|
||||||
|
|
||||||
|
|
||||||
|
class Search(object):

    '''
    Entry point for searching: combines a query with a search restriction
    and evaluates the result with a freshly constructed Parser.
    '''

    def __init__(self, all_search_locations):
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()

    def change_locations(self, newlocs):
        ' Replace the set of valid search locations used for future searches '
        self.all_search_locations = newlocs

    def __call__(self, dbcache, query, search_restriction):
        '''
        Return the set of ids of all records that match the specified
        query and restriction. If both are empty, the ids of all books are
        returned.
        '''
        q = ''
        if not query or not query.strip():
            q = search_restriction
        else:
            q = query
            if search_restriction:
                q = u'(%s) and (%s)' % (search_restriction, query)

        all_book_ids = dbcache.all_book_ids(type=set)
        if not q:
            return all_book_ids

        # We construct a new parser instance per search as pyparsing is not
        # thread safe. On my desktop, constructing a SearchQueryParser instance
        # takes 0.000975 seconds and restoring it from a pickle takes
        # 0.000974 seconds.
        sqp = Parser(
            dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
            self.date_search, prefs[ 'limit_search_columns' ],
            prefs[ 'limit_search_columns_to' ], self.all_search_locations)
        try:
            # Parse the combined query, not the raw one, otherwise the
            # search restriction is silently ignored
            ret = sqp.parse(q)
        finally:
            # Drop the parser's reference to the cache once the search is done
            sqp.dbcache = None
        return ret
|
||||||
|
|
@ -191,6 +191,26 @@ class ReadingTest(BaseTest):
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
def test_searching(self):  # {{{
    'Test searching returns the same data for both backends'
    from calibre.library.database2 import LibraryDatabase2
    queries = (
        'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011',
        '#date:true', 'date:<100daysago', 'date:>9/6/2011',
        '#date:>9/1/2011', '#date:=2011',
    )

    # Collect the reference answers from the old backend first
    legacy_db = LibraryDatabase2(self.library_path)
    expected = {}
    for q in queries:
        expected[q] = set(legacy_db.search_getting_ids(q, ''))
    # Drop the reference to the old backend before opening the new one
    legacy_db = None

    new_cache = self.init_cache(self.library_path)
    for q, old_result in expected.iteritems():
        new_result = new_cache.search(q, '')
        self.assertEqual(old_result, new_result,
            'Old result: %r != New result: %r for search: %s'%(
                old_result, new_result, q))

    # }}}
|
||||||
|
|
||||||
def tests():
|
def tests():
|
||||||
return unittest.TestLoader().loadTestsFromTestCase(ReadingTest)
|
return unittest.TestLoader().loadTestsFromTestCase(ReadingTest)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user