diff --git a/src/calibre/db/fields.py b/src/calibre/db/fields.py index 43e89cdc6f..194cb33011 100644 --- a/src/calibre/db/fields.py +++ b/src/calibre/db/fields.py @@ -20,6 +20,8 @@ from calibre.utils.localization import calibre_langcode_to_name class Field(object): + is_many = False + def __init__(self, name, table): self.name, self.table = name, table self.has_text_data = self.metadata['datatype'] in ('text', 'comments', @@ -200,6 +202,8 @@ class OnDeviceField(OneToOneField): class ManyToOneField(Field): + is_many = True + def for_book(self, book_id, default_value=None): ids = self.table.book_col_map.get(book_id, None) if ids is not None: @@ -237,6 +241,8 @@ class ManyToOneField(Field): class ManyToManyField(Field): + is_many = True + def __init__(self, *args, **kwargs): Field.__init__(self, *args, **kwargs) self.alphabetical_sort = self.name != 'authors' @@ -277,6 +283,14 @@ class ManyToManyField(Field): if book_ids: yield val, book_ids + def iter_counts(self, candidates): + val_map = defaultdict(set) + cbm = self.table.book_col_map + for book_id in candidates: + val_map[len(cbm.get(book_id, ()))].add(book_id) + for count, book_ids in val_map.iteritems(): + yield count, book_ids + class IdentifiersField(ManyToManyField): def for_book(self, book_id, default_value=None): diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index d304deeb9a..fe9cec79c8 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -138,14 +138,101 @@ class DateSearch(object): # {{{ return matches # }}} +class NumericSearch(object): # {{{ + + def __init__(self): + self.operators = { + '=':( 1, lambda r, q: r == q ), + '>':( 1, lambda r, q: r is not None and r > q ), + '<':( 1, lambda r, q: r is not None and r < q ), + '!=':( 2, lambda r, q: r != q ), + '>=':( 2, lambda r, q: r is not None and r >= q ), + '<=':( 2, lambda r, q: r is not None and r <= q ) + } + + def __call__(self, query, field_iter, location, datatype, candidates, is_many=False): + matches = set() + if not query: + return matches + + q = '' + cast = adjust = lambda x: x + dt = datatype + + if is_many and query in {'true', 'false'}: + valcheck = lambda x: True + if datatype == 'rating': + valcheck = lambda x: x is not None and x > 0 + found = set() + for val, book_ids in field_iter(): + if valcheck(val): + found |= book_ids + return found if query == 'true' else candidates - found + + if query == 'false': + if location == 'cover': + relop = lambda x,y: not bool(x) + else: + relop = lambda x,y: x is None + elif query == 'true': + if location == 'cover': + relop = lambda x,y: bool(x) + else: + relop = lambda x,y: x is not None + else: + relop = None + for k, op in self.operators.iteritems(): + if query.startswith(k): + p, relop = op + query = query[p:] + if relop is None: + p, relop = self.operators['='] + + cast = int + if dt == 'rating': + cast = lambda x: 0 if x is None else int(x) + adjust = lambda x: x/2 + elif dt in ('float', 'composite'): + cast = float + + mult = 1.0 + if len(query) > 1: + mult = query[-1].lower() + mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0) + if mult != 1.0: + query = query[:-1] + else: + mult = 1.0 + + try: + q = cast(query) * mult + except: + raise ParseException(query, len(query), + 'Non-numeric value in query: %r'%query) + + for val, book_ids in field_iter(): + if val is None: + continue + try: + v = cast(val) + except: + v = None + if v: + v = adjust(v) + if relop(v, q): + matches |= book_ids + return matches + +# }}} + class Parser(SearchQueryParser): - def __init__(self, dbcache, all_book_ids, gst, date_search, + def __init__(self, dbcache, all_book_ids, gst, date_search, num_search, limit_search_columns, limit_search_columns_to, locations): self.dbcache, self.all_book_ids = dbcache, all_book_ids self.all_search_locations = frozenset(locations) self.grouped_search_terms = gst - self.date_search = date_search + self.date_search, self.num_search = date_search, num_search self.limit_search_columns, self.limit_search_columns_to = ( limit_search_columns, limit_search_columns_to) super(Parser, self).__init__(locations, optimize=True) @@ -230,15 +317,33 @@ class Parser(SearchQueryParser): if location in self.field_metadata: fm = self.field_metadata[location] + dt = fm['datatype'] + # take care of dates special case - if (fm['datatype'] == 'datetime' or - (fm['datatype'] == 'composite' and - fm['display'].get('composite_sort', '') == 'date')): + if (dt == 'datetime' or ( + dt == 'composite' and + fm['display'].get('composite_sort', '') == 'date')): if location == 'date': location = 'timestamp' return self.date_search( icu_lower(query), partial(self.field_iter, location, candidates)) + # take care of numbers special case + if (dt in ('rating', 'int', 'float') or + (dt == 'composite' and + fm['display'].get('composite_sort', '') == 'number')): + field = self.dbcache.fields[location] + return self.num_search( + icu_lower(query), partial(self.field_iter, location, candidates), + location, dt, candidates, is_many=field.is_many) + + # take care of the 'count' operator for is_multiples + if (fm['is_multiple'] and + len(query) > 1 and query[0] == '#' and query[1] in '=<>!'): + return self.num_search(icu_lower(query[1:]), partial( + self.dbcache.fields[location].iter_counts, candidates), + location, dt, candidates) + return matches @@ -247,6 +352,7 @@ class Search(object): def __init__(self, all_search_locations): self.all_search_locations = all_search_locations self.date_search = DateSearch() + self.num_search = NumericSearch() def change_locations(self, newlocs): self.all_search_locations = newlocs @@ -274,7 +380,7 @@ class Search(object): # 0.000974 seconds. sqp = Parser( dbcache, all_book_ids, dbcache.pref('grouped_search_terms'), - self.date_search, prefs[ 'limit_search_columns' ], + self.date_search, self.num_search, prefs[ 'limit_search_columns' ], prefs[ 'limit_search_columns_to' ], self.all_search_locations) try: ret = sqp.parse(query) diff --git a/src/calibre/db/tables.py b/src/calibre/db/tables.py index 58768c9ff5..234a7fe4a8 100644 --- a/src/calibre/db/tables.py +++ b/src/calibre/db/tables.py @@ -148,11 +148,11 @@ class ManyToManyTable(ManyToOneTable): ''' table_type = MANY_MANY + selectq = 'SELECT book, {0} FROM {1}' def read_maps(self, db): for row in db.conn.execute( - 'SELECT book, {0} FROM {1} ORDER BY id'.format( - self.metadata['link_column'], self.link_table)): + self.selectq.format(self.metadata['link_column'], self.link_table)): if row[1] not in self.col_book_map: self.col_book_map[row[1]] = [] self.col_book_map[row[1]].append(row[0]) @@ -168,6 +168,8 @@ class ManyToManyTable(ManyToOneTable): class AuthorsTable(ManyToManyTable): + selectq = 'SELECT book, {0} FROM {1} ORDER BY id' + def read_id_maps(self, db): self.alink_map = {} self.asort_map = {} diff --git a/src/calibre/db/tests/metadata.db b/src/calibre/db/tests/metadata.db index da54c61ad5..4bd6dfe4f9 100644 Binary files a/src/calibre/db/tests/metadata.db and b/src/calibre/db/tests/metadata.db differ diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 22d1bba37e..35f4a7333d 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -65,7 +65,7 @@ class ReadingTest(BaseTest): 'author_sort': 'One, Author', 'series' : 'A Series One', 'series_index': 1.0, - 'tags':('Tag Two', 'Tag One'), + 'tags':('Tag One', 'Tag Two'), 'formats': (), 'rating': 4.0, 'identifiers': {'test':'one'}, @@ -196,9 +196,19 @@ class ReadingTest(BaseTest): from calibre.library.database2 import LibraryDatabase2 old = LibraryDatabase2(self.library_path) oldvals = {query:set(old.search_getting_ids(query, '')) for query in ( + # Date tests 'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011', '#date:true', 'date:<100daysago', 'date:>9/6/2011', '#date:>9/1/2011', '#date:=2011', + + # Number tests + 'rating:3', 'rating:>2', 'rating:=2', 'rating:true', + 'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7', + 'cover:false', 'cover:true', '#float:>11', '#float:<1k', + '#float:10.01', + + # TODO: Tests for searching the size column and + # cover:true|false )} old = None