Merge from trunk

This commit is contained in:
Charles Haley 2013-01-20 15:45:59 +01:00
commit 9e1627569f
12 changed files with 608 additions and 73 deletions

View File

@ -64,8 +64,8 @@ class Barrons(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:
br.open('http://commerce.barrons.com/auth/login')
br.select_form(name='login_form')
br['user'] = self.username
br.select_form(nr=0)
br['username'] = self.username
br['password'] = self.password
br.submit()
return br

0
recipes/conowego_pl.recipe Executable file → Normal file
View File

View File

Before

Width:  |  Height:  |  Size: 605 B

After

Width:  |  Height:  |  Size: 605 B

0
recipes/linux_journal.recipe Executable file → Normal file
View File

View File

@ -18,6 +18,8 @@ class MichelleMalkin(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
auto_cleanup = True
use_embedded_content = False
conversion_options = {
@ -29,16 +31,16 @@ class MichelleMalkin(BasicNewsRecipe):
}
keep_only_tags = [
dict(name='div', attrs={'class':'article'})
]
#keep_only_tags = [
#dict(name='div', attrs={'class':'article'})
#]
remove_tags = [
dict(name=['iframe', 'embed', 'object']),
dict(name='div', attrs={'id':['comments', 'commentForm']}),
dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
#remove_tags = [
#dict(name=['iframe', 'embed', 'object']),
#dict(name='div', attrs={'id':['comments', 'commentForm']}),
#dict(name='div', attrs={'class':['postCategories', 'comments', 'blogInfo', 'postInfo']})
]
#]
feeds = [(u'http://feeds.feedburner.com/michellemalkin/posts')]

View File

@ -13,6 +13,7 @@ from functools import wraps, partial
from calibre.db.locking import create_locks, RecordLock
from calibre.db.fields import create_field
from calibre.db.search import Search
from calibre.db.tables import VirtualTable
from calibre.db.lazy import FormatMetadata, FormatsList
from calibre.ebooks.metadata.book.base import Metadata
@ -50,6 +51,7 @@ class Cache(object):
self.record_lock = RecordLock(self.read_lock)
self.format_metadata_cache = defaultdict(dict)
self.formatter_template_cache = {}
self._search_api = Search(self.field_metadata.get_search_terms())
# Implement locking for all simple read/write API methods
# An unlocked version of the method is stored with the name starting
@ -269,11 +271,11 @@ class Cache(object):
return ()
@read_api
def all_book_ids(self):
def all_book_ids(self, type=frozenset):
'''
Frozen set of all known book ids.
'''
return frozenset(self.fields['uuid'])
return type(self.fields['uuid'])
@read_api
def all_field_ids(self, name):
@ -316,6 +318,10 @@ class Cache(object):
self.format_metadata_cache[book_id][fmt] = ans
return ans
@read_api
def pref(self, name):
return self.backend.prefs[name]
@api
def get_metadata(self, book_id,
get_cover=False, get_user_categories=True, cover_as_data=False):
@ -378,17 +384,21 @@ class Cache(object):
all_book_ids = frozenset(self._all_book_ids() if ids_to_sort is None
else ids_to_sort)
get_metadata = partial(self._get_metadata, get_user_categories=False)
def get_lang(book_id):
ans = self._field_for('languages', book_id)
return ans[0] if ans else None
fm = {'title':'sort', 'authors':'author_sort'}
def sort_key(field):
'Handle series type fields'
ans = self.fields[fm.get(field, field)].sort_keys_for_books(get_metadata,
all_book_ids)
idx = field + '_index'
if idx in self.fields:
idx_ans = self.fields[idx].sort_keys_for_books(get_metadata,
all_book_ids)
is_series = idx in self.fields
ans = self.fields[fm.get(field, field)].sort_keys_for_books(
get_metadata, get_lang, all_book_ids,)
if is_series:
idx_ans = self.fields[idx].sort_keys_for_books(
get_metadata, get_lang, all_book_ids)
ans = {k:(v, idx_ans[k]) for k, v in ans.iteritems()}
return ans
@ -401,6 +411,10 @@ class Cache(object):
else:
return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
@read_api
def search(self, query, restriction):
return self._search_api(self, query, restriction)
# }}}
class SortKey(object):

View File

@ -9,14 +9,19 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from threading import Lock
from collections import defaultdict
from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
from calibre.ebooks.metadata import title_sort
from calibre.utils.config_base import tweaks
from calibre.utils.icu import sort_key
from calibre.utils.date import UNDEFINED_DATE
from calibre.utils.localization import calibre_langcode_to_name
class Field(object):
is_many = False
def __init__(self, name, table):
self.name, self.table = name, table
self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
@ -72,7 +77,7 @@ class Field(object):
'''
return iter(())
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
'''
Return a mapping of book_id -> sort_key. The sort key is suitable for
use in sorting the list of all books by this field, via the python cmp
@ -81,6 +86,13 @@ class Field(object):
'''
raise NotImplementedError()
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
'''
Return a generator that yields items of the form (value, set of books
ids that have this value). Here, value is a searchable value. Returned
books_ids are restricted to the set of ids in candidates.
'''
raise NotImplementedError()
class OneToOneField(Field):
@ -96,10 +108,24 @@ class OneToOneField(Field):
def __iter__(self):
return self.table.book_col_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
return {id_ : self._sort_key(self.table.book_col_map.get(id_,
self._default_sort_key)) for id_ in all_book_ids}
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.book_col_map
if (self.name in {'id', 'uuid', 'title'} or
self.metadata['datatype'] == 'datetime'):
# Values are likely to be unique
for book_id in candidates:
yield cbm.get(book_id, default_value), {book_id}
else:
val_map = defaultdict(set)
for book_id in candidates:
val_map[cbm.get(book_id, default_value)].add(book_id)
for val, book_ids in val_map.iteritems():
yield val, book_ids
class CompositeField(OneToOneField):
def __init__(self, *args, **kwargs):
@ -133,10 +159,16 @@ class CompositeField(OneToOneField):
ans = mi.get('#'+self.metadata['label'])
return ans
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
all_book_ids}
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
val_map = defaultdict(set)
for book_id in candidates:
val_map[self.get_value_with_cache(book_id, get_metadata)].add(book_id)
for val, book_ids in val_map.iteritems():
yield val, book_ids
class OnDeviceField(OneToOneField):
@ -170,12 +202,21 @@ class OnDeviceField(OneToOneField):
def __iter__(self):
return iter(())
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
return {id_ : self.for_book(id_) for id_ in
all_book_ids}
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
val_map = defaultdict(set)
for book_id in candidates:
val_map[self.for_book(book_id, default_value=default_value)].add(book_id)
for val, book_ids in val_map.iteritems():
yield val, book_ids
class ManyToOneField(Field):
is_many = True
def for_book(self, book_id, default_value=None):
ids = self.table.book_col_map.get(book_id, None)
if ids is not None:
@ -196,7 +237,7 @@ class ManyToOneField(Field):
def __iter__(self):
return self.table.id_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
ans = {id_ : self.table.book_col_map.get(id_, None)
for id_ in all_book_ids}
sk_map = {cid : (self._default_sort_key if cid is None else
@ -204,8 +245,17 @@ class ManyToOneField(Field):
for cid in ans.itervalues()}
return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.col_book_map
for item_id, val in self.table.id_map.iteritems():
book_ids = set(cbm.get(item_id, ())).intersection(candidates)
if book_ids:
yield val, book_ids
class ManyToManyField(Field):
is_many = True
def __init__(self, *args, **kwargs):
Field.__init__(self, *args, **kwargs)
self.alphabetical_sort = self.name != 'authors'
@ -227,7 +277,7 @@ class ManyToManyField(Field):
def __iter__(self):
return self.table.id_map.iterkeys()
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
ans = {id_ : self.table.book_col_map.get(id_, ())
for id_ in all_book_ids}
all_cids = set()
@ -239,6 +289,20 @@ class ManyToManyField(Field):
(self._default_sort_key,))
for id_, cids in ans.iteritems()}
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
cbm = self.table.col_book_map
for item_id, val in self.table.id_map.iteritems():
book_ids = set(cbm.get(item_id, ())).intersection(candidates)
if book_ids:
yield val, book_ids
def iter_counts(self, candidates):
val_map = defaultdict(set)
cbm = self.table.book_col_map
for book_id in candidates:
val_map[len(cbm.get(book_id, ()))].add(book_id)
for count, book_ids in val_map.iteritems():
yield count, book_ids
class IdentifiersField(ManyToManyField):
@ -248,7 +312,7 @@ class IdentifiersField(ManyToManyField):
ids = default_value
return ids
def sort_keys_for_books(self, get_metadata, all_book_ids):
def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
'Sort by identifier keys'
ans = {id_ : self.table.book_col_map.get(id_, ())
for id_ in all_book_ids}
@ -274,6 +338,32 @@ class FormatsField(ManyToManyField):
def format_fname(self, book_id, fmt):
return self.table.fname_map[book_id][fmt.upper()]
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
val_map = defaultdict(set)
cbm = self.table.book_col_map
for book_id in candidates:
vals = cbm.get(book_id, ())
for val in vals:
val_map[val].add(book_id)
for val, book_ids in val_map.iteritems():
yield val, book_ids
class SeriesField(ManyToOneField):

    def sort_key_for_series(self, book_id, get_lang, series_sort_order):
        # Compute the sort key for one book's series value, honoring the
        # series_sort_order mode and the book's language (get_lang is a
        # callable book_id -> language code, may return None).
        sid = self.table.book_col_map.get(book_id, None)
        if sid is None:
            # Book has no series; sort with the field's default key
            return self._default_sort_key
        return self._sort_key(title_sort(self.table.id_map[sid],
                                         order=series_sort_order,
                                         lang=get_lang(book_id)))

    def sort_keys_for_books(self, get_metadata, get_lang, all_book_ids):
        # Map every book id to its series sort key. The tweak is read once
        # and passed down so it is not looked up per book.
        sso = tweaks['title_series_sorting']
        return {book_id:self.sort_key_for_series(book_id, get_lang, sso) for book_id
                in all_book_ids}
def create_field(name, table):
cls = {
ONE_ONE : OneToOneField,
@ -290,5 +380,7 @@ def create_field(name, table):
cls = IdentifiersField
elif table.metadata['datatype'] == 'composite':
cls = CompositeField
elif table.metadata['datatype'] == 'series':
cls = SeriesField
return cls(name, table)

390
src/calibre/db/search.py Normal file
View File

@ -0,0 +1,390 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from functools import partial
from datetime import timedelta
from calibre.utils.config_base import prefs
from calibre.utils.date import parse_date, UNDEFINED_DATE, now
from calibre.utils.search_query_parser import SearchQueryParser, ParseException
# TODO: Thread safety of saved searches
class DateSearch(object): # {{{
    '''
    Match date-valued search queries (relational operators, relative-date
    keywords, explicit dates) against (value, book_ids) pairs produced by a
    field iterator.
    '''

    def __init__(self):
        # Operator prefix -> (prefix length, matcher method). The length is
        # used to strip the operator off the front of the query string.
        self.operators = {
            '=' : (1, self.eq),
            '!=' : (2, self.ne),
            '>' : (1, self.gt),
            '>=' : (2, self.ge),
            '<' : (1, self.lt),
            '<=' : (2, self.le),
        }
        # Accepted spellings of the relative-date keywords; icu_lower(_(...))
        # also admits the current UI language's translation.
        self.local_today = { '_today', 'today', icu_lower(_('today')) }
        self.local_yesterday = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) }
        self.local_thismonth = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) }
        self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))

    def eq(self, dbdate, query, field_count):
        # field_count sets the comparison precision: 1 = year only,
        # 2 = year and month, 3 = full date.
        if dbdate.year == query.year:
            if field_count == 1:
                return True
            if dbdate.month == query.month:
                if field_count == 2:
                    return True
                return dbdate.day == query.day
        return False

    def ne(self, *args):
        return not self.eq(*args)

    def gt(self, dbdate, query, field_count):
        # True iff dbdate is strictly after query, at field_count precision.
        if dbdate.year > query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month > query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day > query.day)
        return False

    def le(self, *args):
        # <= is the negation of >
        return not self.gt(*args)

    def lt(self, dbdate, query, field_count):
        # True iff dbdate is strictly before query, at field_count precision.
        if dbdate.year < query.year:
            return True
        if field_count > 1 and dbdate.year == query.year:
            if dbdate.month < query.month:
                return True
            return (field_count == 3 and dbdate.month == query.month and
                    dbdate.day < query.day)
        return False

    def ge(self, *args):
        # >= is the negation of <
        return not self.lt(*args)

    def __call__(self, query, field_iter):
        '''
        Return the set of book ids whose date value matches query.
        field_iter() yields (value, set of book ids) pairs; string values are
        parsed into dates on the fly. Raises ParseException on bad input.
        '''
        matches = set()
        if len(query) < 2:
            return matches

        if query == 'false':
            # Match books whose date is missing/undefined
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is None or v <= UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        if query == 'true':
            # Match books that have a defined date
            for v, book_ids in field_iter():
                if isinstance(v, (str, unicode)):
                    v = parse_date(v)
                if v is not None and v > UNDEFINED_DATE:
                    matches |= book_ids
            return matches

        # Strip a leading relational operator, if any; default is equality
        relop = None
        for k, op in self.operators.iteritems():
            if query.startswith(k):
                p, relop = op
                query = query[p:]
        if relop is None:
            relop = self.operators['='][-1]

        # Resolve the query into a concrete date (qd) and a precision
        # (field_count): keywords, Ndaysago, or an explicit date string.
        if query in self.local_today:
            qd = now()
            field_count = 3
        elif query in self.local_yesterday:
            qd = now() - timedelta(1)
            field_count = 3
        elif query in self.local_thismonth:
            qd = now()
            field_count = 2
        else:
            m = self.daysago_pat.search(query)
            if m is not None:
                # Query of the form "<N>daysago"
                num = query[:-len(m.group(1))]
                try:
                    qd = now() - timedelta(int(num))
                except:
                    raise ParseException(query, len(query), 'Number conversion error')
                field_count = 3
            else:
                try:
                    qd = parse_date(query, as_utc=False)
                except:
                    raise ParseException(query, len(query), 'Date conversion error')
                # Precision is inferred from the number of date separators
                if '-' in query:
                    field_count = query.count('-') + 1
                else:
                    field_count = query.count('/') + 1

        for v, book_ids in field_iter():
            if isinstance(v, (str, unicode)):
                v = parse_date(v)
            if v is not None and relop(v, qd, field_count):
                matches |= book_ids

        return matches
# }}}
class NumericSearch(object): # {{{
    '''
    Match numeric search queries (ratings, ints, floats, numeric composites)
    against (value, book_ids) pairs produced by a field iterator.
    '''

    def __init__(self):
        # Operator prefix -> (prefix length, comparison). None on the record
        # side fails every ordering comparison but can satisfy = / !=.
        self.operators = {
            '=':( 1, lambda r, q: r == q ),
            '>':( 1, lambda r, q: r is not None and r > q ),
            '<':( 1, lambda r, q: r is not None and r < q ),
            '!=':( 2, lambda r, q: r != q ),
            '>=':( 2, lambda r, q: r is not None and r >= q ),
            '<=':( 2, lambda r, q: r is not None and r <= q )
        }

    def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
        '''
        Return the set of book ids matching the numeric query. candidates is
        the full set being searched (used to invert 'false' on many-valued
        fields). Raises ParseException on non-numeric input.
        '''
        matches = set()
        if not query:
            return matches

        q = ''
        # cast converts raw values to numbers, adjust rescales them;
        # both default to identity and are overridden per datatype below.
        cast = adjust = lambda x: x
        dt = datatype

        if is_many and query in {'true', 'false'}:
            # Presence test on a many-valued field: collect books that have
            # any (for rating: positive) value, invert for 'false'.
            valcheck = lambda x: True
            if datatype == 'rating':
                valcheck = lambda x: x is not None and x > 0
            found = set()
            for val, book_ids in field_iter():
                if valcheck(val):
                    found |= book_ids
            return found if query == 'true' else candidates - found

        if query == 'false':
            if location == 'cover':
                relop = lambda x,y: not bool(x)
            else:
                relop = lambda x,y: x is None
        elif query == 'true':
            if location == 'cover':
                relop = lambda x,y: bool(x)
            else:
                relop = lambda x,y: x is not None
        else:
            # Strip a leading relational operator; default is equality
            relop = None
            for k, op in self.operators.iteritems():
                if query.startswith(k):
                    p, relop = op
                    query = query[p:]
            if relop is None:
                p, relop = self.operators['=']

            cast = int
            if dt == 'rating':
                # Stored ratings are 0-10; user-visible scale is 0-5 stars
                cast = lambda x: 0 if x is None else int(x)
                adjust = lambda x: x/2
            elif dt in ('float', 'composite'):
                cast = float

            # Optional k/m/g size suffix multiplies the query value
            mult = 1.0
            if len(query) > 1:
                mult = query[-1].lower()
                mult = {'k': 1024.,'m': 1024.**2, 'g': 1024.**3}.get(mult, 1.0)
                if mult != 1.0:
                    query = query[:-1]
            else:
                mult = 1.0

            try:
                q = cast(query) * mult
            except:
                raise ParseException(query, len(query),
                        'Non-numeric value in query: %r'%query)

        for val, book_ids in field_iter():
            if val is None:
                continue
            try:
                v = cast(val)
            except:
                # Value not convertible to a number; treat as no value
                v = None
            if v:
                v = adjust(v)
            if relop(v, q):
                matches |= book_ids
        return matches
# }}}
class Parser(SearchQueryParser):
    '''
    Per-search query parser bound to a db cache. A fresh instance is built
    for every search (see Search.__call__) because the underlying parser is
    not thread safe.
    '''

    def __init__(self, dbcache, all_book_ids, gst, date_search, num_search,
                 limit_search_columns, limit_search_columns_to, locations):
        self.dbcache, self.all_book_ids = dbcache, all_book_ids
        self.all_search_locations = frozenset(locations)
        self.grouped_search_terms = gst
        self.date_search, self.num_search = date_search, num_search
        self.limit_search_columns, self.limit_search_columns_to = (
            limit_search_columns, limit_search_columns_to)
        super(Parser, self).__init__(locations, optimize=True)

    @property
    def field_metadata(self):
        return self.dbcache.field_metadata

    def universal_set(self):
        # Full set of ids the parser operates over
        return self.all_book_ids

    def field_iter(self, name, candidates):
        # Iterator of (searchable value, book ids) for the named field,
        # restricted to candidates
        get_metadata = partial(self.dbcache._get_metadata, get_user_categories=False)
        return self.dbcache.fields[name].iter_searchable_values(get_metadata,
                candidates)

    def get_matches(self, location, query, candidates=None,
                    allow_recursion=True):
        '''
        Return the set of candidate book ids matching query in the given
        search location. Dispatches to the date/numeric searchers for those
        datatypes; grouped search terms recurse one level.
        '''
        # If candidates is not None, it must not be modified. Changing its
        # value will break query optimization in the search parser
        matches = set()

        if candidates is None:
            candidates = self.all_book_ids
        if not candidates or not query or not query.strip():
            return matches
        if location not in self.all_search_locations:
            return matches

        # '@name' addresses a grouped search term explicitly
        if (len(location) > 2 and location.startswith('@') and
                    location[1:] in self.grouped_search_terms):
            location = location[1:]

        # get metadata key associated with the search term. Eliminates
        # dealing with plurals and other aliases
        # original_location = location
        location = self.field_metadata.search_term_to_field_key(
            icu_lower(location.strip()))
        # grouped search terms
        if isinstance(location, list):
            if allow_recursion:
                # 'false' on a group means: in none of the member fields
                if query.lower() == 'false':
                    invert = True
                    query = 'true'
                else:
                    invert = False
                for loc in location:
                    # Shrink the candidate set as matches accumulate so each
                    # member search does less work
                    c = candidates.copy()
                    m = self.get_matches(loc, query,
                            candidates=c, allow_recursion=False)
                    matches |= m
                    c -= m
                    if len(c) == 0:
                        break
                if invert:
                    matches = self.all_book_ids - matches
                return matches
            raise ParseException(query, len(query), 'Recursive query group detected')

        # If the user has asked to restrict searching over all field, apply
        # that restriction
        if (location == 'all' and self.limit_search_columns and
            self.limit_search_columns_to):
            terms = set()
            for l in self.limit_search_columns_to:
                l = icu_lower(l.strip())
                if l and l != 'all' and l in self.all_search_locations:
                    terms.add(l)
            if terms:
                c = candidates.copy()
                for l in terms:
                    try:
                        m = self.get_matches(l, query,
                            candidates=c, allow_recursion=allow_recursion)
                        matches |= m
                        c -= m
                        if len(c) == 0:
                            break
                    except:
                        # Best effort: a failing column must not abort the
                        # whole restricted search
                        pass
                return matches

        if location in self.field_metadata:
            fm = self.field_metadata[location]
            dt = fm['datatype']
            # take care of dates special case
            if (dt == 'datetime' or (
                dt == 'composite' and
                fm['display'].get('composite_sort', '') == 'date')):
                if location == 'date':
                    location = 'timestamp'
                return self.date_search(
                    icu_lower(query), partial(self.field_iter, location, candidates))

            # take care of numbers special case
            if (dt in ('rating', 'int', 'float') or
                    (dt == 'composite' and
                     fm['display'].get('composite_sort', '') == 'number')):
                field = self.dbcache.fields[location]
                return self.num_search(
                    icu_lower(query), partial(self.field_iter, location, candidates),
                    location, dt, candidates, is_many=field.is_many)

            # take care of the 'count' operator for is_multiples
            if (fm['is_multiple'] and
                len(query) > 1 and query[0] == '#' and query[1] in '=<>!'):
                return self.num_search(icu_lower(query[1:]), partial(
                        self.dbcache.fields[location].iter_counts, candidates),
                    location, dt, candidates)

        return matches
class Search(object):
    '''
    Entry point for running search expressions against a Cache. Holds the
    reusable date/numeric sub-searchers and the set of valid locations.
    '''

    def __init__(self, all_search_locations):
        # all_search_locations: iterable of valid search term names
        self.all_search_locations = all_search_locations
        self.date_search = DateSearch()
        self.num_search = NumericSearch()

    def change_locations(self, newlocs):
        '''Replace the set of valid search locations.'''
        self.all_search_locations = newlocs

    def __call__(self, dbcache, query, search_restriction):
        '''
        Return the set of ids of all records that match the specified
        query and restriction
        '''
        if not query or not query.strip():
            q = search_restriction
        else:
            q = query
            if search_restriction:
                # AND the restriction with the user's query
                q = u'(%s) and (%s)' % (search_restriction, query)

        all_book_ids = dbcache.all_book_ids(type=set)
        if not q:
            # Nothing to search for: everything matches
            return all_book_ids

        # We construct a new parser instance per search as pyparsing is not
        # thread safe. On my desktop, constructing a SearchQueryParser instance
        # takes 0.000975 seconds and restoring it from a pickle takes
        # 0.000974 seconds.
        sqp = Parser(
            dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
            self.date_search, self.num_search, prefs['limit_search_columns'],
            prefs['limit_search_columns_to'], self.all_search_locations)
        try:
            # Fix: parse the combined expression q, not the raw query —
            # previously the search_restriction was built into q and then
            # silently ignored.
            ret = sqp.parse(q)
        finally:
            # Drop the cache reference so the parser cannot keep it alive
            sqp.dbcache = None
        return ret

View File

@ -148,11 +148,11 @@ class ManyToManyTable(ManyToOneTable):
'''
table_type = MANY_MANY
selectq = 'SELECT book, {0} FROM {1}'
def read_maps(self, db):
for row in db.conn.execute(
'SELECT book, {0} FROM {1}'.format(
self.metadata['link_column'], self.link_table)):
self.selectq.format(self.metadata['link_column'], self.link_table)):
if row[1] not in self.col_book_map:
self.col_book_map[row[1]] = []
self.col_book_map[row[1]].append(row[0])
@ -168,6 +168,8 @@ class ManyToManyTable(ManyToOneTable):
class AuthorsTable(ManyToManyTable):
selectq = 'SELECT book, {0} FROM {1} ORDER BY id'
def read_id_maps(self, db):
self.alink_map = {}
self.asort_map = {}

View File

@ -7,8 +7,8 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import unittest, os, shutil
from future_builtins import map
class BaseTest(unittest.TestCase):
@ -39,7 +39,10 @@ class BaseTest(unittest.TestCase):
'ondevice_col', 'last_modified'}.union(allfk1)
for attr in all_keys:
if attr == 'user_metadata': continue
if attr == 'format_metadata': continue # TODO: Not implemented yet
attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr)
if attr == 'formats':
attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2))
self.assertEqual(attr1, attr2,
'%s not the same: %r != %r'%(attr, attr1, attr2))
if attr.startswith('#'):

Binary file not shown.

View File

@ -10,7 +10,7 @@ __docformat__ = 'restructuredtext en'
import shutil, unittest, tempfile, datetime
from cStringIO import StringIO
from calibre.utils.date import local_tz
from calibre.utils.date import utc_tz
from calibre.db.tests.base import BaseTest
class ReadingTest(BaseTest):
@ -37,12 +37,12 @@ class ReadingTest(BaseTest):
'tags': (),
'formats':(),
'identifiers': {},
'timestamp': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'pubdate': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'last_modified': datetime.datetime(2011, 9, 7, 13, 54, 41,
tzinfo=local_tz),
'timestamp': datetime.datetime(2011, 9, 7, 19, 54, 41,
tzinfo=utc_tz),
'pubdate': datetime.datetime(2011, 9, 7, 19, 54, 41,
tzinfo=utc_tz),
'last_modified': datetime.datetime(2011, 9, 7, 19, 54, 41,
tzinfo=utc_tz),
'publisher': None,
'languages': (),
'comments': None,
@ -63,23 +63,23 @@ class ReadingTest(BaseTest):
'sort': 'One',
'authors': ('Author One',),
'author_sort': 'One, Author',
'series' : 'Series One',
'series' : 'A Series One',
'series_index': 1.0,
'tags':('Tag Two', 'Tag One'),
'tags':('Tag One', 'Tag Two'),
'formats': (),
'rating': 4.0,
'identifiers': {'test':'one'},
'timestamp': datetime.datetime(2011, 9, 5, 15, 6,
tzinfo=local_tz),
'pubdate': datetime.datetime(2011, 9, 5, 15, 6,
tzinfo=local_tz),
'timestamp': datetime.datetime(2011, 9, 5, 21, 6,
tzinfo=utc_tz),
'pubdate': datetime.datetime(2011, 9, 5, 21, 6,
tzinfo=utc_tz),
'publisher': 'Publisher One',
'languages': ('eng',),
'comments': '<p>Comments One</p>',
'#enum':'One',
'#authors':('Custom One', 'Custom Two'),
'#date':datetime.datetime(2011, 9, 5, 0, 0,
tzinfo=local_tz),
'#date':datetime.datetime(2011, 9, 5, 6, 0,
tzinfo=utc_tz),
'#rating':2.0,
'#series':'My Series One',
'#series_index': 1.0,
@ -92,23 +92,23 @@ class ReadingTest(BaseTest):
'sort': 'Title Two',
'authors': ('Author Two', 'Author One'),
'author_sort': 'Two, Author & One, Author',
'series' : 'Series One',
'series' : 'A Series One',
'series_index': 2.0,
'rating': 6.0,
'tags': ('Tag One',),
'formats':(),
'identifiers': {'test':'two'},
'timestamp': datetime.datetime(2011, 9, 6, 0, 0,
tzinfo=local_tz),
'pubdate': datetime.datetime(2011, 8, 5, 0, 0,
tzinfo=local_tz),
'timestamp': datetime.datetime(2011, 9, 6, 6, 0,
tzinfo=utc_tz),
'pubdate': datetime.datetime(2011, 8, 5, 6, 0,
tzinfo=utc_tz),
'publisher': 'Publisher Two',
'languages': ('deu',),
'comments': '<p>Comments Two</p>',
'#enum':'Two',
'#authors':('My Author Two',),
'#date':datetime.datetime(2011, 9, 1, 0, 0,
tzinfo=local_tz),
'#date':datetime.datetime(2011, 9, 1, 6, 0,
tzinfo=utc_tz),
'#rating':4.0,
'#series':'My Series Two',
'#series_index': 3.0,
@ -130,30 +130,31 @@ class ReadingTest(BaseTest):
'Test sorting'
cache = self.init_cache(self.library_path)
for field, order in {
'title' : [2, 1, 3],
'authors': [2, 1, 3],
'series' : [3, 2, 1],
'tags' : [3, 1, 2],
'rating' : [3, 2, 1],
# 'identifiers': [3, 2, 1], There is no stable sort since 1 and
# 2 have the same identifier keys
# TODO: Add an empty book to the db and ensure that empty
# fields sort the same as they do in db2
'timestamp': [2, 1, 3],
'pubdate' : [1, 2, 3],
'publisher': [3, 2, 1],
'last_modified': [2, 1, 3],
'languages': [3, 2, 1],
'comments': [3, 2, 1],
'#enum' : [3, 2, 1],
'#authors' : [3, 2, 1],
'#date': [3, 1, 2],
'#rating':[3, 2, 1],
'#series':[3, 2, 1],
'#tags':[3, 2, 1],
'#yesno':[3, 1, 2],
'#comments':[3, 2, 1],
}.iteritems():
'title' : [2, 1, 3],
'authors': [2, 1, 3],
'series' : [3, 1, 2],
'tags' : [3, 1, 2],
'rating' : [3, 2, 1],
# 'identifiers': [3, 2, 1], There is no stable sort since 1 and
# 2 have the same identifier keys
# 'last_modified': [3, 2, 1], There is no stable sort as two
# records have the exact same value
'timestamp': [2, 1, 3],
'pubdate' : [1, 2, 3],
'publisher': [3, 2, 1],
'languages': [3, 2, 1],
'comments': [3, 2, 1],
'#enum' : [3, 2, 1],
'#authors' : [3, 2, 1],
'#date': [3, 1, 2],
'#rating':[3, 2, 1],
'#series':[3, 2, 1],
'#tags':[3, 2, 1],
'#yesno':[3, 1, 2],
'#comments':[3, 2, 1],
# TODO: Add an empty book to the db and ensure that empty
# fields sort the same as they do in db2
}.iteritems():
x = list(reversed(order))
self.assertEqual(order, cache.multisort([(field, True)],
ids_to_sort=x),
@ -190,6 +191,37 @@ class ReadingTest(BaseTest):
# }}}
def test_searching(self): # {{{
'Test searching returns the same data for both backends'
from calibre.library.database2 import LibraryDatabase2
old = LibraryDatabase2(self.library_path)
oldvals = {query:set(old.search_getting_ids(query, '')) for query in (
# Date tests
'date:9/6/2011', 'date:true', 'date:false', 'pubdate:9/2011',
'#date:true', 'date:<100daysago', 'date:>9/6/2011',
'#date:>9/1/2011', '#date:=2011',
# Number tests
'rating:3', 'rating:>2', 'rating:=2', 'rating:true',
'rating:false', 'rating:>4', 'tags:#<2', 'tags:#>7',
'cover:false', 'cover:true', '#float:>11', '#float:<1k',
'#float:10.01', 'series_index:1', 'series_index:<3', 'id:1',
'id:>2',
# TODO: Tests for searching the size column and
# cover:true|false
)}
old = None
cache = self.init_cache(self.library_path)
for query, ans in oldvals.iteritems():
nr = cache.search(query, '')
self.assertEqual(ans, nr,
'Old result: %r != New result: %r for search: %s'%(
ans, nr, query))
# }}}
def tests():
return unittest.TestLoader().loadTestsFromTestCase(ReadingTest)