mirror of https://github.com/kovidgoyal/calibre.git

commit 04bbda5e5a
merge from trunk
@@ -29,7 +29,7 @@ class ANDROID(USBMS):
     # Motorola
     0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-               0x4286 : [0x216] },
+               0x4286 : [0x216], 0x42b3 : [0x216] },

     # Sony Ericsson
     0xfce : { 0xd12e : [0x0100]},

@@ -324,14 +324,16 @@ class Metadata(object):
         if metadata is None:
             traceback.print_stack()
             return
-        metadata = copy.deepcopy(metadata)
-        if '#value#' not in metadata:
-            if metadata['datatype'] == 'text' and metadata['is_multiple']:
-                metadata['#value#'] = []
+        m = {}
+        for k in metadata:
+            m[k] = copy.copy(metadata[k])
+        if '#value#' not in m:
+            if m['datatype'] == 'text' and m['is_multiple']:
+                m['#value#'] = []
             else:
-                metadata['#value#'] = None
+                m['#value#'] = None
         _data = object.__getattribute__(self, '_data')
-        _data['user_metadata'][field] = metadata
+        _data['user_metadata'][field] = m

     def template_to_attribute(self, other, ops):
         '''

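Why the deepcopy was dropped (an illustrative sketch, not part of the commit; the dict below is invented): copy.deepcopy walks every nested object on each call, while the per-key copy.copy above copies only one level down, which is enough when callers never mutate anything deeper in place.

    import copy

    # A made-up stand-in for calibre's per-field metadata dict.
    meta = {'datatype': 'text', 'is_multiple': ',', 'display': {'sort_alpha': True}}

    # Per-key shallow copy: each top-level value is copied once.
    m = {}
    for k in meta:
        m[k] = copy.copy(meta[k])

    assert m is not meta                        # independent outer dict
    assert m['display'] is not meta['display']  # one level down is copied too
    m['#value#'] = []                           # safe: the original is untouched
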
@@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en'
 Transform OEB content into plain text
 '''

-import os
 import re

 from lxml import etree

@@ -33,6 +32,15 @@ BLOCK_STYLES = [
     'block',
 ]

+HEADING_TAGS = [
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+]
+
 SPACE_TAGS = [
     'td',
     'br',

@@ -47,6 +55,10 @@ class TXTMLizer(object):
         self.log.info('Converting XHTML to TXT...')
         self.oeb_book = oeb_book
         self.opts = opts
+        self.toc_ids = []
+        self.last_was_heading = False
+
+        self.create_flat_toc(self.oeb_book.toc)

         return self.mlize_spine()

@@ -58,8 +70,11 @@ class TXTMLizer(object):
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
             content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
             content = self.remove_newlines(content)
-            output += self.dump_text(etree.fromstring(content), stylizer)
-        output = self.cleanup_text(u''.join(output))
+            output += self.dump_text(etree.fromstring(content), stylizer, item)
+            output += '\n\n\n\n\n\n'
+        output = u''.join(output)
+        output = u'\n'.join(l.rstrip() for l in output.splitlines())
+        output = self.cleanup_text(output)

         return output

@@ -68,6 +83,8 @@ class TXTMLizer(object):
         text = text.replace('\r\n', ' ')
         text = text.replace('\n', ' ')
         text = text.replace('\r', ' ')
+        # Condense redundant spaces created by replacing newlines with spaces.
+        text = re.sub(r'[ ]{2,}', ' ', text)

         return text

@@ -80,6 +97,14 @@ class TXTMLizer(object):
             toc.append(u'* %s\n\n' % item.title)
         return ''.join(toc)

+    def create_flat_toc(self, nodes):
+        '''
+        Turns a hierarchical list of TOC href's into a flat list.
+        '''
+        for item in nodes:
+            self.toc_ids.append(item.href)
+            self.create_flat_toc(item.nodes)
+
     def cleanup_text(self, text):
         self.log.debug('\tClean up text...')
         # Replace bad characters.

@@ -92,7 +117,7 @@ class TXTMLizer(object):
         text = text.replace('\f+', ' ')

         # Single line paragraph.
-        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
+        text = re.sub('(?<=.)\n(?=.)', ' ', text)

         # Remove multiple spaces.
         text = re.sub('[ ]{2,}', ' ', text)

@@ -101,14 +126,20 @@ class TXTMLizer(object):
         text = re.sub('\n[ ]+\n', '\n\n', text)
         if self.opts.remove_paragraph_spacing:
             text = re.sub('\n{2,}', '\n', text)
-            text = re.sub('(?imu)^(?=.)', '\t', text)
+            text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text)
+            text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text)
         else:
-            text = re.sub('\n{3,}', '\n\n', text)
+            text = re.sub('\n{7,}', '\n\n\n\n\n\n', text)

         # Replace spaces at the beginning and end of lines
+        # We don't replace tabs because those are only added
+        # when remove paragraph spacing is enabled.
         text = re.sub('(?imu)^[ ]+', '', text)
         text = re.sub('(?imu)[ ]+$', '', text)

+        # Remove empty space and newlines at the beginning of the document.
+        text = re.sub(r'(?u)^[ \n]+', '', text)
+
         if self.opts.max_line_length:
             max_length = self.opts.max_line_length
             if self.opts.max_line_length < 25 and not self.opts.force_max_line_length:

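The blank-line handling above is easier to see on a toy string (a sketch; the sample text is invented, not from calibre's tests):

    import re

    sample = 'One.\n\n\n\n\n\n\n\n\nTwo.\nThree.'

    # With remove_paragraph_spacing off, runs of newlines are capped at six,
    # so a deliberate six-newline section break survives but nothing longer does.
    print(repr(re.sub('\n{7,}', '\n\n\n\n\n\n', sample)))

    # Per-line space stripping, as in cleanup_text.
    s = '  indented \nline  '
    s = re.sub('(?imu)^[ ]+', '', s)
    s = re.sub('(?imu)[ ]+$', '', s)
    print(repr(s))  # 'indented\nline'
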
@@ -145,13 +176,11 @@ class TXTMLizer(object):

         return text

-    def dump_text(self, elem, stylizer, end=''):
+    def dump_text(self, elem, stylizer, page):
         '''
         @elem: The element in the etree that we are working on.
         @stylizer: The style information attached to the element.
-        @end: The last two characters of the text from the previous element.
-              This is used to determine if a blank line is needed when starting
-              a new block element.
+        @page: OEB page used to determine absolute urls.
         '''

         if not isinstance(elem.tag, basestring) \

@@ -170,13 +199,22 @@ class TXTMLizer(object):
             return ['']

         tag = barename(elem.tag)
+        tag_id = elem.attrib.get('id', None)
         in_block = False
+        in_heading = False
+
+        # Are we in a heading?
+        # This can either be a heading tag or a TOC item.
+        if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids:
+            in_heading = True
+            if not self.last_was_heading:
+                text.append('\n\n\n\n\n\n')

         # Are we in a paragraph block?
         if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            if self.opts.remove_paragraph_spacing and not in_heading:
+                text.append(u'\t')
             in_block = True
-            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
-                text.append(u'\n\n')

         if tag in SPACE_TAGS:
             text.append(u' ')

@@ -185,14 +223,17 @@ class TXTMLizer(object):
         if hasattr(elem, 'text') and elem.text:
             text.append(elem.text)

         # Recurse down into tags within the tag we are in.
         for item in elem:
-            en = u''
-            if len(text) >= 2:
-                en = text[-1][-2:]
-            text += self.dump_text(item, stylizer, en)
+            text += self.dump_text(item, stylizer, page)

         if in_block:
             text.append(u'\n\n')
+        if in_heading:
+            text.append(u'\n')
+            self.last_was_heading = True
+        else:
+            self.last_was_heading = False
+
         if hasattr(elem, 'tail') and elem.tail:
             text.append(elem.tail)

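How the new heading detection hangs together (a toy walkthrough with invented TOC objects): create_flat_toc records every TOC href up front, and dump_text then treats an element as a heading when page.href + '#' + tag_id appears in that list.

    class Node(object):
        def __init__(self, href, nodes=()):
            self.href, self.nodes = href, list(nodes)

    toc_ids = []

    def create_flat_toc(nodes):
        # Depth-first walk: record each href, then recurse into children.
        for item in nodes:
            toc_ids.append(item.href)
            create_flat_toc(item.nodes)

    toc = [Node('ch1.html#start', [Node('ch1.html#sec1')]), Node('ch2.html#start')]
    create_flat_toc(toc)

    page_href, tag_id = 'ch1.html', 'sec1'
    print('%s#%s' % (page_href, tag_id) in toc_ids)  # True -> rendered as a heading
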
@@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{
         # Force a reset if the caches are not initialized
         if reset or not hasattr(self, 'db_book_title_cache'):
             # Build a cache (map) of the library, so the search isn't On**2
-            self.db_book_title_cache = {}
-            self.db_book_uuid_cache = {}
+            db_book_title_cache = {}
+            db_book_uuid_cache = {}
             # It might be possible to get here without having initialized the
             # library view. In this case, simply give up
             try:

@@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{
             for id in db.data.iterallids():
                 mi = db.get_metadata(id, index_is_id=True)
                 title = clean_string(mi.title)
-                if title not in self.db_book_title_cache:
-                    self.db_book_title_cache[title] = \
+                if title not in db_book_title_cache:
+                    db_book_title_cache[title] = \
                         {'authors':{}, 'author_sort':{}, 'db_ids':{}}
                 # If there are multiple books in the library with the same title
                 # and author, then remember the last one. That is OK, because as

@@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{
                 # as another.
                 if mi.authors:
                     authors = clean_string(authors_to_string(mi.authors))
-                    self.db_book_title_cache[title]['authors'][authors] = mi
+                    db_book_title_cache[title]['authors'][authors] = mi
                 if mi.author_sort:
                     aus = clean_string(mi.author_sort)
-                    self.db_book_title_cache[title]['author_sort'][aus] = mi
-                self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi
-                self.db_book_uuid_cache[mi.uuid] = mi
+                    db_book_title_cache[title]['author_sort'][aus] = mi
+                db_book_title_cache[title]['db_ids'][mi.application_id] = mi
+                db_book_uuid_cache[mi.uuid] = mi
+            self.db_book_title_cache = db_book_title_cache
+            self.db_book_uuid_cache = db_book_uuid_cache

         # Now iterate through all the books on the device, setting the
         # in_library field. If the UUID matches a book in the library, then

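The point of switching to local names: the maps are built completely in locals and only assigned to self at the end, so an exception partway through (or a reader on another thread) can never observe a half-built cache. A stripped-down sketch with invented book dicts:

    def build_caches(books):
        # Build into locals first; nothing is published until both are complete.
        title_cache, uuid_cache = {}, {}
        for b in books:
            entry = title_cache.setdefault(
                b['title'], {'authors': {}, 'author_sort': {}, 'db_ids': {}})
            entry['db_ids'][b['id']] = b
            uuid_cache[b['uuid']] = b
        return title_cache, uuid_cache

    books = [{'title': 'Dune', 'id': 1, 'uuid': 'u1'}]
    title_cache, uuid_cache = build_caches(books)   # publish atomically here
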
@@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{
         self.search_restriction = ''
         self.field_metadata = field_metadata
         self.all_search_locations = field_metadata.get_search_terms()
-        SearchQueryParser.__init__(self, self.all_search_locations)
+        SearchQueryParser.__init__(self, self.all_search_locations, optimize=True)
         self.build_date_relop_dict()
         self.build_numeric_relop_dict()

@@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, relop_le]
                 }

-    def get_dates_matches(self, location, query):
+    def get_dates_matches(self, location, query, candidates):
        matches = set([])
        if len(query) < 2:
            return matches

@@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{
         loc = self.field_metadata[location]['rec_index']

         if query == 'false':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is None or item[loc] <= UNDEFINED_DATE:
                     matches.add(item[0])
             return matches
         if query == 'true':
-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue
                 if item[loc] is not None and item[loc] > UNDEFINED_DATE:
                     matches.add(item[0])

@@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{
             field_count = query.count('-') + 1
         else:
             field_count = query.count('/') + 1
-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None or item[loc] is None: continue
             if relop(item[loc], qd, field_count):
                 matches.add(item[0])

@@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{
                     '<=':[2, lambda r, q: r <= q]
                 }

-    def get_numeric_matches(self, location, query, val_func = None):
+    def get_numeric_matches(self, location, query, candidates, val_func = None):
        matches = set([])
        if len(query) == 0:
            return matches

@@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{
         except:
             return matches

-        for item in self._data:
+        for id_ in candidates:
+            item = self._data[id_]
             if item is None:
                 continue
             v = val_func(item)

@@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{
                 matches.add(item[0])
         return matches

-    def get_matches(self, location, query, allow_recursion=True):
+    def get_matches(self, location, query, allow_recursion=True, candidates=None):
         matches = set([])
+        if candidates is None:
+            candidates = self.universal_set()
+        if len(candidates) == 0:
+            return matches
+
         if query and query.strip():
             # get metadata key associated with the search term. Eliminates
             # dealing with plurals and other aliases

@@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{
             else:
                 q = query

-            for item in self._data:
+            for id_ in candidates:
+                item = self._data[id_]
                 if item is None: continue

                 if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak

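Every matcher above changes from scanning all of self._data to scanning only the ids in candidates; distilled (rows are invented, and None marks a deleted row, which is why each loop keeps the `if item is None: continue` guard):

    _data = {1: [1, 'abc'], 2: None, 3: [3, 'abd'], 4: [4, 'xyz']}

    def get_matches(query, candidates):
        matches = set()
        for id_ in candidates:          # only candidate rows are examined
            item = _data[id_]
            if item is None:            # deleted row
                continue
            if query in item[1]:
                matches.add(item[0])
        return matches

    print(sorted(get_matches('ab', {1, 2, 3})))  # [1, 3]; row 4 is never touched
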
@@ -195,8 +195,8 @@ class CustomColumns(object):
         data = self.custom_column_num_map[num]
         row = self.data._data[idx] if index_is_id else self.data[idx]
         ans = row[self.FIELD_MAP[data['num']]]
-        if data['is_multiple'] and data['datatype'] == 'text':
-            ans = ans.split('|') if ans else []
+        if ans and data['is_multiple'] and data['datatype'] == 'text':
+            ans = ans.split('|')
             if data['display'].get('sort_alpha', False):
                 ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower()))
         return ans

@@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             'pubdate',
             'flags',
             'uuid',
-            'has_cover'
+            'has_cover',
+            ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)')
         ]
         lines = []
         for col in columns:

@@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
                 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
-                'publisher':9, 'series_index':10,
-                'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15,
-                'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20}
+                'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
+                'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17,
+                'flags':18, 'uuid':19, 'cover':20, 'au_map':21}

         for k,v in self.FIELD_MAP.iteritems():
             self.field_metadata.set_field_record_index(k, v, prefer_custom=False)

@@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         Convenience method to return metadata as a :class:`Metadata` object.
         Note that the list of formats is not verified.
         '''
+        row = self.data._data[idx] if index_is_id else self.data[idx]
+        fm = self.FIELD_MAP
+
         self.gm_count += 1
-        mi = self.data.get(idx, self.FIELD_MAP['all_metadata'],
-                row_is_id = index_is_id)
+        mi = row[self.FIELD_MAP['all_metadata']]
         if mi is not None:
             if get_cover:
                 # Always get the cover, because the value can be wrong if the

@@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

         self.gm_missed += 1
         mi = Metadata(None)
-        self.data.set(idx, self.FIELD_MAP['all_metadata'], mi,
-                row_is_id = index_is_id)
+        self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id)

-        aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id)
+        aut_list = row[fm['au_map']]
+        aut_list = [p.split(':::') for p in aut_list.split(':#:')]
         aum = []
         aus = {}
         for (author, author_sort) in aut_list:
             aum.append(author)
-            aus[author] = author_sort
-        mi.title = self.title(idx, index_is_id=index_is_id)
+            aus[author] = author_sort.replace('|', ',')
+        mi.title = row[fm['title']]
         mi.authors = aum
-        mi.author_sort = self.author_sort(idx, index_is_id=index_is_id)
+        mi.author_sort = row[fm['author_sort']]
         mi.author_sort_map = aus
-        mi.comments = self.comments(idx, index_is_id=index_is_id)
-        mi.publisher = self.publisher(idx, index_is_id=index_is_id)
-        mi.timestamp = self.timestamp(idx, index_is_id=index_is_id)
-        mi.pubdate = self.pubdate(idx, index_is_id=index_is_id)
-        mi.uuid = self.uuid(idx, index_is_id=index_is_id)
-        mi.title_sort = self.title_sort(idx, index_is_id=index_is_id)
-        mi.formats = self.formats(idx, index_is_id=index_is_id,
-                verify_formats=False)
-        if hasattr(mi.formats, 'split'):
-            mi.formats = mi.formats.split(',')
-        else:
-            mi.formats = None
-        tags = self.tags(idx, index_is_id=index_is_id)
+        mi.comments = row[fm['comments']]
+        mi.publisher = row[fm['publisher']]
+        mi.timestamp = row[fm['timestamp']]
+        mi.pubdate = row[fm['pubdate']]
+        mi.uuid = row[fm['uuid']]
+        mi.title_sort = row[fm['sort']]
+        formats = row[fm['formats']]
+        if not formats:
+            formats = None
+        mi.formats = formats
+        tags = row[fm['tags']]
         if tags:
             mi.tags = [i.strip() for i in tags.split(',')]
-        mi.series = self.series(idx, index_is_id=index_is_id)
+        mi.series = row[fm['series']]
         if mi.series:
-            mi.series_index = self.series_index(idx, index_is_id=index_is_id)
-        mi.rating = self.rating(idx, index_is_id=index_is_id)
-        mi.isbn = self.isbn(idx, index_is_id=index_is_id)
+            mi.series_index = row[fm['series_index']]
+        mi.rating = row[fm['rating']]
+        mi.isbn = row[fm['isbn']]
         id = idx if index_is_id else self.id(idx)
         mi.application_id = id
         mi.id = id
-        for key,meta in self.field_metadata.iteritems():
-            if meta['is_custom']:
-                mi.set_user_metadata(key, meta)
-                mi.set(key, val=self.get_custom(idx, label=meta['label'],
-                                                index_is_id=index_is_id),
-                       extra=self.get_custom_extra(idx, label=meta['label'],
-                                                   index_is_id=index_is_id))
+        for key, meta in self.field_metadata.custom_iteritems():
+            mi.set_user_metadata(key, meta)
+            mi.set(key, val=self.get_custom(idx, label=meta['label'],
+                                            index_is_id=index_is_id),
+                   extra=self.get_custom_extra(idx, label=meta['label'],
+                                               index_is_id=index_is_id))
         if get_cover:
             mi.cover = self.cover(id, index_is_id=True, as_path=True)
         return mi

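The rewrite trades one accessor call (and hence one query or cache lookup) per field for direct indexing into the row that is already in memory, via FIELD_MAP. In miniature (row contents and the map are invented):

    # A cached library row is a plain sequence; FIELD_MAP names the indices.
    FIELD_MAP = {'id': 0, 'title': 1, 'author_sort': 2, 'formats': 3}
    row = (7, 'Dune', 'Herbert, Frank', 'EPUB,MOBI')

    fm = FIELD_MAP
    title = row[fm['title']]       # one tuple lookup, no SQL round trip
    formats = row[fm['formats']]
    formats = formats.split(',') if formats else None
    print('%s %s' % (title, formats))  # Dune ['EPUB', 'MOBI']
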
@@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):

     def formats(self, index, index_is_id=False, verify_formats=True):
         ''' Return available formats as a comma separated list or None if there are no available formats '''
-        id = index if index_is_id else self.id(index)
-        try:
-            formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,))
-            formats = map(lambda x:x[0], formats)
-        except:
+        id_ = index if index_is_id else self.id(index)
+        formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True)
+        if not formats:
             return None
         if not verify_formats:
-            return ','.join(formats)
+            return formats
+        formats = formats.split(',')
         ans = []
-        for format in formats:
-            if self.format_abspath(id, format, index_is_id=True) is not None:
-                ans.append(format)
+        for fmt in formats:
+            if self.format_abspath(id_, fmt, index_is_id=True) is not None:
+                ans.append(fmt)
         if not ans:
             return None
         return ','.join(ans)

@@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                       ','.join([a.replace(',', '|') for a in authors]),
                       row_is_id=True)
         self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True)
+        aum = self.authors_with_sort_strings(id, index_is_id=True)
+        self.data.set(id, self.FIELD_MAP['au_map'],
+            ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]),
+            row_is_id=True)

     def set_authors(self, id, authors, notify=True, commit=True):
         '''

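The au_map column packs all (author, author_sort) pairs for a book into a single string: ':::' separates the two halves of a pair, ':#:' separates pairs, and commas inside author names are stored as '|'. A round-trip sketch (names invented; calibre's exact comma handling differs slightly between the two sides):

    aum = [('Herbert, Frank', 'Herbert, Frank'), ('Asimov, Isaac', 'Asimov, Isaac')]

    # Pack, as the au_map update above does.
    packed = ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum])

    # Unpack, as get_metadata does.
    aut_list = [p.split(':::') for p in packed.split(':#:')]
    for author, author_sort in aut_list:
        print('%s -> %s' % (author.replace('|', ','), author_sort))
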
@@ -180,6 +180,15 @@ class FieldMetadata(dict):
                        'search_terms':['author_sort'],
                        'is_custom':False,
                        'is_category':False}),
+            ('au_map', {'table':None,
+                        'column':None,
+                        'datatype':'text',
+                        'is_multiple':',',
+                        'kind':'field',
+                        'name':None,
+                        'search_terms':[],
+                        'is_custom':False,
+                        'is_category':False}),
             ('comments', {'table':None,
                           'column':None,
                           'datatype':'text',

@@ -400,6 +409,12 @@ class FieldMetadata(dict):
         for key in self._tb_cats:
             yield (key, self._tb_cats[key])

+    def custom_iteritems(self):
+        for key in self._tb_cats:
+            fm = self._tb_cats[key]
+            if fm['is_custom']:
+                yield (key, self._tb_cats[key])
+
     def items(self):
         return list(self.iteritems())

@@ -87,6 +87,23 @@ class SortedConcatenate(object):
 class SafeSortedConcatenate(SortedConcatenate):
     sep = '|'

+class AumSortedConcatenate(object):
+    '''String concatenation aggregator for the author sort map'''
+    def __init__(self):
+        self.ans = {}
+
+    def step(self, ndx, author, sort):
+        if author is not None:
+            self.ans[ndx] = author + ':::' + sort
+
+    def finalize(self):
+        keys = self.ans.keys()
+        if len(keys) == 0:
+            return None
+        if len(keys) == 1:
+            return self.ans[keys[0]]
+        return ':#:'.join([self.ans[v] for v in sorted(keys)])
+
 class Connection(sqlite.Connection):

     def get(self, *args, **kw):

@@ -155,6 +172,7 @@ class DBThread(Thread):
             c_ext_loaded = load_c_extensions(self.conn)
             self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
             self.conn.create_aggregate('concat', 1, Concatenate)
+            self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
             if not c_ext_loaded:
                 self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
                 self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)

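AumSortedConcatenate follows the standard sqlite3 aggregate protocol: register the class with create_aggregate(name, n_args, cls); SQLite instantiates it once per group, calls step() for each row, and finalize() for the result. A self-contained demo against a throwaway in-memory table (table and data invented):

    import sqlite3

    class AumSortedConcatenate(object):
        '''Concatenate (author, sort) pairs, ordered by link-table id.'''
        def __init__(self):
            self.ans = {}
        def step(self, ndx, author, sort):
            if author is not None:
                self.ans[ndx] = author + ':::' + sort
        def finalize(self):
            keys = sorted(self.ans)
            return ':#:'.join(self.ans[k] for k in keys) if keys else None

    conn = sqlite3.connect(':memory:')
    conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate)
    conn.execute('CREATE TABLE link (id INTEGER, name TEXT, sort TEXT)')
    conn.executemany('INSERT INTO link VALUES (?,?,?)',
                     [(2, 'Asimov', 'Asimov, Isaac'), (1, 'Herbert', 'Herbert, Frank')])
    print(conn.execute('SELECT aum_sortconcat(id, name, sort) FROM link').fetchone()[0])
    # Herbert:::Herbert, Frank:#:Asimov:::Asimov, Isaac
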
@@ -118,8 +118,9 @@ class SearchQueryParser(object):
             failed.append(test[0])
         return failed

-    def __init__(self, locations, test=False):
+    def __init__(self, locations, test=False, optimize=False):
         self._tests_failed = False
+        self.optimize = optimize
         # Define a token
         standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'),
                                  locations)

@@ -182,38 +183,52 @@ class SearchQueryParser(object):
         # empty the list of searches used for recursion testing
         self.recurse_level = 0
         self.searches_seen = set([])
-        return self._parse(query)
+        candidates = self.universal_set()
+        return self._parse(query, candidates)

     # this parse is used internally because it doesn't clear the
     # recursive search test list. However, we permit seeing the
     # same search a few times because the search might appear within
     # another search.
-    def _parse(self, query):
+    def _parse(self, query, candidates=None):
         self.recurse_level += 1
         res = self._parser.parseString(query)[0]
-        t = self.evaluate(res)
+        if candidates is None:
+            candidates = self.universal_set()
+        t = self.evaluate(res, candidates)
         self.recurse_level -= 1
         return t

     def method(self, group_name):
         return getattr(self, 'evaluate_'+group_name)

-    def evaluate(self, parse_result):
-        return self.method(parse_result.getName())(parse_result)
+    def evaluate(self, parse_result, candidates):
+        return self.method(parse_result.getName())(parse_result, candidates)

-    def evaluate_and(self, argument):
-        return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+    def evaluate_and(self, argument, candidates):
+        # RHS checks only those items matched by LHS
+        # returns result of RHS check: RHmatches(LHmatches(c))
+        # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.intersection(self.evaluate(argument[1], l))

-    def evaluate_or(self, argument):
-        return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+    def evaluate_or(self, argument, candidates):
+        # RHS checks only those elements not matched by LHS
+        # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c))
+        # return self.evaluate(argument[0]).union(self.evaluate(argument[1]))
+        l = self.evaluate(argument[0], candidates)
+        return l.union(self.evaluate(argument[1], candidates.difference(l)))

-    def evaluate_not(self, argument):
-        return self.universal_set().difference(self.evaluate(argument[0]))
+    def evaluate_not(self, argument, candidates):
+        # unary op checks only candidates. Result: list of items matching
+        # returns: c - matches(c)
+        # return self.universal_set().difference(self.evaluate(argument[0]))
+        return candidates.difference(self.evaluate(argument[0], candidates))

-    def evaluate_parenthesis(self, argument):
-        return self.evaluate(argument[0])
+    def evaluate_parenthesis(self, argument, candidates):
+        return self.evaluate(argument[0], candidates)

-    def evaluate_token(self, argument):
+    def evaluate_token(self, argument, candidates):
         location = argument[0]
         query = argument[1]
         if location.lower() == 'search':

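The whole optimization in one picture: every evaluator now receives the candidate set it must search, so AND runs its right side only over the left side's matches, OR only over what the left side missed, and NOT subtracts from candidates rather than from the universal set. A toy evaluator over invented data:

    universe = set(range(10))
    data = dict((i, str(i)) for i in universe)

    def matches(q, candidates):
        # Leaf lookup: scans only the candidate ids, never the whole universe.
        return set(i for i in candidates if q in data[i])

    def AND(a, b, candidates):
        l = matches(a, candidates)
        return l.intersection(matches(b, l))                   # RHS sees only LHS hits

    def OR(a, b, candidates):
        l = matches(a, candidates)
        return l.union(matches(b, candidates.difference(l)))   # RHS skips LHS hits

    def NOT(a, candidates):
        return candidates.difference(matches(a, candidates))

    print(sorted(OR('2', '3', universe)))  # [2, 3]
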
@@ -224,17 +239,27 @@ class SearchQueryParser(object):
                     raise ParseException(query, len(query), 'undefined saved search', self)
                 if self.recurse_level > 5:
                     self.searches_seen.add(query)
-                return self._parse(saved_searches().lookup(query))
+                return self._parse(saved_searches().lookup(query), candidates)
             except: # convert all exceptions (e.g., missing key) to a parse error
                 raise ParseException(query, len(query), 'undefined saved search', self)
-        return self.get_matches(location, query)
+        return self._get_matches(location, query, candidates)

-    def get_matches(self, location, query):
+    def _get_matches(self, location, query, candidates):
+        if self.optimize:
+            return self.get_matches(location, query, candidates=candidates)
+        else:
+            return self.get_matches(location, query)
+
+    def get_matches(self, location, query, candidates=None):
         '''
         Should return the set of matches for :param:`location` and :param:`query`.

+        The search must be performed over all entries if :param:`candidates` is
+        None, otherwise only over the items in candidates.
+
         :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`.
         :param:`query` is a string literal.
+        :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`.
         '''
         return set([])

@@ -561,7 +586,7 @@ class Tester(SearchQueryParser):
     def universal_set(self):
         return self._universal_set

-    def get_matches(self, location, query):
+    def get_matches(self, location, query, candidates=None):
         location = location.lower()
         if location in self.fields.keys():
             getter = operator.itemgetter(self.fields[location])

@@ -573,8 +598,13 @@ class Tester(SearchQueryParser):
         if not query:
             return set([])
         query = query.lower()
-        return set(key for key, val in self.texts.items() \
-                if query and query in getattr(getter(val), 'lower', lambda : '')())
+        if candidates:
+            return set(key for key, val in self.texts.items() \
+                if key in candidates and query and query
+                in getattr(getter(val), 'lower', lambda : '')())
+        else:
+            return set(key for key, val in self.texts.items() \
+                if query and query in getattr(getter(val), 'lower', lambda : '')())


@@ -592,6 +622,7 @@ class Tester(SearchQueryParser):


 def main(args=sys.argv):
+    print 'testing unoptimized'
     tester = Tester(['authors', 'author', 'series', 'formats', 'format',
         'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
         'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',

@@ -601,6 +632,16 @@ def main(args=sys.argv):
         print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
         return 1

+    print '\n\ntesting optimized'
+    tester = Tester(['authors', 'author', 'series', 'formats', 'format',
+        'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover',
+        'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read',
+        'all', 'search'], test=True, optimize=True)
+    failed = tester.run_tests()
+    if tester._tests_failed or failed:
+        print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<'
+        return 1
+
     return 0

 if __name__ == '__main__':