diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py index b7e2f0fd2e..73c930778e 100644 --- a/src/calibre/devices/android/driver.py +++ b/src/calibre/devices/android/driver.py @@ -29,7 +29,7 @@ class ANDROID(USBMS): # Motorola 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100], 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], - 0x4286 : [0x216] }, + 0x4286 : [0x216], 0x42b3 : [0x216] }, # Sony Ericsson 0xfce : { 0xd12e : [0x0100]}, diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 17f2c6705c..799bdef8e6 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -324,14 +324,16 @@ class Metadata(object): if metadata is None: traceback.print_stack() return - metadata = copy.deepcopy(metadata) - if '#value#' not in metadata: - if metadata['datatype'] == 'text' and metadata['is_multiple']: - metadata['#value#'] = [] + m = {} + for k in metadata: + m[k] = copy.copy(metadata[k]) + if '#value#' not in m: + if m['datatype'] == 'text' and m['is_multiple']: + m['#value#'] = [] else: - metadata['#value#'] = None + m['#value#'] = None _data = object.__getattribute__(self, '_data') - _data['user_metadata'][field] = metadata + _data['user_metadata'][field] = m def template_to_attribute(self, other, ops): ''' diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index a3b4ed7afe..786f50824d 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -8,7 +8,6 @@ __docformat__ = 'restructuredtext en' Transform OEB content into plain text ''' -import os import re from lxml import etree @@ -33,6 +32,15 @@ BLOCK_STYLES = [ 'block', ] +HEADING_TAGS = [ + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', +] + SPACE_TAGS = [ 'td', 'br', @@ -47,6 +55,10 @@ class TXTMLizer(object): self.log.info('Converting XHTML to TXT...') self.oeb_book = oeb_book self.opts = opts + self.toc_ids = [] + 
self.last_was_heading = False + + self.create_flat_toc(self.oeb_book.toc) return self.mlize_spine() @@ -58,8 +70,11 @@ class TXTMLizer(object): stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) content = self.remove_newlines(content) - output += self.dump_text(etree.fromstring(content), stylizer) - output = self.cleanup_text(u''.join(output)) + output += self.dump_text(etree.fromstring(content), stylizer, item) + output += '\n\n\n\n\n\n' + output = u''.join(output) + output = u'\n'.join(l.rstrip() for l in output.splitlines()) + output = self.cleanup_text(output) return output @@ -68,6 +83,8 @@ class TXTMLizer(object): text = text.replace('\r\n', ' ') text = text.replace('\n', ' ') text = text.replace('\r', ' ') + # Condense redundant spaces created by replacing newlines with spaces. + text = re.sub(r'[ ]{2,}', ' ', text) return text @@ -80,6 +97,14 @@ class TXTMLizer(object): toc.append(u'* %s\n\n' % item.title) return ''.join(toc) + def create_flat_toc(self, nodes): + ''' + Turns a hierarchical list of TOC href's into a flat list. + ''' + for item in nodes: + self.toc_ids.append(item.href) + self.create_flat_toc(item.nodes) + def cleanup_text(self, text): self.log.debug('\tClean up text...') # Replace bad characters. @@ -92,7 +117,7 @@ class TXTMLizer(object): text = text.replace('\f+', ' ') # Single line paragraph. - text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text) + text = re.sub('(?<=.)\n(?=.)', ' ', text) # Remove multiple spaces. 
 text = re.sub('[ ]{2,}', ' ', text) @@ -101,13 +126,19 @@ class TXTMLizer(object): text = re.sub('\n[ ]+\n', '\n\n', text) if self.opts.remove_paragraph_spacing: text = re.sub('\n{2,}', '\n', text) - text = re.sub('(?imu)^(?=.)', '\t', text) + text = re.sub(r'(?msu)^(?P<t>[^\t\n]+?)$', lambda mo: u'%s\n\n' % mo.group('t'), text) + text = re.sub(r'(?msu)(?P<b>[^\n])\n+(?P<t>[^\t\n]+?)(?=\n)', lambda mo: '%s\n\n\n\n\n\n%s' % (mo.group('b'), mo.group('t')), text) else: - text = re.sub('\n{3,}', '\n\n', text) + text = re.sub('\n{7,}', '\n\n\n\n\n\n', text) # Replace spaces at the beginning and end of lines + # We don't replace tabs because those are only added + # when remove paragraph spacing is enabled. text = re.sub('(?imu)^[ ]+', '', text) text = re.sub('(?imu)[ ]+$', '', text) + + # Remove empty space and newlines at the beginning of the document. + text = re.sub(r'(?u)^[ \n]+', '', text) if self.opts.max_line_length: max_length = self.opts.max_line_length @@ -145,13 +176,11 @@ class TXTMLizer(object): return text - def dump_text(self, elem, stylizer, end=''): + def dump_text(self, elem, stylizer, page): ''' @elem: The element in the etree that we are working on. @stylizer: The style information attached to the element. - @end: The last two characters of the text from the previous element. - This is used to determine if a blank line is needed when starting - a new block element. + @page: OEB page used to determine absolute urls. ''' if not isinstance(elem.tag, basestring) \ @@ -170,13 +199,22 @@ class TXTMLizer(object): return [''] tag = barename(elem.tag) + tag_id = elem.attrib.get('id', None) in_block = False + in_heading = False + + # Are we in a heading? + # This can either be a heading tag or a TOC item. + if tag in HEADING_TAGS or '%s#%s' % (page.href, tag_id) in self.toc_ids: + in_heading = True + if not self.last_was_heading: + text.append('\n\n\n\n\n\n') # Are we in a paragraph block? 
if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES: + if self.opts.remove_paragraph_spacing and not in_heading: + text.append(u'\t') in_block = True - if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text: - text.append(u'\n\n') if tag in SPACE_TAGS: text.append(u' ') @@ -185,14 +223,17 @@ class TXTMLizer(object): if hasattr(elem, 'text') and elem.text: text.append(elem.text) + # Recurse down into tags within the tag we are in. for item in elem: - en = u'' - if len(text) >= 2: - en = text[-1][-2:] - text += self.dump_text(item, stylizer, en) + text += self.dump_text(item, stylizer, page) if in_block: text.append(u'\n\n') + if in_heading: + text.append(u'\n') + self.last_was_heading = True + else: + self.last_was_heading = False if hasattr(elem, 'tail') and elem.tail: text.append(elem.tail) diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 6d289a3e5c..944ce03305 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{ self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_bambook(self): - self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', + self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', path=BAMBOOK.settings().extra_customization) def connect_to_itunes(self): @@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{ # Force a reset if the caches are not initialized if reset or not hasattr(self, 'db_book_title_cache'): # Build a cache (map) of the library, so the search isn't On**2 - self.db_book_title_cache = {} - self.db_book_uuid_cache = {} + db_book_title_cache = {} + db_book_uuid_cache = {} # It might be possible to get here without having initialized the # library view. 
In this case, simply give up try: @@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{ for id in db.data.iterallids(): mi = db.get_metadata(id, index_is_id=True) title = clean_string(mi.title) - if title not in self.db_book_title_cache: - self.db_book_title_cache[title] = \ + if title not in db_book_title_cache: + db_book_title_cache[title] = \ {'authors':{}, 'author_sort':{}, 'db_ids':{}} # If there are multiple books in the library with the same title # and author, then remember the last one. That is OK, because as @@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{ # as another. if mi.authors: authors = clean_string(authors_to_string(mi.authors)) - self.db_book_title_cache[title]['authors'][authors] = mi + db_book_title_cache[title]['authors'][authors] = mi if mi.author_sort: aus = clean_string(mi.author_sort) - self.db_book_title_cache[title]['author_sort'][aus] = mi - self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi - self.db_book_uuid_cache[mi.uuid] = mi + db_book_title_cache[title]['author_sort'][aus] = mi + db_book_title_cache[title]['db_ids'][mi.application_id] = mi + db_book_uuid_cache[mi.uuid] = mi + self.db_book_title_cache = db_book_title_cache + self.db_book_uuid_cache = db_book_uuid_cache # Now iterate through all the books on the device, setting the # in_library field. 
If the UUID matches a book in the library, then diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 980c9f1fa9..0763318912 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -181,7 +181,7 @@ class ResultCache(SearchQueryParser): # {{{ self.search_restriction = '' self.field_metadata = field_metadata self.all_search_locations = field_metadata.get_search_terms() - SearchQueryParser.__init__(self, self.all_search_locations) + SearchQueryParser.__init__(self, self.all_search_locations, optimize=True) self.build_date_relop_dict() self.build_numeric_relop_dict() @@ -264,7 +264,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, relop_le] } - def get_dates_matches(self, location, query): + def get_dates_matches(self, location, query, candidates): matches = set([]) if len(query) < 2: return matches @@ -274,13 +274,15 @@ class ResultCache(SearchQueryParser): # {{{ loc = self.field_metadata[location]['rec_index'] if query == 'false': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is None or item[loc] <= UNDEFINED_DATE: matches.add(item[0]) return matches if query == 'true': - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if item[loc] is not None and item[loc] > UNDEFINED_DATE: matches.add(item[0]) @@ -319,7 +321,8 @@ class ResultCache(SearchQueryParser): # {{{ field_count = query.count('-') + 1 else: field_count = query.count('/') + 1 - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None or item[loc] is None: continue if relop(item[loc], qd, field_count): matches.add(item[0]) @@ -335,7 +338,7 @@ class ResultCache(SearchQueryParser): # {{{ '<=':[2, lambda r, q: r <= q] } - def get_numeric_matches(self, location, query, val_func = None): + def get_numeric_matches(self, location, query, candidates, val_func = None): matches = set([]) if len(query) == 0: 
return matches @@ -381,7 +384,8 @@ class ResultCache(SearchQueryParser): # {{{ except: return matches - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue v = val_func(item) @@ -393,8 +397,13 @@ class ResultCache(SearchQueryParser): # {{{ matches.add(item[0]) return matches - def get_matches(self, location, query, allow_recursion=True): + def get_matches(self, location, query, allow_recursion=True, candidates=None): matches = set([]) + if candidates is None: + candidates = self.universal_set() + if len(candidates) == 0: + return matches + if query and query.strip(): # get metadata key associated with the search term. Eliminates # dealing with plurals and other aliases @@ -476,7 +485,8 @@ class ResultCache(SearchQueryParser): # {{{ else: q = query - for item in self._data: + for id_ in candidates: + item = self._data[id_] if item is None: continue if col_datatype[loc] == 'bool': # complexity caused by the two-/three-value tweak diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index ba218c3ecc..d925f7c91d 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -195,8 +195,8 @@ class CustomColumns(object): data = self.custom_column_num_map[num] row = self.data._data[idx] if index_is_id else self.data[idx] ans = row[self.FIELD_MAP[data['num']]] - if data['is_multiple'] and data['datatype'] == 'text': - ans = ans.split('|') if ans else [] + if ans and data['is_multiple'] and data['datatype'] == 'text': + ans = ans.split('|') if data['display'].get('sort_alpha', False): ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) return ans diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 611aa1cc89..138560020e 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -256,7 +256,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'pubdate', 'flags', 'uuid', - 
'has_cover' + 'has_cover', + ('au_map', 'authors', 'author', 'aum_sortconcat(link.id, authors.name, authors.sort)') ] lines = [] for col in columns: @@ -273,9 +274,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3, 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8, - 'publisher':9, 'series_index':10, - 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15, - 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20} + 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12, + 'formats':13, 'isbn':14, 'path':15, 'lccn':16, 'pubdate':17, + 'flags':18, 'uuid':19, 'cover':20, 'au_map':21} for k,v in self.FIELD_MAP.iteritems(): self.field_metadata.set_field_record_index(k, v, prefer_custom=False) @@ -687,9 +688,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): Convenience method to return metadata as a :class:`Metadata` object. Note that the list of formats is not verified. 
''' + row = self.data._data[idx] if index_is_id else self.data[idx] + fm = self.FIELD_MAP + self.gm_count += 1 - mi = self.data.get(idx, self.FIELD_MAP['all_metadata'], - row_is_id = index_is_id) + mi = row[self.FIELD_MAP['all_metadata']] if mi is not None: if get_cover: # Always get the cover, because the value can be wrong if the @@ -699,49 +702,46 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.gm_missed += 1 mi = Metadata(None) - self.data.set(idx, self.FIELD_MAP['all_metadata'], mi, - row_is_id = index_is_id) + self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id) - aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id) + aut_list = row[fm['au_map']] + aut_list = [p.split(':::') for p in aut_list.split(':#:')] aum = [] aus = {} for (author, author_sort) in aut_list: aum.append(author) - aus[author] = author_sort - mi.title = self.title(idx, index_is_id=index_is_id) + aus[author] = author_sort.replace('|', ',') + mi.title = row[fm['title']] mi.authors = aum - mi.author_sort = self.author_sort(idx, index_is_id=index_is_id) + mi.author_sort = row[fm['author_sort']] mi.author_sort_map = aus - mi.comments = self.comments(idx, index_is_id=index_is_id) - mi.publisher = self.publisher(idx, index_is_id=index_is_id) - mi.timestamp = self.timestamp(idx, index_is_id=index_is_id) - mi.pubdate = self.pubdate(idx, index_is_id=index_is_id) - mi.uuid = self.uuid(idx, index_is_id=index_is_id) - mi.title_sort = self.title_sort(idx, index_is_id=index_is_id) - mi.formats = self.formats(idx, index_is_id=index_is_id, - verify_formats=False) - if hasattr(mi.formats, 'split'): - mi.formats = mi.formats.split(',') - else: - mi.formats = None - tags = self.tags(idx, index_is_id=index_is_id) + mi.comments = row[fm['comments']] + mi.publisher = row[fm['publisher']] + mi.timestamp = row[fm['timestamp']] + mi.pubdate = row[fm['pubdate']] + mi.uuid = row[fm['uuid']] + mi.title_sort = row[fm['sort']] + formats = row[fm['formats']] 
+ if not formats: + formats = None + mi.formats = formats + tags = row[fm['tags']] if tags: mi.tags = [i.strip() for i in tags.split(',')] - mi.series = self.series(idx, index_is_id=index_is_id) + mi.series = row[fm['series']] if mi.series: - mi.series_index = self.series_index(idx, index_is_id=index_is_id) - mi.rating = self.rating(idx, index_is_id=index_is_id) - mi.isbn = self.isbn(idx, index_is_id=index_is_id) + mi.series_index = row[fm['series_index']] + mi.rating = row[fm['rating']] + mi.isbn = row[fm['isbn']] id = idx if index_is_id else self.id(idx) mi.application_id = id mi.id = id - for key,meta in self.field_metadata.iteritems(): - if meta['is_custom']: - mi.set_user_metadata(key, meta) - mi.set(key, val=self.get_custom(idx, label=meta['label'], - index_is_id=index_is_id), - extra=self.get_custom_extra(idx, label=meta['label'], - index_is_id=index_is_id)) + for key, meta in self.field_metadata.custom_iteritems(): + mi.set_user_metadata(key, meta) + mi.set(key, val=self.get_custom(idx, label=meta['label'], + index_is_id=index_is_id), + extra=self.get_custom_extra(idx, label=meta['label'], + index_is_id=index_is_id)) if get_cover: mi.cover = self.cover(id, index_is_id=True, as_path=True) return mi @@ -877,18 +877,17 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def formats(self, index, index_is_id=False, verify_formats=True): ''' Return available formats as a comma separated list or None if there are no available formats ''' - id = index if index_is_id else self.id(index) - try: - formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,)) - formats = map(lambda x:x[0], formats) - except: + id_ = index if index_is_id else self.id(index) + formats = self.data.get(id_, self.FIELD_MAP['formats'], row_is_id=True) + if not formats: return None if not verify_formats: - return ','.join(formats) + return formats + formats = formats.split(',') ans = [] - for format in formats: - if self.format_abspath(id, format, 
index_is_id=True) is not None: - ans.append(format) + for fmt in formats: + if self.format_abspath(id_, fmt, index_is_id=True) is not None: + ans.append(fmt) if not ans: return None return ','.join(ans) @@ -1607,6 +1606,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ','.join([a.replace(',', '|') for a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) + aum = self.authors_with_sort_strings(id, index_is_id=True) + self.data.set(id, self.FIELD_MAP['au_map'], + ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]), + row_is_id=True) def set_authors(self, id, authors, notify=True, commit=True): ''' diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 1be6604d5d..676eb13d2b 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -180,6 +180,15 @@ class FieldMetadata(dict): 'search_terms':['author_sort'], 'is_custom':False, 'is_category':False}), + ('au_map', {'table':None, + 'column':None, + 'datatype':'text', + 'is_multiple':',', + 'kind':'field', + 'name':None, + 'search_terms':[], + 'is_custom':False, + 'is_category':False}), ('comments', {'table':None, 'column':None, 'datatype':'text', @@ -400,6 +409,12 @@ class FieldMetadata(dict): for key in self._tb_cats: yield (key, self._tb_cats[key]) + def custom_iteritems(self): + for key in self._tb_cats: + fm = self._tb_cats[key] + if fm['is_custom']: + yield (key, self._tb_cats[key]) + def items(self): return list(self.iteritems()) diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index 0458ada27b..75856dd0f6 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -87,6 +87,23 @@ class SortedConcatenate(object): class SafeSortedConcatenate(SortedConcatenate): sep = '|' +class AumSortedConcatenate(object): + '''String concatenation aggregator for the author sort map''' + def __init__(self): + 
self.ans = {} + + def step(self, ndx, author, sort): + if author is not None: + self.ans[ndx] = author + ':::' + sort + + def finalize(self): + keys = self.ans.keys() + if len(keys) == 0: + return None + if len(keys) == 1: + return self.ans[keys[0]] + return ':#:'.join([self.ans[v] for v in sorted(keys)]) + class Connection(sqlite.Connection): def get(self, *args, **kw): @@ -155,6 +172,7 @@ class DBThread(Thread): c_ext_loaded = load_c_extensions(self.conn) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.create_aggregate('concat', 1, Concatenate) + self.conn.create_aggregate('aum_sortconcat', 3, AumSortedConcatenate) if not c_ext_loaded: self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate) diff --git a/src/calibre/utils/search_query_parser.py b/src/calibre/utils/search_query_parser.py index db7c7bde5f..447ff8cd14 100644 --- a/src/calibre/utils/search_query_parser.py +++ b/src/calibre/utils/search_query_parser.py @@ -118,8 +118,9 @@ class SearchQueryParser(object): failed.append(test[0]) return failed - def __init__(self, locations, test=False): + def __init__(self, locations, test=False, optimize=False): self._tests_failed = False + self.optimize = optimize # Define a token standard_locations = map(lambda x : CaselessLiteral(x)+Suppress(':'), locations) @@ -182,38 +183,52 @@ class SearchQueryParser(object): # empty the list of searches used for recursion testing self.recurse_level = 0 self.searches_seen = set([]) - return self._parse(query) + candidates = self.universal_set() + return self._parse(query, candidates) # this parse is used internally because it doesn't clear the # recursive search test list. However, we permit seeing the # same search a few times because the search might appear within # another search. 
- def _parse(self, query): + def _parse(self, query, candidates=None): self.recurse_level += 1 res = self._parser.parseString(query)[0] - t = self.evaluate(res) + if candidates is None: + candidates = self.universal_set() + t = self.evaluate(res, candidates) self.recurse_level -= 1 return t def method(self, group_name): return getattr(self, 'evaluate_'+group_name) - def evaluate(self, parse_result): - return self.method(parse_result.getName())(parse_result) + def evaluate(self, parse_result, candidates): + return self.method(parse_result.getName())(parse_result, candidates) - def evaluate_and(self, argument): - return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) + def evaluate_and(self, argument, candidates): + # RHS checks only those items matched by LHS + # returns result of RHS check: RHmatches(LHmatches(c)) + # return self.evaluate(argument[0]).intersection(self.evaluate(argument[1])) + l = self.evaluate(argument[0], candidates) + return l.intersection(self.evaluate(argument[1], l)) - def evaluate_or(self, argument): - return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + def evaluate_or(self, argument, candidates): + # RHS checks only those elements not matched by LHS + # returns LHS union RHS: LHmatches(c) + RHmatches(c-LHmatches(c)) + # return self.evaluate(argument[0]).union(self.evaluate(argument[1])) + l = self.evaluate(argument[0], candidates) + return l.union(self.evaluate(argument[1], candidates.difference(l))) - def evaluate_not(self, argument): - return self.universal_set().difference(self.evaluate(argument[0])) + def evaluate_not(self, argument, candidates): + # unary op checks only candidates. 
 Result: list of items matching + # returns: c - matches(c) + # return self.universal_set().difference(self.evaluate(argument[0])) + return candidates.difference(self.evaluate(argument[0], candidates)) - def evaluate_parenthesis(self, argument): - return self.evaluate(argument[0]) + def evaluate_parenthesis(self, argument, candidates): + return self.evaluate(argument[0], candidates) - def evaluate_token(self, argument): + def evaluate_token(self, argument, candidates): location = argument[0] query = argument[1] if location.lower() == 'search': @@ -224,17 +239,27 @@ class SearchQueryParser(object): raise ParseException(query, len(query), 'undefined saved search', self) if self.recurse_level > 5: self.searches_seen.add(query) - return self._parse(saved_searches().lookup(query)) + return self._parse(saved_searches().lookup(query), candidates) except: # convert all exceptions (e.g., missing key) to a parse error raise ParseException(query, len(query), 'undefined saved search', self) - return self.get_matches(location, query) + return self._get_matches(location, query, candidates) - def get_matches(self, location, query): + def _get_matches(self, location, query, candidates): + if self.optimize: + return self.get_matches(location, query, candidates=candidates) + else: + return self.get_matches(location, query) + + def get_matches(self, location, query, candidates=None): ''' Should return the set of matches for :param:'location` and :param:`query`. + The search must be performed over all entries if :param:`candidates` is + None, otherwise only over the items in candidates. + :param:`location` is one of the items in :member:`SearchQueryParser.DEFAULT_LOCATIONS`. :param:`query` is a string literal. + :param:`candidates` is None or a subset of the set returned by :meth:`universal_set`. 
''' return set([]) @@ -561,7 +586,7 @@ class Tester(SearchQueryParser): def universal_set(self): return self._universal_set - def get_matches(self, location, query): + def get_matches(self, location, query, candidates=None): location = location.lower() if location in self.fields.keys(): getter = operator.itemgetter(self.fields[location]) @@ -573,8 +598,13 @@ class Tester(SearchQueryParser): if not query: return set([]) query = query.lower() - return set(key for key, val in self.texts.items() \ - if query and query in getattr(getter(val), 'lower', lambda : '')()) + if candidates: + return set(key for key, val in self.texts.items() \ + if key in candidates and query and query + in getattr(getter(val), 'lower', lambda : '')()) + else: + return set(key for key, val in self.texts.items() \ + if query and query in getattr(getter(val), 'lower', lambda : '')()) @@ -592,6 +622,7 @@ class Tester(SearchQueryParser): def main(args=sys.argv): + print 'testing unoptimized' tester = Tester(['authors', 'author', 'series', 'formats', 'format', 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', @@ -601,6 +632,16 @@ def main(args=sys.argv): print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' return 1 + print '\n\ntesting optimized' + tester = Tester(['authors', 'author', 'series', 'formats', 'format', + 'publisher', 'rating', 'tags', 'tag', 'comments', 'comment', 'cover', + 'isbn', 'ondevice', 'pubdate', 'size', 'date', 'title', u'#read', + 'all', 'search'], test=True, optimize=True) + failed = tester.run_tests() + if tester._tests_failed or failed: + print '>>>>>>>>>>>>>> Tests Failed <<<<<<<<<<<<<<<' + return 1 + return 0 if __name__ == '__main__':