From 90f973e7ac999bd7d143b5152cd9fcbe2e9c1f3a Mon Sep 17 00:00:00 2001 From: Charles Haley <> Date: Thu, 6 Jan 2011 13:30:40 +0000 Subject: [PATCH] Improve performance of get_metadata. Up to 4 times faster when connecting to a device --- src/calibre/ebooks/metadata/book/base.py | 14 +++-- src/calibre/gui2/device.py | 20 +++--- src/calibre/library/custom_columns.py | 4 +- src/calibre/library/database2.py | 78 +++++++++++++----------- src/calibre/library/field_metadata.py | 15 +++++ 5 files changed, 79 insertions(+), 52 deletions(-) diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 17f2c6705c..799bdef8e6 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -324,14 +324,16 @@ class Metadata(object): if metadata is None: traceback.print_stack() return - metadata = copy.deepcopy(metadata) - if '#value#' not in metadata: - if metadata['datatype'] == 'text' and metadata['is_multiple']: - metadata['#value#'] = [] + m = {} + for k in metadata: + m[k] = copy.copy(metadata[k]) + if '#value#' not in m: + if m['datatype'] == 'text' and m['is_multiple']: + m['#value#'] = [] else: - metadata['#value#'] = None + m['#value#'] = None _data = object.__getattribute__(self, '_data') - _data['user_metadata'][field] = metadata + _data['user_metadata'][field] = m def template_to_attribute(self, other, ops): ''' diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 6d289a3e5c..944ce03305 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -637,7 +637,7 @@ class DeviceMixin(object): # {{{ self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_bambook(self): - self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', + self.device_manager.mount_device(kls=BAMBOOKWifi, kind='bambook', path=BAMBOOK.settings().extra_customization) def connect_to_itunes(self): @@ -1266,8 +1266,8 @@ class DeviceMixin(object): # {{{ # Force a reset if the caches are not initialized if reset or not hasattr(self, 'db_book_title_cache'): # Build a cache (map) of the library, so the search isn't On**2 - self.db_book_title_cache = {} - self.db_book_uuid_cache = {} + db_book_title_cache = {} + db_book_uuid_cache = {} # It might be possible to get here without having initialized the # library view. In this case, simply give up try: @@ -1278,8 +1278,8 @@ class DeviceMixin(object): # {{{ for id in db.data.iterallids(): mi = db.get_metadata(id, index_is_id=True) title = clean_string(mi.title) - if title not in self.db_book_title_cache: - self.db_book_title_cache[title] = \ + if title not in db_book_title_cache: + db_book_title_cache[title] = \ {'authors':{}, 'author_sort':{}, 'db_ids':{}} # If there are multiple books in the library with the same title # and author, then remember the last one. That is OK, because as @@ -1287,12 +1287,14 @@ class DeviceMixin(object): # {{{ # as another. if mi.authors: authors = clean_string(authors_to_string(mi.authors)) - self.db_book_title_cache[title]['authors'][authors] = mi + db_book_title_cache[title]['authors'][authors] = mi if mi.author_sort: aus = clean_string(mi.author_sort) - self.db_book_title_cache[title]['author_sort'][aus] = mi - self.db_book_title_cache[title]['db_ids'][mi.application_id] = mi - self.db_book_uuid_cache[mi.uuid] = mi + db_book_title_cache[title]['author_sort'][aus] = mi + db_book_title_cache[title]['db_ids'][mi.application_id] = mi + db_book_uuid_cache[mi.uuid] = mi + self.db_book_title_cache = db_book_title_cache + self.db_book_uuid_cache = db_book_uuid_cache # Now iterate through all the books on the device, setting the # in_library field. If the UUID matches a book in the library, then diff --git a/src/calibre/library/custom_columns.py b/src/calibre/library/custom_columns.py index ba218c3ecc..d925f7c91d 100644 --- a/src/calibre/library/custom_columns.py +++ b/src/calibre/library/custom_columns.py @@ -195,8 +195,8 @@ class CustomColumns(object): data = self.custom_column_num_map[num] row = self.data._data[idx] if index_is_id else self.data[idx] ans = row[self.FIELD_MAP[data['num']]] - if data['is_multiple'] and data['datatype'] == 'text': - ans = ans.split('|') if ans else [] + if ans and data['is_multiple'] and data['datatype'] == 'text': + ans = ans.split('|') if data['display'].get('sort_alpha', False): ans.sort(cmp=lambda x,y:cmp(x.lower(), y.lower())) return ans diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 611aa1cc89..96f21b88ee 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -256,7 +256,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'pubdate', 'flags', 'uuid', - 'has_cover' + 'has_cover', + '''(SELECT group_concat(authors.name || ':::' || authors.sort, ':#:') + FROM authors, books_authors_link as bl + WHERE bl.book=books.id and authors.id=bl.author + ORDER BY bl.id) au_map''', + '(SELECT group_concat(format) FROM data WHERE book=books.id) formats' ] lines = [] for col in columns: @@ -275,7 +280,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8, 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12, 'formats':13, 'isbn':14, 'path':15, - 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20} + 'lccn':16, 'pubdate':17, 'flags':18, 'uuid':19, 'cover':20, + 'au_map':21, 'formats':22} for k,v in self.FIELD_MAP.iteritems(): self.field_metadata.set_field_record_index(k, v, prefer_custom=False) @@ -687,9 +693,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): Convenience method to return metadata as a :class:`Metadata` object. Note that the list of formats is not verified. ''' + row = self.data._data[idx] if index_is_id else self.data[idx] + fm = self.FIELD_MAP + self.gm_count += 1 - mi = self.data.get(idx, self.FIELD_MAP['all_metadata'], - row_is_id = index_is_id) + mi = row[self.FIELD_MAP['all_metadata']] if mi is not None: if get_cover: # Always get the cover, because the value can be wrong if the @@ -699,49 +707,47 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.gm_missed += 1 mi = Metadata(None) - self.data.set(idx, self.FIELD_MAP['all_metadata'], mi, - row_is_id = index_is_id) + self.data.set(idx, fm['all_metadata'], mi, row_is_id = index_is_id) - aut_list = self.authors_with_sort_strings(idx, index_is_id=index_is_id) + aut_list = row[fm['au_map']] + aut_list = [p.split(':::') for p in aut_list.split(':#:')] aum = [] aus = {} for (author, author_sort) in aut_list: aum.append(author) aus[author] = author_sort - mi.title = self.title(idx, index_is_id=index_is_id) + mi.title = row[fm['title']] mi.authors = aum - mi.author_sort = self.author_sort(idx, index_is_id=index_is_id) + mi.author_sort = row[fm['author_sort']] mi.author_sort_map = aus - mi.comments = self.comments(idx, index_is_id=index_is_id) - mi.publisher = self.publisher(idx, index_is_id=index_is_id) - mi.timestamp = self.timestamp(idx, index_is_id=index_is_id) - mi.pubdate = self.pubdate(idx, index_is_id=index_is_id) - mi.uuid = self.uuid(idx, index_is_id=index_is_id) - mi.title_sort = self.title_sort(idx, index_is_id=index_is_id) - mi.formats = self.formats(idx, index_is_id=index_is_id, - verify_formats=False) - if hasattr(mi.formats, 'split'): - mi.formats = mi.formats.split(',') + mi.comments = row[fm['comments']] + mi.publisher = row[fm['publisher']] + mi.timestamp = row[fm['timestamp']] + mi.pubdate = row[fm['pubdate']] + mi.uuid = row[fm['uuid']] + mi.title_sort = row[fm['sort']] + formats = row[fm['formats']] + if hasattr(formats, 'split'): + mi.formats = formats.split(',') else: mi.formats = None - tags = self.tags(idx, index_is_id=index_is_id) + tags = row[fm['tags']] if tags: mi.tags = [i.strip() for i in tags.split(',')] - mi.series = self.series(idx, index_is_id=index_is_id) + mi.series = row[fm['series']] if mi.series: - mi.series_index = self.series_index(idx, index_is_id=index_is_id) - mi.rating = self.rating(idx, index_is_id=index_is_id) - mi.isbn = self.isbn(idx, index_is_id=index_is_id) + mi.series_index = row[fm['series_index']] + mi.rating = row[fm['rating']] + mi.isbn = row[fm['isbn']] id = idx if index_is_id else self.id(idx) mi.application_id = id mi.id = id - for key,meta in self.field_metadata.iteritems(): - if meta['is_custom']: - mi.set_user_metadata(key, meta) - mi.set(key, val=self.get_custom(idx, label=meta['label'], - index_is_id=index_is_id), - extra=self.get_custom_extra(idx, label=meta['label'], - index_is_id=index_is_id)) + for key,meta in self.field_metadata.custom_iteritems(): + mi.set_user_metadata(key, meta) + mi.set(key, val=self.get_custom(idx, label=meta['label'], + index_is_id=index_is_id), + extra=self.get_custom_extra(idx, label=meta['label'], + index_is_id=index_is_id)) if get_cover: mi.cover = self.cover(id, index_is_id=True, as_path=True) return mi @@ -878,10 +884,8 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): def formats(self, index, index_is_id=False, verify_formats=True): ''' Return available formats as a comma separated list or None if there are no available formats ''' id = index if index_is_id else self.id(index) - try: - formats = self.conn.get('SELECT format FROM data WHERE book=?', (id,)) - formats = map(lambda x:x[0], formats) - except: + formats = self.data.get(id, self.FIELD_MAP['formats'], row_is_id = True) + if not formats: return None if not verify_formats: return ','.join(formats) @@ -1607,6 +1611,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): ','.join([a.replace(',', '|') for a in authors]), row_is_id=True) self.data.set(id, self.FIELD_MAP['author_sort'], ss, row_is_id=True) + aum = self.authors_with_sort_strings(id, index_is_id=True) + self.data.set(id, self.FIELD_MAP['au_map'], + ':#:'.join([':::'.join((au.replace(',', '|'), aus)) for (au, aus) in aum]), + row_is_id=True) def set_authors(self, id, authors, notify=True, commit=True): ''' diff --git a/src/calibre/library/field_metadata.py b/src/calibre/library/field_metadata.py index 1be6604d5d..676eb13d2b 100644 --- a/src/calibre/library/field_metadata.py +++ b/src/calibre/library/field_metadata.py @@ -180,6 +180,15 @@ class FieldMetadata(dict): 'search_terms':['author_sort'], 'is_custom':False, 'is_category':False}), + ('au_map', {'table':None, + 'column':None, + 'datatype':'text', + 'is_multiple':',', + 'kind':'field', + 'name':None, + 'search_terms':[], + 'is_custom':False, + 'is_category':False}), ('comments', {'table':None, 'column':None, 'datatype':'text', @@ -400,6 +409,12 @@ class FieldMetadata(dict): for key in self._tb_cats: yield (key, self._tb_cats[key]) + def custom_iteritems(self): + for key in self._tb_cats: + fm = self._tb_cats[key] + if fm['is_custom']: + yield (key, self._tb_cats[key]) + def items(self): return list(self.iteritems())