From 8b40b9f22c81e90060c8d09334cad3dfeed4ab74 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 16 Aug 2010 19:13:16 -0600
Subject: [PATCH] DB: Store temporary tables in memory. Fix #6472 (Add/change tags for large number of eBooks is s l o w)

---
Review notes (ignored by git-am; everything up to the first "diff --git"
is commentary):

 * Line breaks, blank context lines and indentation were restored so that
   every hunk matches its "@@" line counts and the diffstat below.  The
   exact column of each line is inferred -- verify against the target tree
   before applying.
 * bulk_modify_tags() links every selected book to every added tag with a
   single cross-join INSERT.  Assuming books_tags_link enforces uniqueness
   on (book, tag) -- TODO confirm against the schema -- a plain INSERT
   aborts as soon as one book already carries one of the tags, so the
   statement now uses INSERT OR IGNORE to skip pairs that already exist.
 * bulk_modify_tags() no longer uses mutable default arguments, and the
   "does this tag already exist" lookup uses a set instead of a list.
 * pynocase() catches Exception instead of using a bare except, so the
   best-effort decode no longer swallows KeyboardInterrupt/SystemExit.

 src/calibre/gui2/dialogs/metadata_bulk.py | 18 +++--
 src/calibre/library/database2.py          | 86 +++++++++++++++++++++--
 src/calibre/library/sqlite.py             | 18 +++++
 3 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 9fcfe13253..05c4f48cf3 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -27,8 +27,9 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
         self.changed = False
         QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)
 
-        self.tags.update_tags_cache(self.db.all_tags())
-        self.remove_tags.update_tags_cache(self.db.all_tags())
+        all_tags = self.db.all_tags()
+        self.tags.update_tags_cache(all_tags)
+        self.remove_tags.update_tags_cache(all_tags)
 
         self.initialize_combos()
 
@@ -103,6 +104,11 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
         self.remove_tags.update_tags_cache(self.db.all_tags())
 
     def sync(self):
+        remove = unicode(self.remove_tags.text()).strip().split(',')
+        add = unicode(self.tags.text()).strip().split(',')
+        self.db.bulk_modify_tags(self.ids, add=add, remove=remove)
+
+
         for id in self.ids:
             au = unicode(self.authors.text())
             if au:
@@ -120,14 +126,6 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
             pub = unicode(self.publisher.text())
             if pub:
                 self.db.set_publisher(id, pub, notify=False)
-            remove_tags = unicode(self.remove_tags.text()).strip()
-            if remove_tags:
-                remove_tags = [i.strip() for i in remove_tags.split(',')]
-                self.db.unapply_tags(id, remove_tags, notify=False)
-            tags = unicode(self.tags.text()).strip()
-            if tags:
-                tags = map(lambda x: x.strip(), tags.split(','))
-                self.db.set_tags(id, tags, append=True, notify=False)
             if self.write_series:
                 series = unicode(self.series.currentText()).strip()
                 next = self.db.get_next_series_num_for(series)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 36a31b78a2..b8ac065760 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -26,7 +26,7 @@ from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
 from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
-
+from calibre import isbytestring
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
 from calibre.utils.config import prefs, tweaks
@@ -116,6 +116,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         # so that various code taht connects directly will not complain about
         # missing functions
         self.books_list_filter = self.conn.create_dynamic_filter('books_list_filter')
+        # Store temporary tables in memory
+        self.conn.execute('pragma temp_store=2')
+        self.conn.commit()
 
     @classmethod
     def exists_at(cls, path):
@@ -1369,6 +1372,80 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             return set([])
         return set([r[0] for r in result])
 
+    @classmethod
+    def cleanup_tags(cls, tags):
+        tags = [x.strip() for x in tags if x.strip()]
+        tags = [x.decode(preferred_encoding, 'replace') \
+                if isbytestring(x) else x for x in tags]
+        tags = [u' '.join(x.split()) for x in tags]
+        ans, seen = [], set([])
+        for tag in tags:
+            if tag.lower() not in seen:
+                seen.add(tag.lower())
+                ans.append(tag)
+        return ans
+
+    def bulk_modify_tags(self, ids, add=(), remove=(), notify=False):
+        add = self.cleanup_tags(add)
+        remove = self.cleanup_tags(remove)
+        remove = set(remove) - set(add)
+        if not ids or (not add and not remove):
+            return
+
+        # Add tags that do not already exist into the tag table
+        all_tags = self.all_tags()
+        lt = set(t.lower() for t in all_tags)
+        new_tags = [t for t in add if t.lower() not in lt]
+        if new_tags:
+            self.conn.executemany('INSERT INTO tags(name) VALUES (?)', [(x,) for x in
+                new_tags])
+
+        # Create the temporary tables to store the ids for books and tags
+        # to be operated on
+        tables = ('temp_bulk_tag_edit_books', 'temp_bulk_tag_edit_add',
+                    'temp_bulk_tag_edit_remove')
+        drops = '\n'.join(['DROP TABLE IF EXISTS %s;'%t for t in tables])
+        creates = '\n'.join(['CREATE TEMP TABLE %s(id INTEGER PRIMARY KEY);'%t
+                for t in tables])
+        self.conn.executescript(drops + creates)
+
+        # Populate the books temp table
+        self.conn.executemany(
+            'INSERT INTO temp_bulk_tag_edit_books VALUES (?)',
+                [(x,) for x in ids])
+
+        # Populate the add/remove tags temp tables
+        for table, tags in enumerate([add, remove]):
+            if not tags:
+                continue
+            table = tables[table+1]
+            insert = ('INSERT INTO %s(id) SELECT tags.id FROM tags WHERE name=?'
+                     ' COLLATE PYNOCASE LIMIT 1')
+            self.conn.executemany(insert%table, [(x,) for x in tags])
+
+        if remove:
+            self.conn.execute(
+              '''DELETE FROM books_tags_link WHERE
+                    book IN (SELECT id FROM %s) AND
+                    tag IN (SELECT id FROM %s)'''
+              % (tables[0], tables[2]))
+
+        if add:
+            self.conn.execute(
+            '''
+            INSERT OR IGNORE INTO books_tags_link(book, tag) SELECT {0}.id, {1}.id FROM
+            {0}, {1}
+            '''.format(tables[0], tables[1])
+            )
+        self.conn.executescript(drops)
+        self.conn.commit()
+
+        for x in ids:
+            tags = u','.join(self.get_tags(x))
+            self.data.set(x, self.FIELD_MAP['tags'], tags, row_is_id=True)
+        if notify:
+            self.notify('metadata', ids)
+
     def set_tags(self, id, tags, append=False, notify=True):
         '''
         @param tags: list of strings
@@ -1378,10 +1455,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,))
             self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1')
         otags = self.get_tags(id)
-        tags = [x.strip() for x in tags if x.strip()]
-        tags = [x.decode(preferred_encoding, 'replace') if not isinstance(x,
-            unicode) else x for x in tags]
-        tags = [u' '.join(x.split()) for x in tags]
+        tags = self.cleanup_tags(tags)
         for tag in (set(tags)-otags):
             tag = tag.strip()
             if not tag:
@@ -1407,7 +1481,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                     self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
                               (id, tid))
         self.conn.commit()
-        tags = ','.join(self.get_tags(id))
+        tags = u','.join(self.get_tags(id))
         self.data.set(id, self.FIELD_MAP['tags'], tags, row_is_id=True)
         if notify:
             self.notify('metadata', [id])
diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py
index 85954f6e0f..1242d0bf7b 100644
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@@ -13,10 +13,12 @@ from threading import Thread
 from Queue import Queue
 from threading import RLock
 from datetime import datetime
+from functools import partial
 
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.utils.config import tweaks
 from calibre.utils.date import parse_date, isoformat
+from calibre import isbytestring
 
 global_lock = RLock()
 
@@ -98,6 +100,19 @@ def _author_to_author_sort(x):
     if not x: return ''
     return author_to_author_sort(x.replace('|', ','))
 
+def pynocase(one, two, encoding='utf-8'):
+    if isbytestring(one):
+        try:
+            one = one.decode(encoding, 'replace')
+        except Exception:
+            pass
+    if isbytestring(two):
+        try:
+            two = two.decode(encoding, 'replace')
+        except Exception:
+            pass
+    return cmp(one.lower(), two.lower())
+
 class DBThread(Thread):
 
     CLOSE = '-------close---------'
@@ -115,10 +130,13 @@ class DBThread(Thread):
     def connect(self):
         self.conn = sqlite.connect(self.path, factory=Connection,
                                    detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
+        encoding = self.conn.execute('pragma encoding').fetchone()[0]
         self.conn.row_factory = sqlite.Row if self.row_factory else  lambda cursor, row : list(row)
         self.conn.create_aggregate('concat', 1, Concatenate)
         self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
         self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
+        self.conn.create_collation('PYNOCASE', partial(pynocase,
+            encoding=encoding))
         if tweaks['title_series_sorting'] == 'strictly_alphabetic':
             self.conn.create_function('title_sort', 1, lambda x:x)
         else: