From 8b40b9f22c81e90060c8d09334cad3dfeed4ab74 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 16 Aug 2010 19:13:16 -0600
Subject: [PATCH] DB: Store temporary tables in memory. Fix #6472 (Add/change
 tags for large number of eBooks is  s l o w)

---
 src/calibre/gui2/dialogs/metadata_bulk.py | 18 +++--
 src/calibre/library/database2.py          | 86 +++++++++++++++++++++--
 src/calibre/library/sqlite.py             | 18 +++++
 3 files changed, 106 insertions(+), 16 deletions(-)

diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 9fcfe13253..05c4f48cf3 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -27,8 +27,9 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
         self.changed = False
         QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)
 
-        self.tags.update_tags_cache(self.db.all_tags())
-        self.remove_tags.update_tags_cache(self.db.all_tags())
+        all_tags = self.db.all_tags()
+        self.tags.update_tags_cache(all_tags)
+        self.remove_tags.update_tags_cache(all_tags)
 
         self.initialize_combos()
 
@@ -103,6 +104,11 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
             self.remove_tags.update_tags_cache(self.db.all_tags())
 
     def sync(self):
+        remove = unicode(self.remove_tags.text()).strip().split(',')
+        add = unicode(self.tags.text()).strip().split(',')
+        self.db.bulk_modify_tags(self.ids, add=add, remove=remove)
+
+
         for id in self.ids:
             au = unicode(self.authors.text())
             if au:
@@ -120,14 +126,6 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
             pub = unicode(self.publisher.text())
             if pub:
                 self.db.set_publisher(id, pub, notify=False)
-            remove_tags = unicode(self.remove_tags.text()).strip()
-            if remove_tags:
-                remove_tags = [i.strip() for i in remove_tags.split(',')]
-                self.db.unapply_tags(id, remove_tags, notify=False)
-            tags = unicode(self.tags.text()).strip()
-            if tags:
-                tags = map(lambda x: x.strip(), tags.split(','))
-                self.db.set_tags(id, tags, append=True, notify=False)
             if self.write_series:
                 series = unicode(self.series.currentText()).strip()
                 next = self.db.get_next_series_num_for(series)
diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py
index 36a31b78a2..b8ac065760 100644
--- a/src/calibre/library/database2.py
+++ b/src/calibre/library/database2.py
@@ -26,7 +26,7 @@ from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
 from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.customize.ui import run_plugins_on_import
-
+from calibre import isbytestring
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
 from calibre.utils.config import prefs, tweaks
@@ -116,6 +116,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
         # so that various code taht connects directly will not complain about
         # missing functions
         self.books_list_filter = self.conn.create_dynamic_filter('books_list_filter')
+        # Store temporary tables in memory
+        self.conn.execute('pragma temp_store=2')
+        self.conn.commit()
 
     @classmethod
     def exists_at(cls, path):
@@ -1369,6 +1372,80 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             return set([])
         return set([r[0] for r in result])
 
+    @classmethod
+    def cleanup_tags(cls, tags):
+        '''Strip, decode, collapse whitespace in and case-insensitively de-duplicate tags.'''
+        ans, seen = [], set([])
+        for raw in (x.strip() for x in tags):
+            if isbytestring(raw):
+                raw = raw.decode(preferred_encoding, 'replace')
+            tag = u' '.join(raw.split())
+            if raw and tag.lower() not in seen:
+                seen.add(tag.lower())
+                ans.append(tag)
+        return ans
+
+    def bulk_modify_tags(self, ids, add=[], remove=[], notify=False):
+        '''
+        Apply the tags in add to and unapply the tags in remove from all the
+        books in ids, using temporary tables and set based SQL statements.
+        A tag in both lists is only added. notify=True calls self.notify.
+        '''
+        add = self.cleanup_tags(add)
+        remove = set(self.cleanup_tags(remove)) - set(add)
+        if not ids or (not add and not remove):
+            return
+
+        # Add tags that do not already exist into the tag table
+        known = set([t.lower() for t in self.all_tags()])
+        new_tags = [(t,) for t in add if t.lower() not in known]
+        if new_tags:
+            self.conn.executemany('INSERT INTO tags(name) VALUES (?)', new_tags)
+
+        # Temporary tables holding the ids of the books and tags to operate on
+        tables = ('temp_bulk_tag_edit_books', 'temp_bulk_tag_edit_add',
+                    'temp_bulk_tag_edit_remove')
+        drops = '\n'.join(['DROP TABLE IF EXISTS %s;'%t for t in tables])
+        creates = '\n'.join(['CREATE TEMP TABLE %s(id INTEGER PRIMARY KEY);'%t
+                for t in tables])
+        self.conn.executescript(drops + creates)
+
+        # Populate the books temp table
+        self.conn.executemany('INSERT INTO temp_bulk_tag_edit_books VALUES (?)',
+                [(x,) for x in ids])
+
+        # Populate the add/remove tags temp tables
+        insert = ('INSERT INTO %s(id) SELECT tags.id FROM tags WHERE name=?'
+                 ' COLLATE PYNOCASE LIMIT 1')
+        for table, tags in zip(tables[1:], (add, remove)):
+            if tags:
+                self.conn.executemany(insert%table, [(x,) for x in tags])
+
+        if remove:
+            self.conn.execute(
+              '''DELETE FROM books_tags_link WHERE
+                    book IN (SELECT id FROM %s) AND
+                    tag IN (SELECT id FROM %s)'''
+              % (tables[0], tables[2]))
+
+        if add:
+            # Skip pairs that are already linked to avoid duplicate link rows
+            self.conn.execute(
+            '''
+            INSERT INTO books_tags_link(book, tag) SELECT {0}.id, {1}.id FROM
+            {0}, {1} WHERE NOT EXISTS (SELECT 1 FROM books_tags_link WHERE
+                book={0}.id AND tag={1}.id)
+            '''.format(tables[0], tables[1])
+            )
+        self.conn.executescript(drops)
+        self.conn.commit()
+
+        for x in ids:
+            tags = u','.join(self.get_tags(x))
+            self.data.set(x, self.FIELD_MAP['tags'], tags, row_is_id=True)
+        if notify:
+            self.notify('metadata', ids)
+
     def set_tags(self, id, tags, append=False, notify=True):
         '''
         @param tags: list of strings
@@ -1378,10 +1455,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
             self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,))
             self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1')
         otags = self.get_tags(id)
-        tags = [x.strip() for x in tags if x.strip()]
-        tags = [x.decode(preferred_encoding, 'replace') if not isinstance(x,
-            unicode) else x for x in tags]
-        tags = [u' '.join(x.split()) for x in tags]
+        tags = self.cleanup_tags(tags)
         for tag in (set(tags)-otags):
             tag = tag.strip()
             if not tag:
@@ -1407,7 +1481,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
                 self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
                               (id, tid))
         self.conn.commit()
-        tags = ','.join(self.get_tags(id))
+        tags = u','.join(self.get_tags(id))
         self.data.set(id, self.FIELD_MAP['tags'], tags, row_is_id=True)
         if notify:
             self.notify('metadata', [id])
diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py
index 85954f6e0f..1242d0bf7b 100644
--- a/src/calibre/library/sqlite.py
+++ b/src/calibre/library/sqlite.py
@@ -13,10 +13,12 @@ from threading import Thread
 from Queue import Queue
 from threading import RLock
 from datetime import datetime
+from functools import partial
 
 from calibre.ebooks.metadata import title_sort, author_to_author_sort
 from calibre.utils.config import tweaks
 from calibre.utils.date import parse_date, isoformat
+from calibre import isbytestring
 
 global_lock = RLock()
 
@@ -98,6 +100,19 @@ def _author_to_author_sort(x):
     if not x: return ''
     return author_to_author_sort(x.replace('|', ','))
 
+def pynocase(one, two, encoding='utf-8'):
+    '''Case-insensitive cmp() style comparison of two strings. Byte strings
+    are decoded with `encoding` (undecodable bytes replaced) before comparing.'''
+    vals = []
+    for val in (one, two):
+        if isbytestring(val):
+            try:
+                val = val.decode(encoding, 'replace')
+            except (LookupError, UnicodeError):
+                pass  # best effort: compare the undecoded bytes
+        vals.append(val.lower())
+    return cmp(*vals)
+
 class DBThread(Thread):
 
     CLOSE = '-------close---------'
@@ -115,10 +130,13 @@ class DBThread(Thread):
     def connect(self):
         self.conn = sqlite.connect(self.path, factory=Connection,
                                    detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
+        encoding = self.conn.execute('pragma encoding').fetchone()[0]
         self.conn.row_factory = sqlite.Row if self.row_factory else  lambda cursor, row : list(row)
         self.conn.create_aggregate('concat', 1, Concatenate)
         self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
         self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
+        self.conn.create_collation('PYNOCASE', partial(pynocase,
+            encoding=encoding))
         if tweaks['title_series_sorting'] == 'strictly_alphabetic':
             self.conn.create_function('title_sort', 1, lambda x:x)
         else: