DB: Store temporary tables in memory. Fix #6472 (Add/change tags for large number of eBooks is s l o w)

This commit is contained in:
Kovid Goyal 2010-08-16 19:13:16 -06:00
parent 6bbc5c3e2b
commit 8b40b9f22c
3 changed files with 106 additions and 16 deletions

View File

@ -27,8 +27,9 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.changed = False self.changed = False
QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync) QObject.connect(self.button_box, SIGNAL("accepted()"), self.sync)
self.tags.update_tags_cache(self.db.all_tags()) all_tags = self.db.all_tags()
self.remove_tags.update_tags_cache(self.db.all_tags()) self.tags.update_tags_cache(all_tags)
self.remove_tags.update_tags_cache(all_tags)
self.initialize_combos() self.initialize_combos()
@ -103,6 +104,11 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
self.remove_tags.update_tags_cache(self.db.all_tags()) self.remove_tags.update_tags_cache(self.db.all_tags())
def sync(self): def sync(self):
remove = unicode(self.remove_tags.text()).strip().split(',')
add = unicode(self.tags.text()).strip().split(',')
self.db.bulk_modify_tags(self.ids, add=add, remove=remove)
for id in self.ids: for id in self.ids:
au = unicode(self.authors.text()) au = unicode(self.authors.text())
if au: if au:
@ -120,14 +126,6 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
pub = unicode(self.publisher.text()) pub = unicode(self.publisher.text())
if pub: if pub:
self.db.set_publisher(id, pub, notify=False) self.db.set_publisher(id, pub, notify=False)
remove_tags = unicode(self.remove_tags.text()).strip()
if remove_tags:
remove_tags = [i.strip() for i in remove_tags.split(',')]
self.db.unapply_tags(id, remove_tags, notify=False)
tags = unicode(self.tags.text()).strip()
if tags:
tags = map(lambda x: x.strip(), tags.split(','))
self.db.set_tags(id, tags, append=True, notify=False)
if self.write_series: if self.write_series:
series = unicode(self.series.currentText()).strip() series = unicode(self.series.currentText()).strip()
next = self.db.get_next_series_num_for(series) next = self.db.get_next_series_num_for(series)

View File

@ -26,7 +26,7 @@ from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding from calibre.constants import preferred_encoding, iswindows, isosx, filesystem_encoding
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.customize.ui import run_plugins_on_import from calibre.customize.ui import run_plugins_on_import
from calibre import isbytestring
from calibre.utils.filenames import ascii_filename from calibre.utils.filenames import ascii_filename
from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp from calibre.utils.date import utcnow, now as nowf, utcfromtimestamp
from calibre.utils.config import prefs, tweaks from calibre.utils.config import prefs, tweaks
@ -116,6 +116,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
# so that various code that connects directly will not complain about # so that various code that connects directly will not complain about
# missing functions # missing functions
self.books_list_filter = self.conn.create_dynamic_filter('books_list_filter') self.books_list_filter = self.conn.create_dynamic_filter('books_list_filter')
# Store temporary tables in memory
self.conn.execute('pragma temp_store=2')
self.conn.commit()
@classmethod @classmethod
def exists_at(cls, path): def exists_at(cls, path):
@ -1369,6 +1372,80 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
return set([]) return set([])
return set([r[0] for r in result]) return set([r[0] for r in result])
@classmethod
def cleanup_tags(cls, tags):
    '''
    Normalise a sequence of tag names.

    Each tag is stripped, decoded to unicode with preferred_encoding when
    it is a byte string, and has internal runs of whitespace collapsed to
    a single space. Empty tags are dropped, as are tags that differ from
    an earlier one only by case.

    @param tags: iterable of tag names
    @return: list of cleaned tags, in their original order
    '''
    cleaned = []
    lowered = set()
    for raw in tags:
        name = raw.strip()
        if not name:
            continue
        if isbytestring(name):
            name = name.decode(preferred_encoding, 'replace')
        name = u' '.join(name.split())
        key = name.lower()
        if key in lowered:
            # Only the first spelling of a tag is kept
            continue
        lowered.add(key)
        cleaned.append(name)
    return cleaned
def bulk_modify_tags(self, ids, add=(), remove=(), notify=False):
    '''
    Add and/or remove tags on many books at once.

    The book ids and the ids of the affected tags are loaded into temporary
    tables so that the link table can be updated with one DELETE and one
    INSERT statement instead of one round trip per book.

    @param ids: ids of the books to modify
    @param add: tag names to apply to every book in ids
    @param remove: tag names to take off every book in ids. A name that is
                   also being added (ignoring case) is not removed.
    @param notify: if True, call self.notify('metadata', ids) when done
    '''
    add = self.cleanup_tags(add)
    remove = self.cleanup_tags(remove)
    # Tag names are matched case insensitively everywhere else in this
    # method, so discard removals that collide with an addition the same way
    adding = set(t.lower() for t in add)
    remove = [t for t in remove if t.lower() not in adding]
    if not ids or (not add and not remove):
        return

    # Add tags that do not already exist into the tag table
    known = set(t.lower() for t in self.all_tags())
    new_tags = [t for t in add if t.lower() not in known]
    if new_tags:
        self.conn.executemany('INSERT INTO tags(name) VALUES (?)',
                [(x,) for x in new_tags])

    # Create the temporary tables to store the ids for books and tags
    # to be operated on. They are dropped first in case an earlier call
    # failed before it could clean up after itself.
    tables = ('temp_bulk_tag_edit_books', 'temp_bulk_tag_edit_add',
              'temp_bulk_tag_edit_remove')
    drops = '\n'.join(['DROP TABLE IF EXISTS %s;'%t for t in tables])
    creates = '\n'.join(['CREATE TEMP TABLE %s(id INTEGER PRIMARY KEY);'%t
            for t in tables])
    self.conn.executescript(drops + creates)

    # Populate the books temp table
    self.conn.executemany(
        'INSERT INTO temp_bulk_tag_edit_books VALUES (?)',
        [(x,) for x in ids])

    # Populate the add/remove tags temp tables, resolving each name to
    # the id of a tag that matches it case insensitively
    insert = ('INSERT INTO %s(id) SELECT tags.id FROM tags WHERE name=?'
              ' COLLATE PYNOCASE LIMIT 1')
    for table, tags in zip(tables[1:], (add, remove)):
        if not tags:
            continue
        self.conn.executemany(insert%table, [(x,) for x in tags])

    if remove:
        self.conn.execute(
          '''DELETE FROM books_tags_link WHERE
                book IN (SELECT id FROM %s) AND
                tag IN (SELECT id FROM %s)'''
          % (tables[0], tables[2]))

    if add:
        # Link every book to every tag being added, skipping pairs that
        # are already linked. Without the guard, re-applying a tag that a
        # book already carries would insert a second link row (or fail,
        # if books_tags_link declares the pair unique).
        self.conn.execute(
        '''
        INSERT INTO books_tags_link(book, tag)
        SELECT {0}.id, {1}.id FROM {0}, {1}
        WHERE NOT EXISTS (
            SELECT 1 FROM books_tags_link
            WHERE books_tags_link.book={0}.id AND
                  books_tags_link.tag={1}.id)
        '''.format(tables[0], tables[1])
        )
    self.conn.executescript(drops)
    self.conn.commit()

    # Refresh the cached tags field of every book that was touched
    for x in ids:
        tags = u','.join(self.get_tags(x))
        self.data.set(x, self.FIELD_MAP['tags'], tags, row_is_id=True)
    if notify:
        self.notify('metadata', ids)
def set_tags(self, id, tags, append=False, notify=True): def set_tags(self, id, tags, append=False, notify=True):
''' '''
@param tags: list of strings @param tags: list of strings
@ -1378,10 +1455,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,)) self.conn.execute('DELETE FROM books_tags_link WHERE book=?', (id,))
self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1') self.conn.execute('DELETE FROM tags WHERE (SELECT COUNT(id) FROM books_tags_link WHERE tag=tags.id) < 1')
otags = self.get_tags(id) otags = self.get_tags(id)
tags = [x.strip() for x in tags if x.strip()] tags = self.cleanup_tags(tags)
tags = [x.decode(preferred_encoding, 'replace') if not isinstance(x,
unicode) else x for x in tags]
tags = [u' '.join(x.split()) for x in tags]
for tag in (set(tags)-otags): for tag in (set(tags)-otags):
tag = tag.strip() tag = tag.strip()
if not tag: if not tag:
@ -1407,7 +1481,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)', self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
(id, tid)) (id, tid))
self.conn.commit() self.conn.commit()
tags = ','.join(self.get_tags(id)) tags = u','.join(self.get_tags(id))
self.data.set(id, self.FIELD_MAP['tags'], tags, row_is_id=True) self.data.set(id, self.FIELD_MAP['tags'], tags, row_is_id=True)
if notify: if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])

View File

@ -13,10 +13,12 @@ from threading import Thread
from Queue import Queue from Queue import Queue
from threading import RLock from threading import RLock
from datetime import datetime from datetime import datetime
from functools import partial
from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.config import tweaks from calibre.utils.config import tweaks
from calibre.utils.date import parse_date, isoformat from calibre.utils.date import parse_date, isoformat
from calibre import isbytestring
global_lock = RLock() global_lock = RLock()
@ -98,6 +100,19 @@ def _author_to_author_sort(x):
if not x: return '' if not x: return ''
return author_to_author_sort(x.replace('|', ',')) return author_to_author_sort(x.replace('|', ','))
def _pynocase_text(value, encoding):
    # Decode a byte string for comparison; hand anything else back untouched
    if isbytestring(value):
        try:
            return value.decode(encoding, 'replace')
        except Exception:
            # Best effort: fall through and compare the undecoded value
            pass
    return value

def pynocase(one, two, encoding='utf-8'):
    '''
    Case insensitive comparison of two strings.

    Byte strings are decoded with encoding (undecodable bytes are
    replaced) before both values are lower cased and compared.

    @param one: first string
    @param two: second string
    @param encoding: encoding used to decode byte string arguments
    @return: -1, 0 or 1 when one sorts before, equal to or after two
    '''
    one = _pynocase_text(one, encoding).lower()
    two = _pynocase_text(two, encoding).lower()
    # Same result as cmp(one, two), spelled so it does not need the builtin
    return (one > two) - (one < two)
class DBThread(Thread): class DBThread(Thread):
CLOSE = '-------close---------' CLOSE = '-------close---------'
@ -115,10 +130,13 @@ class DBThread(Thread):
def connect(self): def connect(self):
self.conn = sqlite.connect(self.path, factory=Connection, self.conn = sqlite.connect(self.path, factory=Connection,
detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
encoding = self.conn.execute('pragma encoding').fetchone()[0]
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate) self.conn.create_aggregate('concat', 1, Concatenate)
self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) self.conn.create_aggregate('sortconcat', 2, SortedConcatenate)
self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate) self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate)
self.conn.create_collation('PYNOCASE', partial(pynocase,
encoding=encoding))
if tweaks['title_series_sorting'] == 'strictly_alphabetic': if tweaks['title_series_sorting'] == 'strictly_alphabetic':
self.conn.create_function('title_sort', 1, lambda x:x) self.conn.create_function('title_sort', 1, lambda x:x)
else: else: