Various database optimizations

This commit is contained in:
Kovid Goyal 2008-11-01 16:51:22 -07:00
parent d02ec242d5
commit 114c1e799e
5 changed files with 129 additions and 46 deletions

View File

@ -185,7 +185,7 @@ class BooksModel(QAbstractTableModel):
if not self.db: if not self.db:
return return
ascending = order == Qt.AscendingOrder ascending = order == Qt.AscendingOrder
self.db.refresh(self.cols[col], ascending) self.db.sort(self.cols[col], ascending)
self.research() self.research()
if reset: if reset:
self.clear_caches() self.clear_caches()

View File

@ -654,10 +654,6 @@ class Main(MainWindow, Ui_MainWindow):
for row in rows: for row in rows:
d = MetadataSingleDialog(self, row.row(), d = MetadataSingleDialog(self, row.row(),
self.library_view.model().db) self.library_view.model().db)
self.connect(d, SIGNAL('accepted()'), partial(self.metadata_edited, d.id), Qt.QueuedConnection)
def metadata_edited(self, id):
    '''
    Ask the library view's model to refresh the single book `id`,
    passing along the row of the view's current index.
    '''
    view = self.library_view
    model = view.model()
    current_row = view.currentIndex().row()
    model.refresh_ids([id], current_row)
def edit_bulk_metadata(self, checked): def edit_bulk_metadata(self, checked):
''' '''

View File

@ -1,3 +1,12 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Code to manage ebook library''' ''' Code to manage ebook library'''
import re
# Leading English article followed by whitespace, matched case-insensitively.
title_pat = re.compile(r'^(A|The|An)\s+', re.IGNORECASE)


def title_sort(title):
    '''
    Return `title` in a form suitable for sorting.

    A leading article ("A", "An" or "The", in any case, followed by
    whitespace) is moved to the end after a comma, so "The Book" becomes
    "Book, The". Titles without a leading article are returned with
    surrounding whitespace stripped.
    '''
    match = title_pat.search(title)
    if match:
        prep = match.group(1)
        # Drop only the matched prefix. str.replace() would also delete
        # every later occurrence of the article text inside the title
        # ("The Theory" -> "ory, The").
        title = title[match.end():] + ', ' + prep
    return title.strip()

View File

@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
The database used to store ebook metadata The database used to store ebook metadata
''' '''
import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \ import os, re, sys, shutil, cStringIO, glob, collections, textwrap, \
operator, itertools, functools, traceback itertools, functools, traceback
from itertools import repeat from itertools import repeat
from datetime import datetime from datetime import datetime
@ -15,6 +15,7 @@ from PyQt4.QtCore import QCoreApplication, QThread, QReadWriteLock
from PyQt4.QtGui import QApplication, QPixmap, QImage from PyQt4.QtGui import QApplication, QPixmap, QImage
__app = None __app = None
from calibre.library import title_sort
from calibre.library.database import LibraryDatabase from calibre.library.database import LibraryDatabase
from calibre.library.sqlite import connect, IntegrityError from calibre.library.sqlite import connect, IntegrityError
from calibre.utils.search_query_parser import SearchQueryParser from calibre.utils.search_query_parser import SearchQueryParser
@ -218,8 +219,8 @@ class ResultCache(SearchQueryParser):
if id in self._map_filtered: if id in self._map_filtered:
self._map_filtered.remove(id) self._map_filtered.remove(id)
def set(self, row, col, val, row_is_id=False):
    '''
    Store `val` in column `col` of a cached record.

    `row` is an index into the filtered map unless `row_is_id` is True,
    in which case it is used directly as the book id.
    '''
    if row_is_id:
        book_id = row
    else:
        book_id = self._map_filtered[row]
    self._data[book_id][col] = val
def index(self, id, cache=False): def index(self, id, cache=False):
@ -243,6 +244,12 @@ class ResultCache(SearchQueryParser):
self._map[0:0] = ids self._map[0:0] = ids
self._map_filtered[0:0] = ids self._map_filtered[0:0] = ids
def books_deleted(self, ids):
    '''
    Drop the books in `ids` from the cache: their data slot is set to
    None and the id is removed from both the full and the filtered map
    when present.
    '''
    for book_id in ids:
        self._data[book_id] = None
        for mapping in (self._map, self._map_filtered):
            if book_id in mapping:
                mapping.remove(book_id)
def refresh(self, db, field=None, ascending=True): def refresh(self, db, field=None, ascending=True):
temp = db.conn.get('SELECT * FROM meta') temp = db.conn.get('SELECT * FROM meta')
self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else [] self._data = list(itertools.repeat(None, temp[-1][0]+2)) if temp else []
@ -254,19 +261,24 @@ class ResultCache(SearchQueryParser):
self._map_filtered = list(self._map) self._map_filtered = list(self._map)
def seriescmp(self, x, y):
    '''
    Comparison function for sorting book ids `x` and `y` by series.

    Compares column 9 (series) case-insensitively and, when the series
    are equal, falls back to column 10 (series index). Returns a
    negative, zero or positive number like the builtin cmp().
    '''
    series_x, series_y = self._data[x][9], self._data[y][9]
    try:
        # The earlier form read "... if str else ...", but `str` there was
        # the builtin type (always true), so the case-insensitive
        # comparison was the only reachable arm.
        ans = cmp(series_x.lower(), series_y.lower())
    except AttributeError: # Some entries may be None
        ans = cmp(series_x, series_y)
    if ans != 0:
        return ans
    return cmp(self._data[x][10], self._data[y][10])
def cmp(self, loc, x, y, str=True):
    '''
    Comparison function for sorting book ids `x` and `y` by column `loc`.

    When `str` is true the values are compared case-insensitively,
    otherwise they are compared as they are. Ties are broken by a
    case-insensitive comparison of column 11.
    '''
    # NOTE(review): column 11 is presumably the title sort field; confirm
    # against FIELD_MAP.
    rec_x, rec_y = self._data[x], self._data[y]
    val_x, val_y = rec_x[loc], rec_y[loc]
    try:
        if str:
            ans = cmp(val_x.lower(), val_y.lower())
        else:
            ans = cmp(val_x, val_y)
    except AttributeError: # Some entries may be None
        ans = cmp(val_x, val_y)
    if ans != 0:
        return ans
    return cmp(rec_x[11].lower(), rec_y[11].lower())
def sort(self, field, ascending): def sort(self, field, ascending):
import time
start = time.time()
field = field.lower().strip() field = field.lower().strip()
if field in ('author', 'tag', 'comment'): if field in ('author', 'tag', 'comment'):
field += 's' field += 's'
@ -274,9 +286,10 @@ class ResultCache(SearchQueryParser):
elif field == 'title': field = 'sort' elif field == 'title': field = 'sort'
elif field == 'author': field = 'author_sort' elif field == 'author': field = 'author_sort'
fcmp = self.seriescmp if field == 'series' else \ fcmp = self.seriescmp if field == 'series' else \
functools.partial(self.cmp, FIELD_MAP[field], field not in ('size', 'rating', 'timestamp')) functools.partial(self.cmp, FIELD_MAP[field],
str=field not in ('size', 'rating', 'timestamp'))
self._map.sort(cmp=fcmp, reverse=not ascending) self._map.sort(cmp=fcmp, reverse=not ascending)
print time.time() - start
def search(self, query): def search(self, query):
if not query or not query.strip(): if not query or not query.strip():
@ -348,7 +361,6 @@ class LibraryDatabase2(LibraryDatabase):
self.user_version += 1 self.user_version += 1
self.data = ResultCache() self.data = ResultCache()
self.data.refresh()
self.search = self.data.search self.search = self.data.search
self.refresh = functools.partial(self.data.refresh, self) self.refresh = functools.partial(self.data.refresh, self)
self.sort = functools.partial(self.data.refresh, self) self.sort = functools.partial(self.data.refresh, self)
@ -356,6 +368,16 @@ class LibraryDatabase2(LibraryDatabase):
self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn) self.refresh_ids = functools.partial(self.data.refresh_ids, self.conn)
self.row = self.data.row self.row = self.data.row
self.refresh()
def get_property(idx, index_is_id=False, loc=-1):
row = self.data._data[idx] if index_is_id else self.data[idx]
return row[loc]
for prop in ('author_sort', 'authors', 'comment', 'comments', 'isbn',
'publisher', 'rating', 'series', 'series_index', 'tags', 'title'):
setattr(self, prop, functools.partial(get_property, loc=FIELD_MAP['comments' if prop == 'comment' else prop]))
def initialize_database(self): def initialize_database(self):
from calibre.resources import metadata_sqlite from calibre.resources import metadata_sqlite
self.conn.executescript(metadata_sqlite) self.conn.executescript(metadata_sqlite)
@ -617,6 +639,11 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', self.conn.execute('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
(id, format.upper(), size, name)) (id, format.upper(), size, name))
self.conn.commit() self.conn.commit()
try:
fmts = [f.strip().upper() for f in self.data[self.data.row(id)][FIELD_MAP['formats']].split(',')]
except AttributeError:
fmts = []
self.data.set(id, FIELD_MAP['formats'], ','.join(fmts+[format.upper()]), row_is_id=True)
self.notify('metadata', [id]) self.notify('metadata', [id])
def delete_book(self, id): def delete_book(self, id):
@ -633,6 +660,7 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('DELETE FROM books WHERE id=?', (id,)) self.conn.execute('DELETE FROM books WHERE id=?', (id,))
self.conn.commit() self.conn.commit()
self.clean() self.clean()
self.data.books_deleted([id])
self.notify('delete', [id]) self.notify('delete', [id])
def remove_format(self, index, format, index_is_id=False): def remove_format(self, index, format, index_is_id=False):
@ -649,6 +677,9 @@ class LibraryDatabase2(LibraryDatabase):
pass pass
self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper())) self.conn.execute('DELETE FROM data WHERE book=? AND format=?', (id, format.upper()))
self.conn.commit() self.conn.commit()
fmts = [f.strip().upper() for f in self.data[self.data.row(id)][FIELD_MAP['formats']].split(',')]
fmts.remove(format.upper())
self.data.set(id, FIELD_MAP['formats'], ','.join(fmts), row_is_id=True)
self.notify('metadata', [id]) self.notify('metadata', [id])
def clean(self): def clean(self):
@ -706,7 +737,7 @@ class LibraryDatabase2(LibraryDatabase):
elif column == 'publisher': elif column == 'publisher':
self.set_publisher(id, val, notify=False) self.set_publisher(id, val, notify=False)
elif column == 'rating': elif column == 'rating':
self.set_rating(id, val) self.set_rating(id, val, notify=False)
elif column == 'tags': elif column == 'tags':
self.set_tags(id, val.split(','), append=False, notify=False) self.set_tags(id, val.split(','), append=False, notify=False)
self.data.refresh_ids(self.conn, [id]) self.data.refresh_ids(self.conn, [id])
@ -726,11 +757,11 @@ class LibraryDatabase2(LibraryDatabase):
authors += a.split('&') authors += a.split('&')
self.set_authors(id, authors, notify=False) self.set_authors(id, authors, notify=False)
if mi.author_sort: if mi.author_sort:
self.set_author_sort(id, mi.author_sort) self.set_author_sort(id, mi.author_sort, notify=False)
if mi.publisher: if mi.publisher:
self.set_publisher(id, mi.publisher, notify=False) self.set_publisher(id, mi.publisher, notify=False)
if mi.rating: if mi.rating:
self.set_rating(id, mi.rating) self.set_rating(id, mi.rating, notify=False)
if mi.series: if mi.series:
self.set_series(id, mi.series, notify=False) self.set_series(id, mi.series, notify=False)
if mi.cover_data[1] is not None: if mi.cover_data[1] is not None:
@ -738,9 +769,11 @@ class LibraryDatabase2(LibraryDatabase):
if mi.tags: if mi.tags:
self.set_tags(id, mi.tags, notify=False) self.set_tags(id, mi.tags, notify=False)
if mi.comments: if mi.comments:
self.set_comment(id, mi.comments) self.set_comment(id, mi.comments, notify=False)
if mi.isbn and mi.isbn.strip(): if mi.isbn and mi.isbn.strip():
self.set_isbn(id, mi.isbn) self.set_isbn(id, mi.isbn, notify=False)
if mi.series_index and mi.series_index > 0:
self.set_series_index(id, mi.series_index, notify=False)
self.set_path(id, True) self.set_path(id, True)
self.notify('metadata', [id]) self.notify('metadata', [id])
@ -748,6 +781,8 @@ class LibraryDatabase2(LibraryDatabase):
''' '''
`authors`: A list of authors. `authors`: A list of authors.
''' '''
if not authors:
authors = [_('Unknown')]
self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,)) self.conn.execute('DELETE FROM books_authors_link WHERE book=?',(id,))
self.conn.execute('DELETE FROM authors WHERE (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) < 1') self.conn.execute('DELETE FROM authors WHERE (SELECT COUNT(id) FROM books_authors_link WHERE author=authors.id) < 1')
for a in authors: for a in authors:
@ -767,7 +802,11 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('INSERT INTO books_authors_link(book, author) VALUES (?,?)', (id, aid)) self.conn.execute('INSERT INTO books_authors_link(book, author) VALUES (?,?)', (id, aid))
except IntegrityError: # Sometimes books specify the same author twice in their metadata except IntegrityError: # Sometimes books specify the same author twice in their metadata
pass pass
self.conn.commit()
self.set_path(id, True) self.set_path(id, True)
self.data.set(id, FIELD_MAP['authors'], ','.join([a.replace(',', '|') for a in authors]), row_is_id=True)
self.data.set(id, FIELD_MAP['author_sort'], self.data[self.data.row(id)][FIELD_MAP['authors']], row_is_id=True)
if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_title(self, id, title, notify=True): def set_title(self, id, title, notify=True):
@ -777,6 +816,10 @@ class LibraryDatabase2(LibraryDatabase):
title = title.decode(preferred_encoding, 'replace') title = title.decode(preferred_encoding, 'replace')
self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id)) self.conn.execute('UPDATE books SET title=? WHERE id=?', (title, id))
self.set_path(id, True) self.set_path(id, True)
self.data.set(id, FIELD_MAP['title'], title, row_is_id=True)
self.data.set(id, FIELD_MAP['sort'], title_sort(title), row_is_id=True)
self.conn.commit()
if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_publisher(self, id, publisher, notify=True): def set_publisher(self, id, publisher, notify=True):
@ -792,6 +835,8 @@ class LibraryDatabase2(LibraryDatabase):
aid = self.conn.execute('INSERT INTO publishers(name) VALUES (?)', (publisher,)).lastrowid aid = self.conn.execute('INSERT INTO publishers(name) VALUES (?)', (publisher,)).lastrowid
self.conn.execute('INSERT INTO books_publishers_link(book, publisher) VALUES (?,?)', (id, aid)) self.conn.execute('INSERT INTO books_publishers_link(book, publisher) VALUES (?,?)', (id, aid))
self.conn.commit() self.conn.commit()
self.data.set(id, FIELD_MAP['publisher'], publisher, row_is_id=True)
if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_tags(self, id, tags, append=False, notify=True): def set_tags(self, id, tags, append=False, notify=True):
@ -819,6 +864,15 @@ class LibraryDatabase2(LibraryDatabase):
self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)', self.conn.execute('INSERT INTO books_tags_link(book, tag) VALUES (?,?)',
(id, tid)) (id, tid))
self.conn.commit() self.conn.commit()
try:
otags = [t.strip() for t in self.data[self.data.row(id)][FIELD_MAP['tags']].split(',')]
except AttributeError:
otags = []
if not append:
otags = []
tags = ','.join(otags+tags)
self.data.set(id, FIELD_MAP['tags'], tags, row_is_id=True)
if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
@ -841,6 +895,8 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(row, 9, series) self.data.set(row, 9, series)
except ValueError: except ValueError:
pass pass
self.data.set(id, FIELD_MAP['series'], series, row_is_id=True)
if notify:
self.notify('metadata', [id]) self.notify('metadata', [id])
def set_series_index(self, id, idx, notify=True): def set_series_index(self, id, idx, notify=True):
@ -855,6 +911,41 @@ class LibraryDatabase2(LibraryDatabase):
self.data.set(row, 10, idx) self.data.set(row, 10, idx)
except ValueError: except ValueError:
pass pass
self.data.set(id, FIELD_MAP['series_index'], int(idx), row_is_id=True)
if notify:
self.notify('metadata', [id])
def set_rating(self, id, rating, notify=True):
    '''
    Set the rating of book `id` to `rating` (coerced with int()).

    The book's existing rating link is removed, the matching row of the
    ratings table is reused or created, the change is committed and the
    in-memory cache is updated. A 'metadata' notification is sent unless
    `notify` is False.
    '''
    rating = int(rating)
    self.conn.execute('DELETE FROM books_ratings_link WHERE book=?',(id,))
    rating_id = self.conn.get('SELECT id FROM ratings WHERE rating=?', (rating,), all=False)
    if not rating_id:
        # No row for this rating value yet, so create it.
        cursor = self.conn.execute('INSERT INTO ratings(rating) VALUES (?)', (rating,))
        rating_id = cursor.lastrowid
    self.conn.execute('INSERT INTO books_ratings_link(book, rating) VALUES (?,?)', (id, rating_id))
    self.conn.commit()
    self.data.set(id, FIELD_MAP['rating'], rating, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def set_comment(self, id, text, notify=True):
    '''
    Replace the comment stored for book `id` with `text`.

    Any existing comment row is deleted, the new one is inserted and
    committed, and the in-memory cache is updated. A 'metadata'
    notification is sent unless `notify` is False.
    '''
    conn = self.conn
    conn.execute('DELETE FROM comments WHERE book=?', (id,))
    conn.execute('INSERT INTO comments(book,text) VALUES (?,?)', (id, text))
    conn.commit()
    self.data.set(id, FIELD_MAP['comments'], text, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def set_author_sort(self, id, sort, notify=True):
    '''
    Store `sort` as the author sort string of book `id`.

    The change is committed to the database and mirrored in the
    in-memory cache. A 'metadata' notification is sent unless `notify`
    is False.
    '''
    conn = self.conn
    conn.execute('UPDATE books SET author_sort=? WHERE id=?', (sort, id))
    conn.commit()
    self.data.set(id, FIELD_MAP['author_sort'], sort, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def set_isbn(self, id, isbn, notify=True):
    '''
    Store `isbn` as the ISBN of book `id`.

    The change is committed to the database and mirrored in the
    in-memory cache. A 'metadata' notification is sent unless `notify`
    is False.
    '''
    conn = self.conn
    conn.execute('UPDATE books SET isbn=? WHERE id=?', (isbn, id))
    conn.commit()
    self.data.set(id, FIELD_MAP['isbn'], isbn, row_is_id=True)
    if not notify:
        return
    self.notify('metadata', [id])
def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True): def add_books(self, paths, formats, metadata, uris=[], add_duplicates=True):
@ -880,6 +971,7 @@ class LibraryDatabase2(LibraryDatabase):
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)', obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, uri, series_index, aus)) (mi.title, uri, series_index, aus))
id = obj.lastrowid id = obj.lastrowid
self.data.books_added([id], self.conn)
ids.append(id) ids.append(id)
self.set_path(id, True) self.set_path(id, True)
self.conn.commit() self.conn.commit()
@ -891,8 +983,6 @@ class LibraryDatabase2(LibraryDatabase):
if not hasattr(path, 'read'): if not hasattr(path, 'read'):
stream.close() stream.close()
self.conn.commit() self.conn.commit()
if ids:
self.data.books_added(ids, self.conn)
if duplicates: if duplicates:
paths = tuple(duplicate[0] for duplicate in duplicates) paths = tuple(duplicate[0] for duplicate in duplicates)
formats = tuple(duplicate[1] for duplicate in duplicates) formats = tuple(duplicate[1] for duplicate in duplicates)
@ -909,6 +999,7 @@ class LibraryDatabase2(LibraryDatabase):
obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)', obj = self.conn.execute('INSERT INTO books(title, uri, series_index, author_sort) VALUES (?, ?, ?, ?)',
(mi.title, None, series_index, aus)) (mi.title, None, series_index, aus))
id = obj.lastrowid id = obj.lastrowid
self.data.books_added([id], self.conn)
self.set_path(id, True) self.set_path(id, True)
self.set_metadata(id, mi) self.set_metadata(id, mi)
for path in formats: for path in formats:
@ -916,7 +1007,6 @@ class LibraryDatabase2(LibraryDatabase):
stream = open(path, 'rb') stream = open(path, 'rb')
self.add_format(id, ext, stream, index_is_id=True) self.add_format(id, ext, stream, index_is_id=True)
self.conn.commit() self.conn.commit()
self.data.books_added([id], self.conn)
self.notify('add', [id]) self.notify('add', [id])
def move_library_to(self, newloc, progress=None): def move_library_to(self, newloc, progress=None):
@ -966,8 +1056,6 @@ class LibraryDatabase2(LibraryDatabase):
def __iter__(self): def __iter__(self):
if len(self.data._data) == 0:
self.refresh('timestamp', True)
for record in self.data._data: for record in self.data._data:
if record is not None: if record is not None:
yield record yield record
@ -991,8 +1079,6 @@ class LibraryDatabase2(LibraryDatabase):
prefix = self.library_path prefix = self.library_path
FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn']) FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'isbn'])
data = [] data = []
if len(self.data) == 0:
self.refresh(None, True)
for record in self.data: for record in self.data:
if record is None: continue if record is None: continue
x = {} x = {}

View File

@ -7,11 +7,12 @@ __docformat__ = 'restructuredtext en'
Wrapper for multi-threaded access to a single sqlite database connection. Serializes Wrapper for multi-threaded access to a single sqlite database connection. Serializes
all calls. all calls.
''' '''
import sqlite3 as sqlite, traceback, re, time import sqlite3 as sqlite, traceback, time
from sqlite3 import IntegrityError from sqlite3 import IntegrityError
from threading import Thread from threading import Thread
from Queue import Queue from Queue import Queue
from calibre.library import title_sort
class Concatenate(object): class Concatenate(object):
'''String concatenation aggregator for sqlite''' '''String concatenation aggregator for sqlite'''
@ -61,15 +62,6 @@ class DBThread(Thread):
detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES)
self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row)
self.conn.create_aggregate('concat', 1, Concatenate) self.conn.create_aggregate('concat', 1, Concatenate)
title_pat = re.compile('^(A|The|An)\s+', re.IGNORECASE)
def title_sort(title):
match = title_pat.search(title)
if match:
prep = match.group(1)
title = title.replace(prep, '') + ', ' + prep
return title.strip()
self.conn.create_function('title_sort', 1, title_sort) self.conn.create_function('title_sort', 1, title_sort)
def run(self): def run(self):