Mirror of https://github.com/kovidgoyal/calibre.git, synced 2025-07-09 03:04:10 -04:00
Start work on new db backend

commit 50dadb45cf, parent 15d8272efe
src/calibre/db/__init__.py (new file, +67 lines)
@@ -0,0 +1,67 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Rewrite of the calibre database backend.

Broad Objectives:

    * Use the sqlite db only as a datastore, i.e. do not do
      sorting/searching/concatenation or anything else in sqlite. Instead,
      mirror the sqlite tables in memory, create caches and lookup maps from
      them and create a set_* API that updates the memory caches and the
      sqlite correctly.

    * Move from keeping a list of books in memory as a cache to a per-table
      cache. This allows much faster search and sort operations at the
      expense of slightly slower lookup operations. That slowdown can be
      mitigated by keeping lots of maps and updating them in the set_* API.
      Also, get_categories becomes blazingly fast. (A sketch of this cache
      design follows this file listing.)

    * Separate the database layer from the cache layer more cleanly. Rather
      than having the db layer refer to the cache layer and vice versa, the
      cache layer will refer to the db layer only and the new API will be
      defined on the cache layer.

    * Get rid of index_is_id and other poor design decisions.

    * Minimize the API as much as possible and define it cleanly.

    * Do not change the on-disk format of metadata.db at all (this is for
      backwards compatibility).

    * Get rid of the need for a separate db access thread by switching to
      apsw to access sqlite, which is thread safe.

    * The new API will have methods to efficiently do bulk operations and
      will use shared/exclusive/pending locks to serialize access to the
      in-memory data structures. Use the same locking scheme as sqlite
      itself does.

How this will proceed:

    1. Create the new API
    2. Create a test suite for it
    3. Write a replacement for LibraryDatabase2 that uses the new API
       internally
    4. Lots of testing of calibre with the new LibraryDatabase2
    5. Gradually migrate code to use the (much faster) new API wherever
       possible (the new API will be exposed via db.new_api)

I plan to work on this slowly, in parallel to normal calibre development
work.

Various things that require other things before they can be migrated:

    1. From initialize_dynamic(): set_saved_searches,
       load_user_template_functions. Also add custom
       columns/categories/searches info into self.field_metadata. Finally,
       implement metadata dirtied functionality.
'''
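The per-table cache from the second objective can be sketched roughly as
follows. This is a minimal illustration only, not code from the commit: the
class and method names (TagsTable, set_tags) are hypothetical, only the
books_tags_link schema is calibre's real link table, and a real
implementation would also need the shared/exclusive locking described above.

    from collections import defaultdict

    class TagsTable(object):

        def __init__(self, id_map, links):
            # id_map: {tag_id: tag_name}; links: iterable of
            # (book_id, tag_id) rows, mirrored from sqlite once at load time
            self.id_map = dict(id_map)
            self.book_map = defaultdict(set)      # book_id -> set of tag_ids
            self.col_book_map = defaultdict(set)  # tag_id  -> set of book_ids
            for book_id, tag_id in links:
                self.book_map[book_id].add(tag_id)
                self.col_book_map[tag_id].add(book_id)

        def set_tags(self, book_id, tag_ids, conn=None):
            # The set_* pattern: update the in-memory maps and, when a db
            # connection is supplied, sqlite, in one place so they stay in sync
            old = self.book_map.get(book_id, set())
            for tid in old - set(tag_ids):
                self.col_book_map[tid].discard(book_id)
            for tid in tag_ids:
                self.col_book_map[tid].add(book_id)
            self.book_map[book_id] = set(tag_ids)
            if conn is not None:
                conn.execute('DELETE FROM books_tags_link WHERE book=?',
                             (book_id,))
                conn.executemany(
                    'INSERT INTO books_tags_link (book,tag) VALUES (?,?)',
                    [(book_id, t) for t in tag_ids])

        def get_categories(self):
            # No SQL at all: just walk the reverse map
            return dict((self.id_map[t], len(b))
                        for t, b in self.col_book_map.items())

For example, TagsTable({1: 'fiction'}, [(10, 1)]).get_categories() returns
{'fiction': 1} without touching sqlite.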
src/calibre/db/backend.py (new file, +404 lines)
@@ -0,0 +1,404 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
                        print_function)

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Imports {{{
import os, shutil, uuid, json
from functools import partial

import apsw

from calibre import isbytestring, force_unicode, prints
from calibre.constants import (iswindows, filesystem_encoding,
        preferred_encoding)
from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.schema_upgrades import SchemaUpgrade
from calibre.library.field_metadata import FieldMetadata
from calibre.ebooks.metadata import title_sort, author_to_author_sort
from calibre.utils.icu import strcmp
from calibre.utils.config import to_json, from_json, prefs, tweaks
from calibre.utils.date import utcfromtimestamp
# }}}

'''
Differences in semantics from pysqlite:

    1. execute/executemany/executescript operate in autocommit mode

'''
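# Because of point 1 above, multi-statement changes that must be atomic need
# an explicit transaction; apsw Connection objects are context managers that
# provide one. A minimal sketch, not part of this commit (the SQL is
# illustrative):

def _atomic_update_sketch(conn):
    with conn:  # BEGIN ... COMMIT, or ROLLBACK if an exception escapes
        conn.execute('UPDATE books SET title=? WHERE id=?', ('New title', 1))
        conn.execute('UPDATE books SET sort=? WHERE id=?', ('new title', 1))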
class DynamicFilter(object): # {{{

    'No longer used, present for legacy compatibility'

    def __init__(self, name):
        self.name = name
        self.ids = frozenset([])

    def __call__(self, id_):
        return int(id_ in self.ids)

    def change(self, ids):
        self.ids = frozenset(ids)
# }}}
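# A minimal sketch, not part of this commit, of how a DynamicFilter is used:
# registered as a one-argument SQL function, it restricts queries to an id
# set that can be changed later without touching the SQL. The function name
# here is illustrative:

def _dynamic_filter_sketch(conn):
    f = DynamicFilter('only_these')
    conn.createscalarfunction('only_these', f, 1)
    f.change([1, 2, 3])
    return conn.get('SELECT id FROM books WHERE only_these(id)')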
class DBPrefs(dict): # {{{

    'Store preferences as key:value pairs in the db'

    def __init__(self, db):
        dict.__init__(self)
        self.db = db
        self.defaults = {}
        self.disable_setting = False
        for key, val in self.db.conn.get('SELECT key,val FROM preferences'):
            try:
                val = self.raw_to_object(val)
            except:
                prints('Failed to read value for:', key, 'from db')
                continue
            dict.__setitem__(self, key, val)

    def raw_to_object(self, raw):
        if not isinstance(raw, unicode):
            raw = raw.decode(preferred_encoding)
        return json.loads(raw, object_hook=from_json)

    def to_raw(self, val):
        return json.dumps(val, indent=2, default=to_json)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults[key]

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self.db.conn.execute('DELETE FROM preferences WHERE key=?', (key,))

    def __setitem__(self, key, val):
        if self.disable_setting:
            return
        raw = self.to_raw(val)
        self.db.conn.execute(
            'INSERT OR REPLACE INTO preferences (key,val) VALUES (?,?)',
            (key, raw))
        dict.__setitem__(self, key, val)

    def set(self, key, val):
        self.__setitem__(key, val)

# }}}
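# A minimal sketch, not part of this commit, of how DBPrefs behaves, assuming
# `db` is a DB instance (defined below). Values round-trip through JSON via
# to_json/from_json, so only JSON-serializable values belong here:

def _prefs_usage_sketch(db):
    db.prefs.defaults['user_categories'] = {}         # fallback for __getitem__
    db.prefs['user_categories'] = {'Favourites': []}  # writes sqlite + memory
    return db.prefs['user_categories']                # served from memory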
# Extra collators {{{
def pynocase(one, two, encoding='utf-8'):
    if isbytestring(one):
        try:
            one = one.decode(encoding, 'replace')
        except:
            pass
    if isbytestring(two):
        try:
            two = two.decode(encoding, 'replace')
        except:
            pass
    return cmp(one.lower(), two.lower())

def _author_to_author_sort(x):
    if not x:
        return ''
    return author_to_author_sort(x.replace('|', ','))

def icu_collator(s1, s2):
    return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
# }}}
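# A minimal sketch, not part of this commit: the collations registered on
# Connection below are used from SQL like this (table name illustrative):

def _collation_sketch(conn):
    return conn.get('SELECT name FROM tags ORDER BY name COLLATE PYNOCASE')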
class Connection(apsw.Connection): # {{{

    BUSY_TIMEOUT = 2000 # milliseconds

    def __init__(self, path):
        apsw.Connection.__init__(self, path)

        self.setbusytimeout(self.BUSY_TIMEOUT)
        self.execute('pragma cache_size=5000')
        self.execute('pragma temp_store=2')

        encoding = self.execute('pragma encoding').fetchone()[0]
        self.createcollation('PYNOCASE', partial(pynocase,
            encoding=encoding))

        self.createscalarfunction('title_sort', title_sort, 1)
        self.createscalarfunction('author_to_author_sort',
                _author_to_author_sort, 1)

        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()), 0)

        # Dummy functions for dynamically created filters
        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
        self.createcollation('icucollate', icu_collator)

    def create_dynamic_filter(self, name):
        f = DynamicFilter(name)
        self.createscalarfunction(name, f, 1)

    def get(self, *args, **kw):
        ans = self.cursor().execute(*args)
        if kw.get('all', True):
            return ans.fetchall()
        for row in ans:
            return row[0]

    def execute(self, sql, bindings=None):
        cursor = self.cursor()
        return cursor.execute(sql, bindings)

    def executemany(self, sql, sequence_of_bindings):
        return self.cursor().executemany(sql, sequence_of_bindings)

    def executescript(self, sql):
        with self:
            # Use an explicit savepoint so that even if this is called
            # while a transaction is active, it is atomic
            return self.cursor().execute(sql)
# }}}
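# A minimal sketch, not part of this commit, of the two modes of
# Connection.get above: all=True (the default) returns fetchall(), while
# all=False returns the first column of the first row, or None when there
# are no rows:

def _get_sketch(conn):
    rows = conn.get('SELECT key,val FROM preferences')  # list of row tuples
    ver = conn.get('pragma user_version;', all=False)   # single scalar
    return rows, ver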
class DB(SchemaUpgrade):

    PATH_LIMIT = 40 if iswindows else 100
    WINDOWS_LIBRARY_PATH_LIMIT = 75

    # Initialize database {{{

    def __init__(self, library_path, default_prefs=None, read_only=False):
        try:
            if isbytestring(library_path):
                library_path = library_path.decode(filesystem_encoding)
        except:
            import traceback
            traceback.print_exc()

        self.field_metadata = FieldMetadata()

        self.library_path = os.path.abspath(library_path)
        self.dbpath = os.path.join(library_path, 'metadata.db')
        self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH',
                self.dbpath)

        if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
            raise ValueError(_(
                'Path to library too long. Must be less than'
                ' %d characters.')%(259-4*self.PATH_LIMIT-10))
        exists = self._exists = os.path.exists(self.dbpath)
        if not exists:
            # Be more strict when creating new libraries as the old calculation
            # allowed for max path lengths of 265 chars.
            if (iswindows and len(self.library_path) >
                    self.WINDOWS_LIBRARY_PATH_LIMIT):
                raise ValueError(_(
                    'Path to library too long. Must be less than'
                    ' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)

        if read_only and os.path.exists(self.dbpath):
            # Work on only a copy of metadata.db to ensure that
            # metadata.db is not changed
            pt = PersistentTemporaryFile('_metadata_ro.db')
            pt.close()
            shutil.copyfile(self.dbpath, pt.name)
            self.dbpath = pt.name

        self.is_case_sensitive = (not iswindows and
            not os.path.exists(self.dbpath.replace('metadata.db',
                'MeTAdAtA.dB')))

        self._conn = None

        if self.user_version == 0:
            self.initialize_database()

        SchemaUpgrade.__init__(self)
        # Guarantee that the library_id is set
        self.library_id

        self.initialize_prefs(default_prefs)

        # Fix legacy triggers and columns
        self.conn.executescript('''
        DROP TRIGGER IF EXISTS author_insert_trg;
        CREATE TEMP TRIGGER author_insert_trg
            AFTER INSERT ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
        END;
        DROP TRIGGER IF EXISTS author_update_trg;
        CREATE TEMP TRIGGER author_update_trg
            BEFORE UPDATE ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name)
            WHERE id=NEW.id AND name <> NEW.name;
        END;
        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
        ''')

    def initialize_prefs(self, default_prefs):
        self.prefs = DBPrefs(self)

        if default_prefs is not None and not self._exists:
            # Only apply default prefs to a new database
            for key in default_prefs:
                # be sure that prefs not to be copied are listed below
                if key not in frozenset(['news_to_be_synced']):
                    self.prefs[key] = default_prefs[key]
            if 'field_metadata' in default_prefs:
                fmvals = [f for f in default_prefs['field_metadata'].values()
                        if f['is_custom']]
                for f in fmvals:
                    self.create_custom_column(f['label'], f['name'],
                            f['datatype'], f['is_multiple'] is not None,
                            f['is_editable'], f['display'])

        defs = self.prefs.defaults
        defs['gui_restriction'] = defs['cs_restriction'] = ''
        defs['categories_using_hierarchy'] = []
        defs['column_color_rules'] = []

        # Migrate the bool tristate tweak
        defs['bools_are_tristate'] = \
                tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
        if self.prefs.get('bools_are_tristate') is None:
            self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])

        # Migrate column coloring rules
        if self.prefs.get('column_color_name_1', None) is not None:
            from calibre.library.coloring import migrate_old_rule
            old_rules = []
            for i in range(1, 6):
                col = self.prefs.get('column_color_name_'+str(i), None)
                templ = self.prefs.get('column_color_template_'+str(i), None)
                if col and templ:
                    try:
                        del self.prefs['column_color_name_'+str(i)]
                        rules = migrate_old_rule(self.field_metadata, templ)
                        for templ in rules:
                            old_rules.append((col, templ))
                    except:
                        pass
            if old_rules:
                self.prefs['column_color_rules'] += old_rules

        # Migrate saved search and user categories to db preference scheme
        def migrate_preference(key, default):
            oldval = prefs[key]
            if oldval != default:
                self.prefs[key] = oldval
                prefs[key] = default
            if key not in self.prefs:
                self.prefs[key] = default

        migrate_preference('user_categories', {})
        migrate_preference('saved_searches', {})

        # Migrate grouped_search_terms
        if self.prefs.get('grouped_search_terms', None) is None:
            try:
                ogst = tweaks.get('grouped_search_terms', {})
                ngst = {}
                for t in ogst:
                    ngst[icu_lower(t)] = ogst[t]
                self.prefs.set('grouped_search_terms', ngst)
            except:
                pass

        # Rename any user categories with names that differ only in case
        user_cats = self.prefs.get('user_categories', {})
        catmap = {}
        for uc in user_cats:
            ucl = icu_lower(uc)
            if ucl not in catmap:
                catmap[ucl] = []
            catmap[ucl].append(uc)
        cats_changed = False
        for uc in catmap:
            if len(catmap[uc]) > 1:
                prints('found user category case overlap', catmap[uc])
                cat = catmap[uc][0]
                suffix = 1
                while icu_lower((cat + unicode(suffix))) in catmap:
                    suffix += 1
                prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
                user_cats[cat + unicode(suffix)] = user_cats[cat]
                del user_cats[cat]
                cats_changed = True
        if cats_changed:
            self.prefs.set('user_categories', user_cats)

    @property
    def conn(self):
        if self._conn is None:
            self._conn = Connection(self.dbpath)
            if self._exists and self.user_version == 0:
                self._conn.close()
                os.remove(self.dbpath)
                self._conn = Connection(self.dbpath)
        return self._conn

    @dynamic_property
    def user_version(self):
        doc = 'The user version of this database'

        def fget(self):
            return self.conn.get('pragma user_version;', all=False)

        def fset(self, val):
            self.conn.execute('pragma user_version=%d'%int(val))

        return property(doc=doc, fget=fget, fset=fset)

    def initialize_database(self):
        metadata_sqlite = P('metadata_sqlite.sql', data=True,
                allow_user_override=False).decode('utf-8')
        self.conn.executescript(metadata_sqlite)
        if self.user_version == 0:
            self.user_version = 1
    # }}}

    # Database layer API {{{

    @classmethod
    def exists_at(cls, path):
        return path and os.path.exists(os.path.join(path, 'metadata.db'))

    @dynamic_property
    def library_id(self):
        doc = ('The UUID for this library. As long as the user only operates'
                ' on libraries with calibre, it will be unique')

        def fget(self):
            if getattr(self, '_library_id_', None) is None:
                ans = self.conn.get('SELECT uuid FROM library_id', all=False)
                if ans is None:
                    ans = str(uuid.uuid4())
                    self.library_id = ans
                else:
                    self._library_id_ = ans
            return self._library_id_

        def fset(self, val):
            self._library_id_ = unicode(val)
            self.conn.execute('''
                    DELETE FROM library_id;
                    INSERT INTO library_id (uuid) VALUES (?);
                    ''', (self._library_id_,))

        return property(doc=doc, fget=fget, fset=fset)

    def last_modified(self):
        ''' Return last modified time as a UTC datetime object '''
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)

    # }}}
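A minimal sketch, not part of the commit, of opening this backend; the path
is illustrative, and read_only=True makes DB work on a private copy of
metadata.db, as implemented above:

    from calibre.db.backend import DB

    db = DB('/path/to/library', read_only=True)
    print(db.library_id, db.user_version)
    print(db.last_modified())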