mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Start work on new db backend
This commit is contained in:
parent
15d8272efe
commit
50dadb45cf
67
src/calibre/db/__init__.py
Normal file
67
src/calibre/db/__init__.py
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
'''
|
||||
Rewrite of the calibre database backend.
|
||||
|
||||
Broad Objectives:
|
||||
|
||||
* Use the sqlite db only as a datastore, i.e. do not do
|
||||
sorting/searching/concatenation or anything else in sqlite. Instead
|
||||
mirror the sqlite tables in memory, create caches and lookup maps from
|
||||
them and create a set_* API that updates the memory caches and the sqlite
|
||||
correctly.
|
||||
|
||||
* Move from keeping a list of books in memory as a cache to a per table
|
||||
cache. This allows much faster search and sort operations at the expense
|
||||
of slightly slower lookup operations. That slowdown can be mitigated by
|
||||
keeping lots of maps and updating them in the set_* API. Also
|
||||
get_categories becomes blazingly fast.
|
||||
|
||||
* Separate the database layer from the cache layer more cleanly. Rather
|
||||
than having the db layer refer to the cache layer and vice versa, the
|
||||
cache layer will refer to the db layer only and the new API will be
|
||||
defined on the cache layer.
|
||||
|
||||
* Get rid of index_is_id and other poor design decisions
|
||||
|
||||
* Minimize the API as much as possible and define it cleanly
|
||||
|
||||
* Do not change the on disk format of metadata.db at all (this is for
|
||||
backwards compatibility)
|
||||
|
||||
* Get rid of the need for a separate db access thread by switching to apsw
|
||||
to access sqlite, which is thread safe
|
||||
|
||||
* The new API will have methods to efficiently do bulk operations and will
|
||||
use shared/exclusive/pending locks to serialize access to the in-mem data
|
||||
structures. Use the same locking scheme as sqlite itself does.
|
||||
|
||||
How this will proceed:
|
||||
|
||||
1. Create the new API
|
||||
2. Create a test suite for it
|
||||
3. Write a replacement for LibraryDatabase2 that uses the new API
|
||||
internally
|
||||
4. Lots of testing of calibre with the new LibraryDatabase2
|
||||
5. Gradually migrate code to use the (much faster) new api wherever possible (the new api
|
||||
will be exposed via db.new_api)
|
||||
|
||||
I plan to work on this slowly, in parallel to normal calibre development
|
||||
work.
|
||||
|
||||
Various things that require other things before they can be migrated:
|
||||
1. From initialize_dynamic(): set_saved_searches,
|
||||
load_user_template_functions. Also add custom
|
||||
columns/categories/searches info into
|
||||
self.field_metadata. Finally, implement metadata dirtied
|
||||
functionality.
|
||||
|
||||
'''
|
404
src/calibre/db/backend.py
Normal file
404
src/calibre/db/backend.py
Normal file
@ -0,0 +1,404 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Imports {{{
|
||||
import os, shutil, uuid, json
|
||||
from functools import partial
|
||||
|
||||
import apsw
|
||||
|
||||
from calibre import isbytestring, force_unicode, prints
|
||||
from calibre.constants import (iswindows, filesystem_encoding,
|
||||
preferred_encoding)
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.library.schema_upgrades import SchemaUpgrade
|
||||
from calibre.library.field_metadata import FieldMetadata
|
||||
from calibre.ebooks.metadata import title_sort, author_to_author_sort
|
||||
from calibre.utils.icu import strcmp
|
||||
from calibre.utils.config import to_json, from_json, prefs, tweaks
|
||||
from calibre.utils.date import utcfromtimestamp
|
||||
# }}}
|
||||
|
||||
'''
|
||||
Differences in semantics from pysqlite:
|
||||
|
||||
1. execute/executemany/executescript operate in autocommit mode
|
||||
|
||||
'''
|
||||
|
||||
class DynamicFilter(object): # {{{

    '''
    No longer used, present for legacy compatibility.

    A callable membership test over a replaceable, immutable set of ids.
    '''

    def __init__(self, name):
        # A freshly created filter matches nothing
        self.name, self.ids = name, frozenset()

    def __call__(self, id_):
        '''Return 1 if id_ is in the current id set, otherwise 0.'''
        return 1 if id_ in self.ids else 0

    def change(self, ids):
        '''Replace the current id set with a frozen snapshot of ids.'''
        self.ids = frozenset(ids)
|
||||
# }}}
|
||||
|
||||
class DBPrefs(dict): # {{{

    '''
    Store preferences as key:value pairs in the db.

    Values are kept in this dict and written through to the
    ``preferences`` table, serialized as JSON. Keys missing from the
    dict are looked up in :attr:`defaults` when read with ``[]``.
    '''

    def __init__(self, db):
        dict.__init__(self)
        self.db = db
        # Fallback values consulted by __getitem__ for missing keys
        self.defaults = {}
        # When True, item assignment is silently ignored
        self.disable_setting = False
        stored = self.db.conn.get('SELECT key,val FROM preferences')
        for key, raw in stored:
            try:
                value = self.raw_to_object(raw)
            except:
                # An unreadable value is reported and skipped
                prints('Failed to read value for:', key, 'from db')
            else:
                dict.__setitem__(self, key, value)

    def raw_to_object(self, raw):
        '''Deserialize a stored JSON value, decoding bytes first.'''
        text = raw if isinstance(raw, unicode) else raw.decode(
                preferred_encoding)
        return json.loads(text, object_hook=from_json)

    def to_raw(self, val):
        '''Serialize val to the JSON text that is stored in the db.'''
        return json.dumps(val, indent=2, default=to_json)

    def __getitem__(self, key):
        if dict.__contains__(self, key):
            return dict.__getitem__(self, key)
        return self.defaults[key]

    def __delitem__(self, key):
        # Remove from memory first, then from the db
        dict.__delitem__(self, key)
        self.db.conn.execute('DELETE FROM preferences WHERE key=?', (key,))

    def __setitem__(self, key, val):
        if not self.disable_setting:
            serialized = self.to_raw(val)
            self.db.conn.execute(
                'INSERT OR REPLACE INTO preferences (key,val) VALUES (?,?)',
                (key, serialized))
            dict.__setitem__(self, key, val)

    def set(self, key, val):
        '''Method form of ``self[key] = val``.'''
        self.__setitem__(key, val)
|
||||
|
||||
# }}}
|
||||
|
||||
# Extra collators {{{
|
||||
def pynocase(one, two, encoding='utf-8'):
    '''
    Case-insensitive comparison of two strings.

    Byte strings are decoded with ``encoding`` (undecodable bytes are
    replaced) before their lower-cased forms are compared with ``cmp``.
    '''
    def as_text(value):
        if isbytestring(value):
            try:
                return value.decode(encoding, 'replace')
            except:
                # Best effort: compare the value undecoded
                pass
        return value

    left, right = as_text(one), as_text(two)
    return cmp(left.lower(), right.lower())
|
||||
|
||||
def _author_to_author_sort(x):
|
||||
if not x: return ''
|
||||
return author_to_author_sort(x.replace('|', ','))
|
||||
|
||||
def icu_collator(s1, s2):
    '''Compare s1 and s2 with strcmp after forcing both to unicode.'''
    left = force_unicode(s1, 'utf-8')
    right = force_unicode(s2, 'utf-8')
    return strcmp(left, right)
|
||||
# }}}
|
||||
|
||||
class Connection(apsw.Connection): # {{{

    '''
    An apsw connection with the pragmas, collations and SQL functions
    used by this module already set up, plus a few convenience wrappers
    (:meth:`get`, :meth:`execute`, :meth:`executemany`,
    :meth:`executescript`) that create a cursor for the caller.
    '''

    BUSY_TIMEOUT = 2000 # milliseconds

    def __init__(self, path):
        apsw.Connection.__init__(self, path)

        self.setbusytimeout(self.BUSY_TIMEOUT)
        self.execute('pragma cache_size=5000')
        self.execute('pragma temp_store=2')

        # self *is* the connection, so everything is registered directly
        # on it using the apsw API (note the argument order of
        # createscalarfunction is name, callable, number of arguments)
        encoding = self.get('pragma encoding', all=False)
        self.createcollation('PYNOCASE', partial(pynocase,
            encoding=encoding))

        self.createscalarfunction('title_sort', title_sort, 1)
        self.createscalarfunction('author_to_author_sort',
                _author_to_author_sort, 1)

        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()), 0)

        # Dummy functions for dynamically created filters
        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
        self.createcollation('icucollate', icu_collator)

    def create_dynamic_filter(self, name):
        '''Register a new DynamicFilter as the one argument SQL function
        called name.'''
        f = DynamicFilter(name)
        self.createscalarfunction(name, f, 1)

    def get(self, *args, **kw):
        '''
        Execute a query on a new cursor, passing args to the cursor's
        execute().

        :param all: If True (the default) return all rows. If False,
                    return only the first column of the first row, or
                    None if the query produced no rows.
        '''
        ans = self.cursor().execute(*args)
        if kw.get('all', True):
            return ans.fetchall()
        for row in ans:
            return row[0]
        return None

    def execute(self, sql, bindings=None):
        '''Execute sql on a new cursor and return that cursor.'''
        cursor = self.cursor()
        return cursor.execute(sql, bindings)

    def executemany(self, sql, sequence_of_bindings):
        '''Execute sql once for every item in sequence_of_bindings, on a
        new cursor.'''
        return self.cursor().executemany(sql, sequence_of_bindings)

    def executescript(self, sql):
        '''Execute the (possibly multi-statement) sql atomically.'''
        with self:
            # Use an explicit savepoint so that even if this is called
            # while a transaction is active, it is atomic
            return self.cursor().execute(sql)
|
||||
# }}}
|
||||
|
||||
class DB(SchemaUpgrade):

    '''
    The database layer: opens (creating it if needed) the metadata.db of
    a calibre library, runs schema upgrades on it and loads the
    preferences stored in it.
    '''

    PATH_LIMIT = 40 if iswindows else 100
    WINDOWS_LIBRARY_PATH_LIMIT = 75

    # Initialize database {{{

    def __init__(self, library_path, default_prefs=None, read_only=False):
        '''
        :param library_path: Path to the folder containing metadata.db
        :param default_prefs: Optional mapping of preferences, applied
                              only when the database is newly created
        :param read_only: If True and the database exists, operate on a
                          temporary copy of it, so that metadata.db
                          itself is never changed
        :raises ValueError: On windows, if library_path is too long
        '''
        try:
            if isbytestring(library_path):
                library_path = library_path.decode(filesystem_encoding)
        except:
            import traceback
            traceback.print_exc()

        self.field_metadata = FieldMetadata()

        self.library_path = os.path.abspath(library_path)
        self.dbpath = os.path.join(library_path, 'metadata.db')
        self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH',
                self.dbpath)

        if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
            raise ValueError(_(
                'Path to library too long. Must be less than'
                ' %d characters.')%(259-4*self.PATH_LIMIT-10))
        exists = self._exists = os.path.exists(self.dbpath)
        if not exists:
            # Be more strict when creating new libraries as the old calculation
            # allowed for max path lengths of 265 chars.
            if (iswindows and len(self.library_path) >
                    self.WINDOWS_LIBRARY_PATH_LIMIT):
                raise ValueError(_(
                    'Path to library too long. Must be less than'
                    ' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)

        if read_only and os.path.exists(self.dbpath):
            # Work on only a copy of metadata.db to ensure that
            # metadata.db is not changed
            pt = PersistentTemporaryFile('_metadata_ro.db')
            pt.close()
            shutil.copyfile(self.dbpath, pt.name)
            self.dbpath = pt.name

        self.is_case_sensitive = (not iswindows and
            not os.path.exists(self.dbpath.replace('metadata.db',
                'MeTAdAtA.dB')))

        # The connection is opened lazily by the conn property
        self._conn = None

        if self.user_version == 0:
            self.initialize_database()

        SchemaUpgrade.__init__(self)
        # Guarantee that the library_id is set
        self.library_id

        self.initialize_prefs(default_prefs)

        # Fix legacy triggers and columns
        self.conn.executescript('''
        DROP TRIGGER IF EXISTS author_insert_trg;
        CREATE TEMP TRIGGER author_insert_trg
            AFTER INSERT ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
        END;
        DROP TRIGGER IF EXISTS author_update_trg;
        CREATE TEMP TRIGGER author_update_trg
            BEFORE UPDATE ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name)
            WHERE id=NEW.id AND name <> NEW.name;
        END;
        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
        ''')

    def initialize_prefs(self, default_prefs):
        '''
        Load the preferences stored in the db into self.prefs, set up
        the default values and migrate legacy settings (tweaks, global
        prefs and old style column coloring rules) into the db.

        :param default_prefs: Mapping of preferences applied to newly
                              created databases only, or None
        '''
        self.prefs = DBPrefs(self)

        if default_prefs is not None and not self._exists:
            # Only apply default prefs to a new database
            for key in default_prefs:
                # be sure that prefs not to be copied are listed below
                if key not in frozenset(['news_to_be_synced']):
                    self.prefs[key] = default_prefs[key]
            if 'field_metadata' in default_prefs:
                fmvals = [f for f in default_prefs['field_metadata'].values()
                                        if f['is_custom']]
                for f in fmvals:
                    self.create_custom_column(f['label'], f['name'],
                            f['datatype'], f['is_multiple'] is not None,
                            f['is_editable'], f['display'])

        defs = self.prefs.defaults
        defs['gui_restriction'] = defs['cs_restriction'] = ''
        defs['categories_using_hierarchy'] = []
        defs['column_color_rules'] = []

        # Migrate the bool tristate tweak
        defs['bools_are_tristate'] = \
                tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
        if self.prefs.get('bools_are_tristate') is None:
            self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])

        # Migrate column coloring rules
        if self.prefs.get('column_color_name_1', None) is not None:
            from calibre.library.coloring import migrate_old_rule
            old_rules = []
            for i in range(1, 6):
                col = self.prefs.get('column_color_name_'+str(i), None)
                templ = self.prefs.get('column_color_template_'+str(i), None)
                if col and templ:
                    try:
                        del self.prefs['column_color_name_'+str(i)]
                        rules = migrate_old_rule(self.field_metadata, templ)
                        for templ in rules:
                            old_rules.append((col, templ))
                    except:
                        # Migration is best effort, skip rules that fail
                        pass
            if old_rules:
                # Build a new list instead of using +=, which would
                # modify the list stored in self.prefs.defaults in place
                # when no rules have been saved in the db yet
                self.prefs['column_color_rules'] = (
                        list(self.prefs['column_color_rules']) + old_rules)

        # Migrate saved search and user categories to db preference scheme
        def migrate_preference(key, default):
            oldval = prefs[key]
            if oldval != default:
                self.prefs[key] = oldval
                prefs[key] = default
            if key not in self.prefs:
                self.prefs[key] = default

        migrate_preference('user_categories', {})
        migrate_preference('saved_searches', {})

        # migrate grouped_search_terms
        if self.prefs.get('grouped_search_terms', None) is None:
            try:
                ogst = tweaks.get('grouped_search_terms', {})
                ngst = {}
                for t in ogst:
                    ngst[icu_lower(t)] = ogst[t]
                self.prefs.set('grouped_search_terms', ngst)
            except:
                # Migration is best effort
                pass

        # Rename any user categories with names that differ only in case
        user_cats = self.prefs.get('user_categories', {})
        catmap = {}
        for uc in user_cats:
            ucl = icu_lower(uc)
            if ucl not in catmap:
                catmap[ucl] = []
            catmap[ucl].append(uc)
        cats_changed = False
        for uc in catmap:
            if len(catmap[uc]) > 1:
                prints('found user category case overlap', catmap[uc])
                cat = catmap[uc][0]
                suffix = 1
                while icu_lower((cat + unicode(suffix))) in catmap:
                    suffix += 1
                prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
                user_cats[cat + unicode(suffix)] = user_cats[cat]
                del user_cats[cat]
                cats_changed = True
        if cats_changed:
            self.prefs.set('user_categories', user_cats)

    @property
    def conn(self):
        '''
        The connection to the database, opened on first access. If a
        pre-existing database file has a user_version of 0, the file is
        deleted and a fresh connection is opened.
        '''
        if self._conn is None:
            # Must be the Connection subclass defined in this module:
            # the rest of this class relies on its get()/executescript()
            # methods and on the SQL functions/collations it registers
            self._conn = Connection(self.dbpath)
            if self._exists and self.user_version == 0:
                self._conn.close()
                os.remove(self.dbpath)
                self._conn = Connection(self.dbpath)
        return self._conn

    @dynamic_property
    def user_version(self):
        doc = 'The user version of this database'

        def fget(self):
            return self.conn.get('pragma user_version;', all=False)

        def fset(self, val):
            self.conn.execute('pragma user_version=%d'%int(val))

        return property(doc=doc, fget=fget, fset=fset)

    def initialize_database(self):
        '''Create the database by running the metadata_sqlite.sql
        resource, then set user_version to 1 if it is still 0.'''
        metadata_sqlite = P('metadata_sqlite.sql', data=True,
                allow_user_override=False).decode('utf-8')
        self.conn.executescript(metadata_sqlite)
        if self.user_version == 0:
            self.user_version = 1
    # }}}

    # Database layer API {{{

    @classmethod
    def exists_at(cls, path):
        '''Return a true value if path is non-empty and contains a
        metadata.db file.'''
        return path and os.path.exists(os.path.join(path, 'metadata.db'))

    @dynamic_property
    def library_id(self):
        doc = ('The UUID for this library. As long as the user only operates'
                ' on libraries with calibre, it will be unique')

        def fget(self):
            if getattr(self, '_library_id_', None) is None:
                ans = self.conn.get('SELECT uuid FROM library_id', all=False)
                if ans is None:
                    # No id stored yet, generate one and save it
                    ans = str(uuid.uuid4())
                    self.library_id = ans
                else:
                    self._library_id_ = ans
            return self._library_id_

        def fset(self, val):
            self._library_id_ = unicode(val)
            # The bindings must be a sequence with one item per
            # placeholder, hence the 1-tuple
            self.conn.execute('''
                    DELETE FROM library_id;
                    INSERT INTO library_id (uuid) VALUES (?);
                    ''', (self._library_id_,))

        return property(doc=doc, fget=fget, fset=fset)

    def last_modified(self):
        ''' Return last modified time as a UTC datetime object '''
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)
|
||||
|
||||
# }}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user