diff --git a/src/calibre/db/__init__.py b/src/calibre/db/__init__.py
new file mode 100644
index 0000000000..4384cab2da
--- /dev/null
+++ b/src/calibre/db/__init__.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+'''
+Rewrite of the calibre database backend.
+
+Broad Objectives:
+
+    * Use the sqlite db only as a datastore, i.e. do not do
+      sorting/searching/concatenation or anything else in sqlite. Instead,
+      mirror the sqlite tables in memory, create caches and lookup maps from
+      them and create a set_* API that updates the memory caches and the
+      sqlite correctly.
+
+    * Move from keeping a list of books in memory as a cache to a per-table
+      cache. This allows much faster search and sort operations at the
+      expense of slightly slower lookup operations. That slowdown can be
+      mitigated by keeping lots of maps and updating them in the set_* API.
+      Also, get_categories becomes blazingly fast.
+
+    * Separate the database layer from the cache layer more cleanly. Rather
+      than having the db layer refer to the cache layer and vice versa, the
+      cache layer will refer to the db layer only and the new API will be
+      defined on the cache layer.
+
+    * Get rid of index_is_id and other poor design decisions
+
+    * Minimize the API as much as possible and define it cleanly
+
+    * Do not change the on disk format of metadata.db at all (this is for
+      backwards compatibility)
+
+    * Get rid of the need for a separate db access thread by switching to
+      apsw to access sqlite, which is thread safe
+
+    * The new API will have methods to efficiently do bulk operations and
+      will use shared/exclusive/pending locks to serialize access to the
+      in-memory data structures. Use the same locking scheme as sqlite
+      itself does.
+
+How this will proceed:
+
+    1. Create the new API
+    2. Create a test suite for it
+    3. Write a replacement for LibraryDatabase2 that uses the new API
+       internally
+    4. Lots of testing of calibre with the new LibraryDatabase2
+    5. Gradually migrate code to use the (much faster) new API wherever
+       possible (the new API will be exposed via db.new_api)
+
+    I plan to work on this slowly, in parallel to normal calibre development
+    work.
+
+Various things that require other things before they can be migrated:
+    1. From initialize_dynamic(): set_saved_searches,
+       load_user_template_functions. Also add custom
+       columns/categories/searches info into self.field_metadata.
+       Finally, implement metadata dirtied functionality.
+
+'''
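A note on the per-table cache objective above, since it is the core design
change: the sqlite tables are mirrored in memory as prebuilt maps, so
category and search operations never touch SQL. A minimal sketch of the
idea, separate from the patch itself (the class and method names below are
hypothetical illustrations, not the API this rewrite defines):

    from collections import defaultdict

    class TableCache(object):
        '''In-memory mirror of one many-to-many table, e.g. tags.'''

        def __init__(self, rows, links):
            # rows:  iterable of (item_id, value) from the item table
            # links: iterable of (book_id, item_id) from the link table
            self.id_map = dict(rows)          # item_id -> value
            self.book_ids = defaultdict(set)  # item_id -> set of book ids
            for book_id, item_id in links:
                self.book_ids[item_id].add(book_id)

        def category_counts(self):
            # With the maps prebuilt, a get_categories style query is a
            # pure in-memory walk, no SQL at all
            return dict((self.id_map[i], len(b))
                    for i, b in self.book_ids.iteritems())

A set_* API would then update id_map/book_ids and write the same change to
sqlite, keeping the mirror and the datastore consistent, as the objectives
describe.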
diff --git a/src/calibre/db/backend.py b/src/calibre/db/backend.py
new file mode 100644
index 0000000000..4e6c028b93
--- /dev/null
+++ b/src/calibre/db/backend.py
@@ -0,0 +1,404 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+        print_function)
+
+__license__ = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+# Imports {{{
+import os, shutil, uuid, json
+from functools import partial
+
+import apsw
+
+from calibre import isbytestring, force_unicode, prints
+from calibre.constants import (iswindows, filesystem_encoding,
+        preferred_encoding)
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.library.schema_upgrades import SchemaUpgrade
+from calibre.library.field_metadata import FieldMetadata
+from calibre.ebooks.metadata import title_sort, author_to_author_sort
+from calibre.utils.icu import strcmp
+from calibre.utils.config import to_json, from_json, prefs, tweaks
+from calibre.utils.date import utcfromtimestamp
+# }}}
+
+'''
+Differences in semantics from pysqlite:
+
+    1. execute/executemany/executescript operate in autocommit mode
+
+'''
+
+class DynamicFilter(object): # {{{
+
+    'No longer used, present for legacy compatibility'
+
+    def __init__(self, name):
+        self.name = name
+        self.ids = frozenset([])
+
+    def __call__(self, id_):
+        return int(id_ in self.ids)
+
+    def change(self, ids):
+        self.ids = frozenset(ids)
+# }}}
+
+class DBPrefs(dict): # {{{
+
+    'Store preferences as key:value pairs in the db'
+
+    def __init__(self, db):
+        dict.__init__(self)
+        self.db = db
+        self.defaults = {}
+        self.disable_setting = False
+        for key, val in self.db.conn.get('SELECT key,val FROM preferences'):
+            try:
+                val = self.raw_to_object(val)
+            except:
+                prints('Failed to read value for:', key, 'from db')
+                continue
+            dict.__setitem__(self, key, val)
+
+    def raw_to_object(self, raw):
+        if not isinstance(raw, unicode):
+            raw = raw.decode(preferred_encoding)
+        return json.loads(raw, object_hook=from_json)
+
+    def to_raw(self, val):
+        return json.dumps(val, indent=2, default=to_json)
+
+    def __getitem__(self, key):
+        try:
+            return dict.__getitem__(self, key)
+        except KeyError:
+            return self.defaults[key]
+
+    def __delitem__(self, key):
+        dict.__delitem__(self, key)
+        self.db.conn.execute('DELETE FROM preferences WHERE key=?', (key,))
+
+    def __setitem__(self, key, val):
+        if self.disable_setting:
+            return
+        raw = self.to_raw(val)
+        self.db.conn.execute('INSERT OR REPLACE INTO preferences (key,val)'
+                ' VALUES (?,?)', (key, raw))
+        dict.__setitem__(self, key, val)
+
+    def set(self, key, val):
+        self.__setitem__(key, val)
+
+# }}}
+
+# Extra collators {{{
+def pynocase(one, two, encoding='utf-8'):
+    if isbytestring(one):
+        try:
+            one = one.decode(encoding, 'replace')
+        except:
+            pass
+    if isbytestring(two):
+        try:
+            two = two.decode(encoding, 'replace')
+        except:
+            pass
+    return cmp(one.lower(), two.lower())
+
+def _author_to_author_sort(x):
+    if not x:
+        return ''
+    return author_to_author_sort(x.replace('|', ','))
+
+def icu_collator(s1, s2):
+    return strcmp(force_unicode(s1, 'utf-8'), force_unicode(s2, 'utf-8'))
+# }}}
+
+class Connection(apsw.Connection): # {{{
+
+    BUSY_TIMEOUT = 2000 # milliseconds
+
+    def __init__(self, path):
+        apsw.Connection.__init__(self, path)
+
+        self.setbusytimeout(self.BUSY_TIMEOUT)
+        self.execute('pragma cache_size=5000')
+        self.execute('pragma temp_store=2')
+
+        encoding = self.execute('pragma encoding').fetchone()[0]
+        self.createcollation('PYNOCASE', partial(pynocase,
+            encoding=encoding))
+
+        # Note: apsw uses createscalarfunction(name, callable, numargs)
+        # rather than pysqlite's create_function(name, numargs, callable)
+        self.createscalarfunction('title_sort', title_sort, 1)
+        self.createscalarfunction('author_to_author_sort',
+                _author_to_author_sort, 1)
+
+        self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()), 0)
+
+        # Dummy functions for dynamically created filters
+        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
+        self.createcollation('icucollate', icu_collator)
+
+    def create_dynamic_filter(self, name):
+        f = DynamicFilter(name)
+        self.createscalarfunction(name, f, 1)
+        return f
+
+    def get(self, *args, **kw):
+        ans = self.cursor().execute(*args)
+        if kw.get('all', True):
+            return ans.fetchall()
+        for row in ans:
+            return row[0]
+
+    def execute(self, sql, bindings=None):
+        cursor = self.cursor()
+        return cursor.execute(sql, bindings)
+
+    def executemany(self, sql, sequence_of_bindings):
+        return self.cursor().executemany(sql, sequence_of_bindings)
+
+    def executescript(self, sql):
+        with self:
+            # Use an explicit savepoint so that even if this is called
+            # while a transaction is active, it is atomic
+            return self.cursor().execute(sql)
+# }}}
+
+class DB(SchemaUpgrade):
+
+    PATH_LIMIT = 40 if iswindows else 100
+    WINDOWS_LIBRARY_PATH_LIMIT = 75
+
+    # Initialize database {{{
+
+    def __init__(self, library_path, default_prefs=None, read_only=False):
+        try:
+            if isbytestring(library_path):
+                library_path = library_path.decode(filesystem_encoding)
+        except:
+            import traceback
+            traceback.print_exc()
+
+        self.field_metadata = FieldMetadata()
+
+        self.library_path = os.path.abspath(library_path)
+        self.dbpath = os.path.join(library_path, 'metadata.db')
+        self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH',
+                self.dbpath)
+
+        if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
+            raise ValueError(_(
+                'Path to library too long. Must be less than'
+                ' %d characters.')%(259-4*self.PATH_LIMIT-10))
+        exists = self._exists = os.path.exists(self.dbpath)
+        if not exists:
+            # Be more strict when creating new libraries as the old
+            # calculation allowed for max path lengths of 265 chars.
+            if (iswindows and len(self.library_path) >
+                    self.WINDOWS_LIBRARY_PATH_LIMIT):
+                raise ValueError(_(
+                    'Path to library too long. Must be less than'
+                    ' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)
+        if read_only and os.path.exists(self.dbpath):
+            # Work on only a copy of metadata.db to ensure that
+            # metadata.db is not changed
+            pt = PersistentTemporaryFile('_metadata_ro.db')
+            pt.close()
+            shutil.copyfile(self.dbpath, pt.name)
+            self.dbpath = pt.name
+
+        self.is_case_sensitive = (not iswindows and
+                not os.path.exists(self.dbpath.replace('metadata.db',
+                    'MeTAdAtA.dB')))
+
+        self._conn = None
+
+        if self.user_version == 0:
+            self.initialize_database()
+
+        SchemaUpgrade.__init__(self)
+        # Guarantee that the library_id is set
+        self.library_id
+
+        self.initialize_prefs(default_prefs)
+
+        # Fix legacy triggers and columns
+        self.conn.executescript('''
+        DROP TRIGGER IF EXISTS author_insert_trg;
+        CREATE TEMP TRIGGER author_insert_trg
+            AFTER INSERT ON authors
+            BEGIN
+                UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
+            END;
+        DROP TRIGGER IF EXISTS author_update_trg;
+        CREATE TEMP TRIGGER author_update_trg
+            BEFORE UPDATE ON authors
+            BEGIN
+                UPDATE authors SET sort=author_to_author_sort(NEW.name)
+                WHERE id=NEW.id AND name <> NEW.name;
+            END;
+        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
+        ''')
+
+    def initialize_prefs(self, default_prefs):
+        self.prefs = DBPrefs(self)
+
+        if default_prefs is not None and not self._exists:
+            # Only apply default prefs to a new database
+            for key in default_prefs:
+                # be sure that prefs not to be copied are listed below
+                if key not in frozenset(['news_to_be_synced']):
+                    self.prefs[key] = default_prefs[key]
+            if 'field_metadata' in default_prefs:
+                fmvals = [f for f in default_prefs['field_metadata'].values()
+                        if f['is_custom']]
+                for f in fmvals:
+                    self.create_custom_column(f['label'], f['name'],
+                            f['datatype'], f['is_multiple'] is not None,
+                            f['is_editable'], f['display'])
+
+        defs = self.prefs.defaults
+        defs['gui_restriction'] = defs['cs_restriction'] = ''
+        defs['categories_using_hierarchy'] = []
+        defs['column_color_rules'] = []
+
+        # Migrate the bool tristate tweak
+        defs['bools_are_tristate'] = \
+                tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
+        if self.prefs.get('bools_are_tristate') is None:
+            self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])
+
+        # Migrate column coloring rules
+        if self.prefs.get('column_color_name_1', None) is not None:
+            from calibre.library.coloring import migrate_old_rule
+            old_rules = []
+            for i in range(1, 6):
+                col = self.prefs.get('column_color_name_'+str(i), None)
+                templ = self.prefs.get('column_color_template_'+str(i), None)
+                if col and templ:
+                    try:
+                        del self.prefs['column_color_name_'+str(i)]
+                        rules = migrate_old_rule(self.field_metadata, templ)
+                        for templ in rules:
+                            old_rules.append((col, templ))
+                    except:
+                        pass
+            if old_rules:
+                self.prefs['column_color_rules'] += old_rules
+
+        # Migrate saved search and user categories to db preference scheme
+        def migrate_preference(key, default):
+            oldval = prefs[key]
+            if oldval != default:
+                self.prefs[key] = oldval
+                prefs[key] = default
+            if key not in self.prefs:
+                self.prefs[key] = default
+
+        migrate_preference('user_categories', {})
+        migrate_preference('saved_searches', {})
+
+        # migrate grouped_search_terms
+        if self.prefs.get('grouped_search_terms', None) is None:
+            try:
+                ogst = tweaks.get('grouped_search_terms', {})
+                ngst = {}
+                for t in ogst:
+                    ngst[icu_lower(t)] = ogst[t]
+                self.prefs.set('grouped_search_terms', ngst)
+            except:
+                pass
+
+        # Rename any user categories with names that differ only in case
+        user_cats = self.prefs.get('user_categories', {})
+        catmap = {}
+        for uc in user_cats:
+            ucl = icu_lower(uc)
+            if ucl not in catmap:
+                catmap[ucl] = []
+            catmap[ucl].append(uc)
+        cats_changed = False
+        for uc in catmap:
+            if len(catmap[uc]) > 1:
+                prints('found user category case overlap', catmap[uc])
+                cat = catmap[uc][0]
+                suffix = 1
+                while icu_lower((cat + unicode(suffix))) in catmap:
+                    suffix += 1
+                prints('Renaming user category %s to %s'%(cat, cat+unicode(suffix)))
+                user_cats[cat + unicode(suffix)] = user_cats[cat]
+                del user_cats[cat]
+                cats_changed = True
+        if cats_changed:
+            self.prefs.set('user_categories', user_cats)
+
+    @property
+    def conn(self):
+        if self._conn is None:
+            # Use the Connection subclass defined above so that the custom
+            # collations and SQL functions are registered
+            self._conn = Connection(self.dbpath)
+            if self._exists and self.user_version == 0:
+                self._conn.close()
+                os.remove(self.dbpath)
+                self._conn = Connection(self.dbpath)
+        return self._conn
+
+    @dynamic_property
+    def user_version(self):
+        doc = 'The user version of this database'
+
+        def fget(self):
+            return self.conn.get('pragma user_version;', all=False)
+
+        def fset(self, val):
+            self.conn.execute('pragma user_version=%d'%int(val))
+
+        return property(doc=doc, fget=fget, fset=fset)
+
+    def initialize_database(self):
+        metadata_sqlite = P('metadata_sqlite.sql', data=True,
+                allow_user_override=False).decode('utf-8')
+        self.conn.executescript(metadata_sqlite)
+        if self.user_version == 0:
+            self.user_version = 1
+    # }}}
+
+    # Database layer API {{{
+
+    @classmethod
+    def exists_at(cls, path):
+        return path and os.path.exists(os.path.join(path, 'metadata.db'))
+
+    @dynamic_property
+    def library_id(self):
+        doc = ('The UUID for this library. As long as the user only operates'
+                ' on libraries with calibre, it will be unique')
+
+        def fget(self):
+            if getattr(self, '_library_id_', None) is None:
+                ans = self.conn.get('SELECT uuid FROM library_id', all=False)
+                if ans is None:
+                    ans = str(uuid.uuid4())
+                    self.library_id = ans
+                else:
+                    self._library_id_ = ans
+            return self._library_id_
+
+        def fset(self, val):
+            self._library_id_ = unicode(val)
+            self.conn.execute('''
+                    DELETE FROM library_id;
+                    INSERT INTO library_id (uuid) VALUES (?);
+                    ''', (self._library_id_,))
+
+        return property(doc=doc, fget=fget, fset=fset)
+
+    def last_modified(self):
+        ''' Return last modified time as a UTC datetime object '''
+        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)
+
+    # }}}
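A note on the "Differences in semantics from pysqlite" docstring in
backend.py: apsw never opens an implicit transaction, so every execute()
commits on its own and the connection has no commit() method at all. To
make a group of statements atomic you must be explicit, which is what
executescript() above does by using the connection as a context manager
(apsw wraps the block in a savepoint). A minimal standalone sketch of the
difference ('test.db' is just an illustrative path):

    import apsw

    conn = apsw.Connection('test.db')
    cursor = conn.cursor()
    cursor.execute('CREATE TABLE IF NOT EXISTS t(x)')

    # Autocommit: this row is committed immediately, no explicit commit
    cursor.execute('INSERT INTO t(x) VALUES(?)', (1,))

    # Atomic group: the connection's context manager opens a savepoint;
    # an exception inside the block rolls back both inserts together
    with conn:
        cursor.execute('INSERT INTO t(x) VALUES(?)', (2,))
        cursor.execute('INSERT INTO t(x) VALUES(?)', (3,))

This is also why the patch can drop calibre's separate database access
thread: apsw connections are thread safe, and the busy timeout set in
Connection.__init__ handles contention at the sqlite level.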