From ca3f9b841d0aaa51a0705aaa9fe9187b736fb52f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 25 Nov 2010 22:30:40 -0700 Subject: [PATCH] Implement high performance C module for sqlite custom aggregators. To enable, edit line 153 of library.sqlite. My testing showed a decrease of between 1 and 5% in startup time (varies depending on how many multi-author books are in the library). Disabled for now, pending more testing --- setup/build_environment.py | 2 + setup/extensions.py | 7 +- setup/installer/windows/notes.rst | 6 + src/calibre/library/sqlite.py | 32 ++++- src/calibre/library/sqlite_custom.c | 173 ++++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 4 deletions(-) create mode 100644 src/calibre/library/sqlite_custom.c diff --git a/setup/build_environment.py b/setup/build_environment.py index 6c4cf04479..c021ebc6a6 100644 --- a/setup/build_environment.py +++ b/setup/build_environment.py @@ -90,11 +90,13 @@ fc_lib = '/usr/lib' podofo_inc = '/usr/include/podofo' podofo_lib = '/usr/lib' chmlib_inc_dirs = chmlib_lib_dirs = [] +sqlite_inc_dirs = [] if iswindows: prefix = r'C:\cygwin\home\kovid\sw' sw_inc_dir = os.path.join(prefix, 'include') sw_lib_dir = os.path.join(prefix, 'lib') + sqlite_inc_dirs = [sw_inc_dir] fc_inc = os.path.join(sw_inc_dir, 'fontconfig') fc_lib = sw_lib_dir chmlib_inc_dirs = consolidate('CHMLIB_INC_DIR', os.path.join(prefix, diff --git a/setup/extensions.py b/setup/extensions.py index f68a35974e..d4ac8e188c 100644 --- a/setup/extensions.py +++ b/setup/extensions.py @@ -18,7 +18,7 @@ from setup.build_environment import fc_inc, fc_lib, chmlib_inc_dirs, \ QMAKE, msvc, MT, win_inc, win_lib, png_inc_dirs, win_ddk, \ magick_inc_dirs, magick_lib_dirs, png_lib_dirs, png_libs, \ magick_error, magick_libs, ft_lib_dirs, ft_libs, jpg_libs, \ - jpg_lib_dirs, chmlib_lib_dirs + jpg_lib_dirs, chmlib_lib_dirs, sqlite_inc_dirs MT isunix = islinux or isosx or isfreebsd @@ -58,6 +58,11 @@ if iswindows: extensions = [ + Extension('sqlite_custom', + 
['calibre/library/sqlite_custom.c'], + inc_dirs=sqlite_inc_dirs + ), + Extension('chmlib', ['calibre/utils/chm/swig_chm.c'], libraries=['ChmLib' if iswindows else 'chm'], diff --git a/setup/installer/windows/notes.rst b/setup/installer/windows/notes.rst index 45aa4d2afb..c45cd4cfc9 100644 --- a/setup/installer/windows/notes.rst +++ b/setup/installer/windows/notes.rst @@ -32,6 +32,12 @@ Run the following command to install python dependencies:: Install BeautifulSoup 3.0.x manually into site-packages (3.1.x parses broken HTML very poorly) + +SQLite +--------- + +Put sqlite3*.h from the sqlite windows amalgamation in ~/sw/include + Qt -------- diff --git a/src/calibre/library/sqlite.py b/src/calibre/library/sqlite.py index eb3302086d..94338433a5 100644 --- a/src/calibre/library/sqlite.py +++ b/src/calibre/library/sqlite.py @@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en' Wrapper for multi-threaded access to a single sqlite database connection. Serializes all calls. ''' -import sqlite3 as sqlite, traceback, time, uuid +import sqlite3 as sqlite, traceback, time, uuid, sys, os from sqlite3 import IntegrityError, OperationalError from threading import Thread from Queue import Queue @@ -19,6 +19,7 @@ from calibre.ebooks.metadata import title_sort, author_to_author_sort from calibre.utils.config import tweaks from calibre.utils.date import parse_date, isoformat from calibre import isbytestring +from calibre.constants import iswindows, DEBUG global_lock = RLock() @@ -114,6 +115,22 @@ def pynocase(one, two, encoding='utf-8'): pass return cmp(one.lower(), two.lower()) + +def load_c_extensions(conn, debug=DEBUG): + try: + conn.enable_load_extension(True) + ext_path = os.path.join(sys.extensions_location, 'sqlite_custom.'+ + ('pyd' if iswindows else 'so')) + conn.load_extension(ext_path) + conn.enable_load_extension(False) + return True + except Exception, e: + if debug: + print 'Failed to load high performance sqlite C extension' + print e + return False + + class 
DBThread(Thread): CLOSE = '-------close---------' @@ -131,11 +148,14 @@ class DBThread(Thread): def connect(self): self.conn = sqlite.connect(self.path, factory=Connection, detect_types=sqlite.PARSE_DECLTYPES|sqlite.PARSE_COLNAMES) + self.conn.execute('pragma cache_size=5000') encoding = self.conn.execute('pragma encoding').fetchone()[0] + c_ext_loaded = False #load_c_extensions(self.conn) self.conn.row_factory = sqlite.Row if self.row_factory else lambda cursor, row : list(row) self.conn.create_aggregate('concat', 1, Concatenate) - self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) - self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate) + if not c_ext_loaded: + self.conn.create_aggregate('sortconcat', 2, SortedConcatenate) + self.conn.create_aggregate('sort_concat', 2, SafeSortedConcatenate) self.conn.create_collation('PYNOCASE', partial(pynocase, encoding=encoding)) if tweaks['title_series_sorting'] == 'strictly_alphabetic': @@ -263,3 +283,9 @@ def connect(dbpath, row_factory=None): if conn.proxy.unhandled_error[0] is not None: raise DatabaseException(*conn.proxy.unhandled_error) return conn + +def test(): + c = sqlite.connect(':memory:') + if load_c_extensions(c, True): + print 'Loaded C extension successfully' + diff --git a/src/calibre/library/sqlite_custom.c b/src/calibre/library/sqlite_custom.c new file mode 100644 index 0000000000..650c474c2c --- /dev/null +++ b/src/calibre/library/sqlite_custom.c @@ -0,0 +1,173 @@ +#define UNICODE +#include <Python.h> + + +#include <stdlib.h> + +#include <sqlite3ext.h> +SQLITE_EXTENSION_INIT1 + +#ifdef _MSC_VER +#define MYEXPORT __declspec(dllexport) +#else +#define MYEXPORT +#endif + +// sortconcat {{{ + +typedef struct { + unsigned char *val; + int index; + int length; +} SortConcatItem; + +typedef struct { + SortConcatItem **vals; + int count; + int length; +} SortConcatList; + +static void sort_concat_step(sqlite3_context *context, int argc, sqlite3_value **argv) { + const unsigned char *val; + int idx, sz; + SortConcatList 
*list; + + assert(argc == 2); + + list = (SortConcatList*) sqlite3_aggregate_context(context, sizeof(*list)); + if (list == NULL) return; + + if (list->vals == NULL) { + list->vals = (SortConcatItem**)calloc(100, sizeof(SortConcatItem*)); + if (list->vals == NULL) return; + list->length = 100; + list->count = 0; + } + + if (list->count == list->length) { + list->vals = (SortConcatItem**)realloc(list->vals, list->length + 100); + if (list->vals == NULL) return; + list->length = list->length + 100; + } + + list->vals[list->count] = (SortConcatItem*)calloc(1, sizeof(SortConcatItem)); + if (list->vals[list->count] == NULL) return; + + idx = sqlite3_value_int(argv[0]); + val = sqlite3_value_text(argv[1]); + sz = sqlite3_value_bytes(argv[1]); + if (idx == 0 || val == NULL || sz == 0) {free(list->vals[list->count]); return;} + + + + list->vals[list->count]->val = (unsigned char*)calloc(sz, sizeof(unsigned char)); + if (list->vals[list->count]->val == NULL) + {free(list->vals[list->count]); return;} + list->vals[list->count]->index = idx; + list->vals[list->count]->length = sz; + memcpy(list->vals[list->count]->val, val, sz); + list->count = list->count + 1; + +} + +static void sort_concat_free(SortConcatList *list) { + int i; + if (list == NULL) return; + for (i = 0; i < list->count; i++) { + free(list->vals[i]->val); + free(list->vals[i]); + } +} + +static int sort_concat_cmp(const void *a_, const void *b_) { + return (*((SortConcatItem**)a_))->index - (*((SortConcatItem**)b_))->index; +} + +static unsigned char* sort_concat_do_finalize(SortConcatList *list, const unsigned char join) { + unsigned char *ans, *pos; + int sz = 0, i; + + for (i = 0; i < list->count; i++) { + sz += list->vals[i]->length; + } + sz += list->count; + + ans = (unsigned char *) calloc(sz, sizeof(unsigned char)); + if (ans == NULL) return ans; + + pos = ans; + for (i = 0; i < list->count; i++) { + if (list->vals[i]->length > 0) { + memcpy(pos, list->vals[i]->val, list->vals[i]->length); + pos += 
list->vals[i]->length; + if (i < list->count -1) { *pos = join; pos += 1; } + } + } + + return ans; + +} + +static void sort_concat_finalize(sqlite3_context *context) { + SortConcatList *list; + unsigned char *ans; + + list = (SortConcatList*) sqlite3_aggregate_context(context, sizeof(*list)); + + if (list != NULL && list->vals != NULL && list->count > 0) { + qsort(list->vals, list->count, sizeof(list->vals[0]), sort_concat_cmp); + ans = sort_concat_do_finalize(list, ','); + if (ans != NULL) sqlite3_result_text(context, (char*)ans, -1, SQLITE_TRANSIENT); + free(ans); + sort_concat_free(list); + } + +} + +static void sort_concat_finalize2(sqlite3_context *context) { + SortConcatList *list; + unsigned char *ans; + + list = (SortConcatList*) sqlite3_aggregate_context(context, sizeof(*list)); + + if (list != NULL && list->vals != NULL && list->count > 0) { + qsort(list->vals, list->count, sizeof(list->vals[0]), sort_concat_cmp); + ans = sort_concat_do_finalize(list, '|'); + if (ans != NULL) sqlite3_result_text(context, (char*)ans, -1, SQLITE_TRANSIENT); + free(ans); + sort_concat_free(list); + } + +} + +// }}} + +MYEXPORT int sqlite3_extension_init( + sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi){ + SQLITE_EXTENSION_INIT2(pApi); + sqlite3_create_function(db, "sortconcat", 2, SQLITE_UTF8, NULL, NULL, sort_concat_step, sort_concat_finalize); + sqlite3_create_function(db, "sort_concat", 2, SQLITE_UTF8, NULL, NULL, sort_concat_step, sort_concat_finalize2); + return 0; +} + +static PyObject * +sqlite_custom_init_funcs(PyObject *self, PyObject *args) { + Py_RETURN_NONE; +} + +static PyMethodDef sqlite_custom_methods[] = { + {"init_funcs", sqlite_custom_init_funcs, METH_VARARGS, + "init_funcs()\n\nInitialize module." + }, + + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC +initsqlite_custom(void) { + PyObject *m; + m = Py_InitModule3("sqlite_custom", sqlite_custom_methods, + "Implementation of custom sqlite methods in C for speed." 
+ ); + if (m == NULL) return; +}