Start work on ICU tokenizer for FTS

This commit is contained in:
Kovid Goyal 2021-06-14 08:23:10 +05:30
parent 4df402cacf
commit e4b13d4ccb
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
4 changed files with 107 additions and 0 deletions

View File

@ -71,6 +71,11 @@
"sources": "calibre/library/sqlite_custom.c",
"inc_dirs": "!sqlite_inc_dirs"
},
{
"name": "sqlite_extension",
"sources": "calibre/db/sqlite_extension.cpp",
"inc_dirs": "!sqlite_inc_dirs"
},
{
"name": "lzx",
"sources": "calibre/utils/lzx/lzxmodule.c calibre/utils/lzx/compressor.c calibre/utils/lzx/lzxd.c calibre/utils/lzx/lzc.c calibre/utils/lzx/lzxc.c",

View File

@ -320,6 +320,15 @@ class Plugins(collections.Mapping):
except Exception as err:
return None, str(err)
def load_apsw_extension(self, conn, name):
    '''
    Load the calibre plugin called ``name`` into the APSW connection
    ``conn`` as an SQLite extension.

    The shared library is looked up in ``plugins_loc`` with a ``pyd``
    suffix on Windows and ``so`` elsewhere, and is entered through the
    symbol ``calibre_<name>_init``. Extension loading is switched on only
    for the duration of the call and is always switched off again, even
    when loading fails.
    '''
    conn.enableloadextension(True)
    try:
        suffix = 'pyd' if iswindows else 'so'
        library = os.path.join(plugins_loc, f'{name}.{suffix}')
        entry_point = f'calibre_{name}_init'
        conn.loadextension(library, entry_point)
    finally:
        # Never leave the connection able to load arbitrary extensions.
        conn.enableloadextension(False)
plugins = None
if plugins is None:

View File

@ -0,0 +1,76 @@
/*
* sqlite_extension.cpp
* Copyright (C) 2021 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#include <Python.h>
#include <stdlib.h>
#include <sqlite3ext.h>
SQLITE_EXTENSION_INIT1
/*
 * Ask the database connection for its FTS5 API object.
 *
 * Runs "SELECT fts5(?1)" with ppApi bound as a pointer value of type
 * "fts5_api_ptr"; per the SQLite FTS5 documentation the fts5() function
 * stores the API pointer through it. *ppApi is reset to NULL up front, so
 * callers must check it as well as the return code.
 *
 * Returns the result of preparing the statement if that fails, otherwise
 * the result of finalizing it.
 */
static int
fts5_api_from_db(sqlite3 *db, fts5_api **ppApi) {
    sqlite3_stmt *stmt = NULL;
    *ppApi = NULL;

    const int prepare_rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &stmt, NULL);
    if (prepare_rc != SQLITE_OK) {
        return prepare_rc;
    }

    sqlite3_bind_pointer(stmt, 1, reinterpret_cast<void *>(ppApi), "fts5_api_ptr", NULL);
    /* The step result is deliberately ignored; finalize reports any error. */
    (void)sqlite3_step(stmt);
    return sqlite3_finalize(stmt);
}
extern "C" {
/* Make the entry point visible to the dynamic loader on every toolchain. */
#ifdef _MSC_VER
#define MYEXPORT __declspec(dllexport)
#else
#define MYEXPORT __attribute__ ((visibility ("default")))
#endif

/*
 * SQLite extension entry point, invoked when this library is loaded into a
 * connection.
 *
 * Verifies that the connection exposes an FTS5 API of at least version 2.
 *
 * db        connection the extension is being loaded into
 * pzErrMsg  out-parameter for an error message; may be NULL
 * pApi      SQLite API routine table supplied by the loader
 *
 * Returns SQLITE_OK on success, otherwise an SQLite error code.
 *
 * Error messages are allocated with sqlite3_mprintf() because SQLite
 * releases the returned string with sqlite3_free(); handing back a string
 * literal would make it free static memory.
 */
MYEXPORT int
calibre_sqlite_extension_init(sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi){
    SQLITE_EXTENSION_INIT2(pApi);
    fts5_api *fts5api = NULL;
    int rc = fts5_api_from_db(db, &fts5api);
    if (rc != SQLITE_OK) {
        if (pzErrMsg) {
            *pzErrMsg = sqlite3_mprintf("Failed to get FTS 5 API with error code: %d", rc);
        }
        return rc;
    }
    if (!fts5api || fts5api->iVersion < 2) {
        if (pzErrMsg) {
            *pzErrMsg = sqlite3_mprintf("FTS 5 iVersion too old or NULL pointer");
        }
        return SQLITE_ERROR;
    }
    return SQLITE_OK;
}
}
/*
 * Method table for the Python module. The module exposes no Python-level
 * functions, so the table holds only the terminating sentinel entry.
 */
static PyMethodDef methods[] = {
    {NULL, NULL, 0, NULL},
};
static int
exec_module(PyObject *mod) { return 0; }
/*
 * Module slots: a single Py_mod_exec slot pointing at exec_module,
 * followed by the terminating {0, NULL} entry.
 */
static PyModuleDef_Slot slots[] = {
    {Py_mod_exec, reinterpret_cast<void *>(exec_module)},
    {0, NULL},
};
/*
 * Definition of the "sqlite_extension" Python module.
 *
 * The fields are initialized positionally, in PyModuleDef declaration
 * order, because designated initializers are not valid C++ before C++20
 * and are rejected by MSVC in earlier language modes. Members after
 * m_slots are left out and are therefore zero-initialized.
 */
static struct PyModuleDef module_def = {
    PyModuleDef_HEAD_INIT,                     /* m_base */
    "sqlite_extension",                        /* m_name */
    "Implement ICU based tokenizer for FTS5",  /* m_doc */
    0,                                         /* m_size */
    methods,                                   /* m_methods */
    slots,                                     /* m_slots */
};
extern "C" {
/*
 * Python import hook for the module: passes module_def to
 * PyModuleDef_Init() and returns its result.
 */
CALIBRE_MODINIT_FUNC
PyInit_sqlite_extension(void) {
    PyObject *definition = PyModuleDef_Init(&module_def);
    return definition;
}
}

View File

@ -0,0 +1,17 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
from calibre.db.tests.base import BaseTest
class FTSTest(BaseTest):
    '''Tests for the full text search support.'''

    def test_basic_fts(self):  # {{{
        # Loading the extension into a throwaway in-memory database is the
        # whole test for now: it passes if loading does not raise.
        import apsw
        from calibre.constants import plugins
        db = apsw.Connection(':memory:')
        plugins.load_apsw_extension(db, 'sqlite_extension')
    # }}}