mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Basic test to tokenizer
This commit is contained in:
parent
c819fcb870
commit
79ea88ddb8
@ -3,25 +3,49 @@
|
|||||||
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
# License: GPLv3 Copyright: 2021, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
|
||||||
from calibre.db.tests.base import BaseTest
|
import builtins
|
||||||
|
import sys
|
||||||
from apsw import Connection
|
from apsw import Connection
|
||||||
|
|
||||||
from calibre.constants import plugins
|
from calibre.constants import plugins
|
||||||
|
from calibre.db.tests.base import BaseTest
|
||||||
|
|
||||||
|
|
||||||
|
def print(*args, **kwargs):
|
||||||
|
kwargs['file'] = sys.__stdout__
|
||||||
|
builtins.print(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class TestConn(Connection):
|
class TestConn(Connection):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, remove_diacritics=True):
|
||||||
super().__init__(':memory:')
|
super().__init__(':memory:')
|
||||||
plugins.load_apsw_extension(self, 'sqlite_extension')
|
plugins.load_apsw_extension(self, 'sqlite_extension')
|
||||||
self.cursor().execute("CREATE VIRTUAL TABLE fts_table USING fts5(t, tokenize = 'unicode61 remove_diacritics 2')")
|
options = []
|
||||||
|
if remove_diacritics:
|
||||||
|
options.append('remove_diacritics'), options.append('2')
|
||||||
|
options = ' '.join(options)
|
||||||
|
self.execute(f'''
|
||||||
|
CREATE VIRTUAL TABLE fts_table USING fts5(t, tokenize = 'unicode61 {options}');
|
||||||
|
CREATE VIRTUAL TABLE fts_row USING fts5vocab(fts_table, row);
|
||||||
|
''')
|
||||||
|
|
||||||
|
def execute(self, *a):
|
||||||
|
return self.cursor().execute(*a)
|
||||||
|
|
||||||
def insert_text(self, text):
|
def insert_text(self, text):
|
||||||
self.cursor().execute('INSERT INTO fts_table(t) VALUES (?)', (text,))
|
self.execute('INSERT INTO fts_table(t) VALUES (?)', (text,))
|
||||||
|
|
||||||
|
def term_row_counts(self):
|
||||||
|
return dict(self.execute('SELECT term,doc FROM fts_row'))
|
||||||
|
|
||||||
|
|
||||||
class FTSTest(BaseTest):
|
class FTSTest(BaseTest):
|
||||||
|
ae = BaseTest.assertEqual
|
||||||
|
|
||||||
def test_basic_fts(self): # {{{
|
def test_basic_fts(self): # {{{
|
||||||
conn = TestConn()
|
conn = TestConn()
|
||||||
conn.insert_text('two words, and a period. With another.')
|
conn.insert_text('two words, and a period. With another.')
|
||||||
|
conn.insert_text('and another')
|
||||||
|
self.ae(conn.term_row_counts(), {'a': 1, 'and': 2, 'another': 2, 'period': 1, 'two': 1, 'with': 1, 'words': 1})
|
||||||
# }}}
|
# }}}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user