More fts5 tests

This commit is contained in:
Kovid Goyal 2023-11-03 13:56:41 +05:30
parent 63a2fa9474
commit 31611e434a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -46,7 +46,7 @@ CREATE VIRTUAL TABLE fts_row USING fts5vocab(fts_table, row);
def search(self, query, highlight_start='>', highlight_end='<', snippet_size=4): def search(self, query, highlight_start='>', highlight_end='<', snippet_size=4):
snippet_size=max(1, min(snippet_size, 64)) snippet_size=max(1, min(snippet_size, 64))
stmt = ( stmt = (
f'SELECT snippet(fts_table, 0, "{highlight_start}", "{highlight_end}", "", {snippet_size})' f"SELECT snippet(fts_table, 0, '{highlight_start}', '{highlight_end}', '', {snippet_size})"
' FROM fts_table WHERE fts_table MATCH ? ORDER BY RANK' ' FROM fts_table WHERE fts_table MATCH ? ORDER BY RANK'
) )
return list(self.execute(stmt, (unicode_normalize(query),))) return list(self.execute(stmt, (unicode_normalize(query),)))
@@ -71,13 +71,14 @@ class FTSTest(BaseTest):
set_ui_language('en') set_ui_language('en')
def test_fts_tokenize(self): # {{{ def test_fts_tokenize(self): # {{{
from calibre_extensions.sqlite_extension import set_ui_language from calibre_extensions.sqlite_extension import set_ui_language, FTS5_TOKENIZE_QUERY, FTS5_TOKENIZE_DOCUMENT
def t(x, s, e, f=0): def t(x, s, e, f=0):
return {'text': x, 'start': s, 'end': e, 'flags': f} return {'text': x, 'start': s, 'end': e, 'flags': f}
def tt(text, *expected_tokens): def tt(text, *expected_tokens, for_query=False):
q = tuple(x['text'] for x in tokenize(text)) flags = FTS5_TOKENIZE_QUERY if for_query else FTS5_TOKENIZE_DOCUMENT
q = tuple(x['text'] for x in tokenize(text, flags=flags))
self.ae(q, expected_tokens) self.ae(q, expected_tokens)
self.ae( self.ae(
@@ -105,6 +106,8 @@ class FTSTest(BaseTest):
[t("a", 0, 1), t('😀', 1, 5), t('smile', 5, 10)] [t("a", 0, 1), t('😀', 1, 5), t('smile', 5, 10)]
) )
tt("你don't叫mess", '你', "don't", '叫', 'mess')
tt("你don't叫mess", '你', "don't", '叫', 'mess', for_query=True)
tt('你叫什么名字', '你', '叫', '什么', '名字') tt('你叫什么名字', '你', '叫', '什么', '名字')
tt('你叫abc', '你', '叫', 'abc') tt('你叫abc', '你', '叫', 'abc')
tt('a你b叫什么名字', 'a', '你', 'b', '叫', '什么', '名字') tt('a你b叫什么名字', 'a', '你', 'b', '叫', '什么', '名字')
@@ -135,6 +138,7 @@ class FTSTest(BaseTest):
conn = TestConn() conn = TestConn()
conn.insert_text("你don't叫mess") conn.insert_text("你don't叫mess")
self.ae(conn.term_row_counts(), {"don't": 1, 'mess': 1, '你': 1, '叫': 1})
self.ae(conn.search("mess"), [("你don't叫>mess<",)]) self.ae(conn.search("mess"), [("你don't叫>mess<",)])
self.ae(conn.search('''"don't"'''), [("你>don't<叫mess",)]) self.ae(conn.search('''"don't"'''), [("你>don't<叫mess",)])
self.ae(conn.search("你"), [(">你<don't叫mess",)]) self.ae(conn.search("你"), [(">你<don't叫mess",)])