Fix script block loop

Use the correct language-based iterator and also update the start of the
block correctly.
This commit is contained in:
Kovid Goyal 2021-06-19 14:27:54 +05:30
parent fafacae005
commit a547ffd26e
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 10 additions and 12 deletions

View File

@@ -276,23 +276,20 @@ public:
IteratorDescription state;
state.language = ""; state.script = USCRIPT_COMMON;
int32_t start_script_block_at = offset;
BreakIterator &word_iterator = ensure_lang_iterator(state.language);
while (offset < str.length()) {
while (offset < str.length()) {
UChar32 ch = str.char32At(offset);
if (at_script_boundary(state, ch)) {
if (offset > start_script_block_at) {
if ((rc = tokenize_script_block(
str, start_script_block_at, offset,
for_query, callback, callback_ctx, word_iterator)) != SQLITE_OK) return rc;
}
break;
UChar32 ch = str.char32At(offset);
if (at_script_boundary(state, ch)) {
if (offset > start_script_block_at) {
if ((rc = tokenize_script_block(
str, start_script_block_at, offset,
for_query, callback, callback_ctx, ensure_lang_iterator(state.language))) != SQLITE_OK) return rc;
}
offset = str.moveIndex32(offset, 1);
start_script_block_at = offset;
}
offset = str.moveIndex32(offset, 1);
}
if (offset > start_script_block_at) {
rc = tokenize_script_block(str, start_script_block_at, offset, for_query, callback, callback_ctx, word_iterator);
rc = tokenize_script_block(str, start_script_block_at, offset, for_query, callback, callback_ctx, ensure_lang_iterator(state.language));
}
return rc;
}

View File

@@ -126,6 +126,7 @@ class FTSTest(BaseTest):
self.ae(conn.search("mess"), [("你don't叫>mess<",)])
self.ae(conn.search('''"don't"'''), [("你>don't<叫mess",)])
self.ae(conn.search("你"), [(">你<don't叫mess",)])
self.ae(conn.search("叫"), [("你don't>叫<mess",)])
# }}}
def test_fts_query_syntax(self): # {{{