Case folding should happen after diacritics removal, not before

This commit is contained in:
Kovid Goyal 2021-06-28 20:03:29 +05:30
parent 558b477328
commit 5b10767860
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -268,11 +268,12 @@ private:
} }
if (is_token) { if (is_token) {
icu::UnicodeString token(str, token_start_pos, token_end_pos - token_start_pos); icu::UnicodeString token(str, token_start_pos, token_end_pos - token_start_pos);
token.foldCase(U_FOLD_CASE_DEFAULT); token.foldCase();
if ((rc = send_token(token, token_start_pos, token_end_pos, stemmer)) != SQLITE_OK) return rc; if ((rc = send_token(token, token_start_pos, token_end_pos, stemmer)) != SQLITE_OK) return rc;
if (!for_query && remove_diacritics) { if (!for_query && remove_diacritics) {
icu::UnicodeString tt(token); icu::UnicodeString tt(str, token_start_pos, token_end_pos - token_start_pos);
diacritics_remover->transliterate(tt); diacritics_remover->transliterate(tt);
tt.foldCase();
if (tt != token) { if (tt != token) {
if ((rc = send_token(tt, token_start_pos, token_end_pos, stemmer, FTS5_TOKEN_COLOCATED)) != SQLITE_OK) return rc; if ((rc = send_token(tt, token_start_pos, token_end_pos, stemmer, FTS5_TOKEN_COLOCATED)) != SQLITE_OK) return rc;
} }