diff --git a/src/calibre/db/sqlite_extension.cpp b/src/calibre/db/sqlite_extension.cpp index 59d1c20264..b29ba04927 100644 --- a/src/calibre/db/sqlite_extension.cpp +++ b/src/calibre/db/sqlite_extension.cpp @@ -104,7 +104,6 @@ struct char_cmp { typedef std::unique_ptr BreakIterator; - class Tokenizer { private: bool remove_diacritics; @@ -125,11 +124,14 @@ private: case U_DECIMAL_DIGIT_NUMBER: case U_LETTER_NUMBER: case U_OTHER_NUMBER: + case U_CURRENCY_SYMBOL: + case U_OTHER_SYMBOL: case U_PRIVATE_USE_CHAR: return true; default: - return false; + break; } + return false; } int send_token(const icu::UnicodeString &token, int32_t start_offset, int32_t end_offset, int flags = 0) { diff --git a/src/calibre/db/tests/fts.py b/src/calibre/db/tests/fts.py index 3273634209..c3ff7f2ccf 100644 --- a/src/calibre/db/tests/fts.py +++ b/src/calibre/db/tests/fts.py @@ -65,6 +65,22 @@ class FTSTest(BaseTest): tokenize("Some wörds"), [t('some', 0, 4), t('wörds', 5, 11), t('words', 5, 11, 1)] ) + self.ae( + tokenize("don't 'bug'"), + [t("don't", 0, 5), t('bug', 7, 10)] + ) + self.ae( + tokenize("a,b. c"), + [t("a", 0, 1), t('b', 2, 3), t('c', 5, 6)] + ) + self.ae( + tokenize("a*b+c"), + [t("a", 0, 1), t('b', 2, 3), t('c', 4, 5)] + ) + self.ae( + tokenize("a😀smile"), + [t("a", 0, 1), t('😀', 1, 5), t('smile', 5, 10)] + ) # }}} def test_fts_basic(self): # {{{