mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add currency and other symbols to allowed token characters
This commit is contained in:
parent
2cf31be2ba
commit
6ef1ec1656
@ -104,7 +104,6 @@ struct char_cmp {
|
|||||||
|
|
||||||
typedef std::unique_ptr<icu::BreakIterator> BreakIterator;
|
typedef std::unique_ptr<icu::BreakIterator> BreakIterator;
|
||||||
|
|
||||||
|
|
||||||
class Tokenizer {
|
class Tokenizer {
|
||||||
private:
|
private:
|
||||||
bool remove_diacritics;
|
bool remove_diacritics;
|
||||||
@ -125,11 +124,14 @@ private:
|
|||||||
case U_DECIMAL_DIGIT_NUMBER:
|
case U_DECIMAL_DIGIT_NUMBER:
|
||||||
case U_LETTER_NUMBER:
|
case U_LETTER_NUMBER:
|
||||||
case U_OTHER_NUMBER:
|
case U_OTHER_NUMBER:
|
||||||
|
case U_CURRENCY_SYMBOL:
|
||||||
|
case U_OTHER_SYMBOL:
|
||||||
case U_PRIVATE_USE_CHAR:
|
case U_PRIVATE_USE_CHAR:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return false;
|
break;;
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int send_token(const icu::UnicodeString &token, int32_t start_offset, int32_t end_offset, int flags = 0) {
|
int send_token(const icu::UnicodeString &token, int32_t start_offset, int32_t end_offset, int flags = 0) {
|
||||||
|
@ -65,6 +65,22 @@ class FTSTest(BaseTest):
|
|||||||
tokenize("Some wörds"),
|
tokenize("Some wörds"),
|
||||||
[t('some', 0, 4), t('wörds', 5, 11), t('words', 5, 11, 1)]
|
[t('some', 0, 4), t('wörds', 5, 11), t('words', 5, 11, 1)]
|
||||||
)
|
)
|
||||||
|
self.ae(
|
||||||
|
tokenize("don't 'bug'"),
|
||||||
|
[t("don't", 0, 5), t('bug', 7, 10)]
|
||||||
|
)
|
||||||
|
self.ae(
|
||||||
|
tokenize("a,b. c"),
|
||||||
|
[t("a", 0, 1), t('b', 2, 3), t('c', 5, 6)]
|
||||||
|
)
|
||||||
|
self.ae(
|
||||||
|
tokenize("a*b+c"),
|
||||||
|
[t("a", 0, 1), t('b', 2, 3), t('c', 4, 5)]
|
||||||
|
)
|
||||||
|
self.ae(
|
||||||
|
tokenize("a😀smile"),
|
||||||
|
[t("a", 0, 1), t('😀', 1, 5), t('smile', 5, 10)]
|
||||||
|
)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def test_fts_basic(self): # {{{
|
def test_fts_basic(self): # {{{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user