diff --git a/src/calibre/utils/icu.c b/src/calibre/utils/icu.c index c6b6564ff3..ec9a778b58 100644 --- a/src/calibre/utils/icu.c +++ b/src/calibre/utils/icu.c @@ -618,7 +618,8 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE) continue; // We are not at the start of a word if (self->text_len >= prev + sz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) { - // Needle is present at text[prev:] we have to check if it is followed by a non-hyphen boundary + // Needle is present at text[prev:] we have to check if it is not surrounded by hyphen boundaries + if (prev > 0 && (self->text[prev-1] == 0x2d || self->text[prev-1] == 0x2010)) continue; // At a hyphen boundary if( ubrk_isBoundary(self->break_iterator, prev + sz) && (self->text_len == prev + sz || (self->text[prev + sz] != 0x2d && self->text[prev + sz] != 0x2010)) diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py index 4f7a474dba..5ddd0bc345 100644 --- a/src/calibre/utils/icu_test.py +++ b/src/calibre/utils/icu_test.py @@ -166,6 +166,7 @@ class TestICU(unittest.TestCase): ('one', 'one-two one', 8), ('one-two', 'one-two-three one-two', 14), ('one', 'onet one', 5), + ('two', 'one-two two', 8), ('i', 'i', 0), ('i', 'six i', 4), ('i', '', -1), ('', '', -1), ('', 'i', -1),