mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Fix replacement of hyphenated words in the spell checker not working
This commit is contained in:
parent
f01c2e96fd
commit
446e7a9b0b
@ -603,7 +603,7 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg
|
||||
#endif
|
||||
|
||||
UChar *buf = NULL;
|
||||
int32_t prev = 0, p = 0, sz = 0, tsz = 0, ans = -1;
|
||||
int32_t prev = 0, p = 0, sz = 0, ans = -1;
|
||||
PyObject *token = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O", &token)) return NULL;
|
||||
@ -617,21 +617,26 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg
|
||||
prev = p; p = ubrk_next(self->break_iterator);
|
||||
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
|
||||
continue; // We are not at the start of a word
|
||||
tsz = (p == UBRK_DONE) ? self->text_len - prev : p - prev;
|
||||
if (sz == tsz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) {
|
||||
#ifdef PY_UNICODE_WIDE
|
||||
ans = u_countChar32(self->text, prev);
|
||||
#else
|
||||
ans = prev;
|
||||
if (self->text_len >= prev + sz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) {
|
||||
// Needle is present at text[prev:]; we have to check if it is followed by a non-hyphen boundary
|
||||
if(
|
||||
ubrk_isBoundary(self->break_iterator, prev + sz) &&
|
||||
(self->text_len == prev + sz || (self->text[prev + sz] != 0x2d && self->text[prev + sz] != 0x2010))
|
||||
) {
|
||||
ans = prev; break; // Found word surrounded by non-hyphen boundaries
|
||||
}
|
||||
if (p != UBRK_DONE) ubrk_isBoundary(self->break_iterator, p); // Reset the iterator to its position before the call to ubrk_isBoundary
|
||||
}
|
||||
}
|
||||
#ifdef Py_UNICODE_WIDE
|
||||
if (ans > 0) ans = u_countChar32(self->text, ans);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
Py_END_ALLOW_THREADS;
|
||||
|
||||
|
||||
end:
|
||||
free(buf);
|
||||
return Py_BuildValue("i", ans);
|
||||
return Py_BuildValue("l", (long int)ans);
|
||||
|
||||
} // }}}
|
||||
|
||||
|
@ -156,13 +156,24 @@ class TestICU(unittest.TestCase):
|
||||
self.ae(split(u'I I\'m'), ['I', "I'm"])
|
||||
self.ae(split(u'out-of-the-box'), ['out-of-the-box'])
|
||||
self.ae(split(u'-one two-'), ['one', 'two'])
|
||||
self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (6 if sys.maxunicode >= 0x10ffff else 7, 5)])
|
||||
self.ae(0, index_of('i', 'i'))
|
||||
self.ae(4, index_of('i', 'six i'))
|
||||
self.ae(-1, index_of('i', ''))
|
||||
self.ae(-1, index_of('', ''))
|
||||
self.ae(-1, index_of('', 'i'))
|
||||
self.ae(-1, index_of('i', 'six clicks'))
|
||||
self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (7 if icu.is_narrow_build else 6, 5)])
|
||||
for needle, haystack, pos in (
|
||||
('word', 'a word b', 2),
|
||||
('word', 'a word', 2),
|
||||
('one-two', 'a one-two punch', 2),
|
||||
('one-two', 'one-two punch', 0),
|
||||
('one-two', 'one-two', 0),
|
||||
('one', 'one-two one', 8),
|
||||
('one-two', 'one-two-three one-two', 14),
|
||||
('one', 'onet one', 5),
|
||||
('i', 'i', 0),
|
||||
('i', 'six i', 4),
|
||||
('i', '', -1), ('', '', -1), ('', 'i', -1),
|
||||
('i', 'six clicks', -1),
|
||||
('i', '\U0001f431 i', (3 if icu.is_narrow_build else 2)),
|
||||
):
|
||||
fpos = index_of(needle, haystack)
|
||||
self.ae(pos, fpos, 'Failed to find index of %r in %r (%d != %d)' % (needle, haystack, pos, fpos))
|
||||
|
||||
class TestRunner(unittest.main):
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user