Edit Book: Fix replacement of hyphenated words in the spell checker not working

This commit is contained in:
Kovid Goyal 2014-07-12 11:01:37 +05:30
parent f01c2e96fd
commit 446e7a9b0b
2 changed files with 33 additions and 17 deletions

View File

@ -603,7 +603,7 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg
#endif #endif
UChar *buf = NULL; UChar *buf = NULL;
int32_t prev = 0, p = 0, sz = 0, tsz = 0, ans = -1; int32_t prev = 0, p = 0, sz = 0, ans = -1;
PyObject *token = NULL; PyObject *token = NULL;
if (!PyArg_ParseTuple(args, "O", &token)) return NULL; if (!PyArg_ParseTuple(args, "O", &token)) return NULL;
@ -617,21 +617,26 @@ icu_BreakIterator_index(icu_BreakIterator *self, PyObject *args, PyObject *kwarg
prev = p; p = ubrk_next(self->break_iterator); prev = p; p = ubrk_next(self->break_iterator);
if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE) if (self->type == UBRK_WORD && ubrk_getRuleStatus(self->break_iterator) == UBRK_WORD_NONE)
continue; // We are not at the start of a word continue; // We are not at the start of a word
tsz = (p == UBRK_DONE) ? self->text_len - prev : p - prev; if (self->text_len >= prev + sz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) {
if (sz == tsz && memcmp(self->text + prev, buf, sz * sizeof(UChar)) == 0) { // Needle is present at text[prev:] we have to check if it is followed by a non-hyphen boundary
#ifdef PY_UNICODE_WIDE if(
ans = u_countChar32(self->text, prev); ubrk_isBoundary(self->break_iterator, prev + sz) &&
#else (self->text_len == prev + sz || (self->text[prev + sz] != 0x2d && self->text[prev + sz] != 0x2010))
ans = prev; ) {
#endif ans = prev; break; // Found word surrounded by non-hyphen boundaries
break; }
if (p != UBRK_DONE) ubrk_isBoundary(self->break_iterator, p); // Reset the iterator to its position before the call to ubrk_isBoundary
} }
} }
#ifdef Py_UNICODE_WIDE
if (ans > 0) ans = u_countChar32(self->text, ans);
#endif
Py_END_ALLOW_THREADS; Py_END_ALLOW_THREADS;
end: end:
free(buf); free(buf);
return Py_BuildValue("i", ans); return Py_BuildValue("l", (long int)ans);
} // }}} } // }}}

View File

@ -156,13 +156,24 @@ class TestICU(unittest.TestCase):
self.ae(split(u'I I\'m'), ['I', "I'm"]) self.ae(split(u'I I\'m'), ['I', "I'm"])
self.ae(split(u'out-of-the-box'), ['out-of-the-box']) self.ae(split(u'out-of-the-box'), ['out-of-the-box'])
self.ae(split(u'-one two-'), ['one', 'two']) self.ae(split(u'-one two-'), ['one', 'two'])
self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (6 if sys.maxunicode >= 0x10ffff else 7, 5)]) self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (7 if icu.is_narrow_build else 6, 5)])
self.ae(0, index_of('i', 'i')) for needle, haystack, pos in (
self.ae(4, index_of('i', 'six i')) ('word', 'a word b', 2),
self.ae(-1, index_of('i', '')) ('word', 'a word', 2),
self.ae(-1, index_of('', '')) ('one-two', 'a one-two punch', 2),
self.ae(-1, index_of('', 'i')) ('one-two', 'one-two punch', 0),
self.ae(-1, index_of('i', 'six clicks')) ('one-two', 'one-two', 0),
('one', 'one-two one', 8),
('one-two', 'one-two-three one-two', 14),
('one', 'onet one', 5),
('i', 'i', 0),
('i', 'six i', 4),
('i', '', -1), ('', '', -1), ('', 'i', -1),
('i', 'six clicks', -1),
('i', '\U0001f431 i', (3 if icu.is_narrow_build else 2)),
):
fpos = index_of(needle, haystack)
self.ae(pos, fpos, 'Failed to find index of %r in %r (%d != %d)' % (needle, haystack, pos, fpos))
class TestRunner(unittest.main): class TestRunner(unittest.main):