mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Edit Book: Fix spell check treating the Unicode hyphen (U+2010) differently from the normal hyphen-minus (U+002D). Fixes #1656319 [Problems with hyphen in spell checking](https://bugs.launchpad.net/calibre/+bug/1656319)
This commit is contained in:
parent
6011d45ef3
commit
97a1a92e1b
@ -44,6 +44,7 @@ class UserDictionary(object):
|
|||||||
return {'name':self.name, 'is_active': self.is_active, 'words':[
|
return {'name':self.name, 'is_active': self.is_active, 'words':[
|
||||||
(w, l) for w, l in self.words]}
|
(w, l) for w, l in self.words]}
|
||||||
|
|
||||||
|
|
||||||
_builtins = _custom = None
|
_builtins = _custom = None
|
||||||
|
|
||||||
|
|
||||||
@ -82,6 +83,7 @@ def custom_dictionaries(reread=False):
|
|||||||
_custom = frozenset(dics)
|
_custom = frozenset(dics)
|
||||||
return _custom
|
return _custom
|
||||||
|
|
||||||
|
|
||||||
default_en_locale = 'en-US'
|
default_en_locale = 'en-US'
|
||||||
try:
|
try:
|
||||||
ul = parse_lang_code(get_system_locale() or 'en-US')
|
ul = parse_lang_code(get_system_locale() or 'en-US')
|
||||||
@ -368,7 +370,7 @@ class Dictionaries(object):
|
|||||||
d = self.dictionary_for_locale(locale)
|
d = self.dictionary_for_locale(locale)
|
||||||
if d is not None:
|
if d is not None:
|
||||||
try:
|
try:
|
||||||
ans = d.obj.recognized(word)
|
ans = d.obj.recognized(word.replace('\u2010', '-'))
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
@ -381,6 +383,7 @@ class Dictionaries(object):
|
|||||||
def suggestions(self, word, locale=None):
|
def suggestions(self, word, locale=None):
|
||||||
locale = locale or self.default_locale
|
locale = locale or self.default_locale
|
||||||
d = self.dictionary_for_locale(locale)
|
d = self.dictionary_for_locale(locale)
|
||||||
|
has_unicode_hyphen = '\u2010' in word
|
||||||
ans = ()
|
ans = ()
|
||||||
|
|
||||||
def add_suggestion(w, ans):
|
def add_suggestion(w, ans):
|
||||||
@ -407,6 +410,8 @@ class Dictionaries(object):
|
|||||||
fw = w1 + m.group() + ' ' + capitalize(w2)
|
fw = w1 + m.group() + ' ' + capitalize(w2)
|
||||||
ans = add_suggestion(fw, ans)
|
ans = add_suggestion(fw, ans)
|
||||||
|
|
||||||
|
if has_unicode_hyphen:
|
||||||
|
ans = tuple(w.replace('-', '\u2010') for w in ans)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
@ -418,6 +423,12 @@ def test_dictionaries():
|
|||||||
sg = partial(dictionaries.suggestions, locale=eng)
|
sg = partial(dictionaries.suggestions, locale=eng)
|
||||||
if not rec('recognized'):
|
if not rec('recognized'):
|
||||||
raise ValueError('recognized not recognized')
|
raise ValueError('recognized not recognized')
|
||||||
|
if not rec('one-half'):
|
||||||
|
raise ValueError('one-half not recognized')
|
||||||
|
if not rec('one\u2010half'):
|
||||||
|
raise ValueError('one\u2010half not recognized with unicode hyphen (U+2010)')
|
||||||
|
if 'one\u2010half' not in sg('oone\u2010half'):
|
||||||
|
raise ValueError('Unicode hyphen not preserved in suggestions')
|
||||||
if 'adequately' not in sg('ade-quately'):
|
if 'adequately' not in sg('ade-quately'):
|
||||||
raise ValueError('adequately not in %s' % sg('ade-quately'))
|
raise ValueError('adequately not in %s' % sg('ade-quately'))
|
||||||
if 'magic. Wand' not in sg('magic.wand'):
|
if 'magic. Wand' not in sg('magic.wand'):
|
||||||
@ -425,5 +436,6 @@ def test_dictionaries():
|
|||||||
d = load_dictionary(get_dictionary(parse_lang_code('es'))).obj
|
d = load_dictionary(get_dictionary(parse_lang_code('es'))).obj
|
||||||
assert d.recognized('Achí')
|
assert d.recognized('Achí')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
test_dictionaries()
|
test_dictionaries()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user