Edit Book: Spell check: When suggesting alternatives for a hyphenated word, ensure that the first suggestion is the word formed by removing the hyphen, if that is a valid word

This commit is contained in:
Kovid Goyal 2014-07-12 18:46:52 +05:30
parent a4919c2a59
commit 97b192eb7b

View File

@@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import cPickle, os, glob, shutil import cPickle, os, glob, shutil, re
from collections import namedtuple from collections import namedtuple
from operator import attrgetter from operator import attrgetter
from itertools import chain from itertools import chain
@@ -181,6 +181,7 @@ def load_dictionary(dictionary):
class Dictionaries(object): class Dictionaries(object):
def __init__(self): def __init__(self):
self.remove_hyphenation = re.compile('[\u2010-]+')
self.dictionaries = {} self.dictionaries = {}
self.word_cache = {} self.word_cache = {}
self.ignored_words = set() self.ignored_words = set()
@@ -356,14 +357,21 @@ class Dictionaries(object):
def suggestions(self, word, locale=None): def suggestions(self, word, locale=None):
locale = locale or self.default_locale locale = locale or self.default_locale
d = self.dictionary_for_locale(locale) d = self.dictionary_for_locale(locale)
ans = ()
if d is not None: if d is not None:
try: try:
return d.obj.suggest(unicode(word)) ans = d.obj.suggest(unicode(word))
except ValueError: except ValueError:
pass pass
return () else:
dehyphenated_word = self.remove_hyphenation.sub('', word)
if len(dehyphenated_word) != len(word) and self.recognized(dehyphenated_word, locale):
# Ensure the de-hyphenated word is present and is the first suggestion
ans = (dehyphenated_word,) + tuple(x for x in ans if x != dehyphenated_word)
return ans
if __name__ == '__main__': if __name__ == '__main__':
dictionaries = Dictionaries() dictionaries = Dictionaries()
dictionaries.initialize() dictionaries.initialize()
print (dictionaries.recognized('recognized', parse_lang_code('en'))) print (dictionaries.recognized('recognized', parse_lang_code('en')))
print (dictionaries.suggestions('ade-quately', parse_lang_code('en')))