From 97b192eb7b95c3e64025be864f8666d0130881b8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 12 Jul 2014 18:46:52 +0530 Subject: [PATCH] Edit Book: Spell check: When suggesting alternatives for a hyphenated word, ensure the first suggestion is the word formed by removing the hyphen, if that is a valid word --- src/calibre/spell/dictionary.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/calibre/spell/dictionary.py b/src/calibre/spell/dictionary.py index 8906caed6c..c939f6ac65 100644 --- a/src/calibre/spell/dictionary.py +++ b/src/calibre/spell/dictionary.py @@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2014, Kovid Goyal ' -import cPickle, os, glob, shutil +import cPickle, os, glob, shutil, re from collections import namedtuple from operator import attrgetter from itertools import chain @@ -181,6 +181,7 @@ def load_dictionary(dictionary): class Dictionaries(object): def __init__(self): + self.remove_hyphenation = re.compile('[\u2010-]+') self.dictionaries = {} self.word_cache = {} self.ignored_words = set() @@ -356,14 +357,21 @@ class Dictionaries(object): def suggestions(self, word, locale=None): locale = locale or self.default_locale d = self.dictionary_for_locale(locale) + ans = () if d is not None: try: - return d.obj.suggest(unicode(word)) + ans = d.obj.suggest(unicode(word)) except ValueError: pass - return () + else: + dehyphenated_word = self.remove_hyphenation.sub('', word) + if len(dehyphenated_word) != len(word) and self.recognized(dehyphenated_word, locale): + # Ensure the de-hyphenated word is present and is the first suggestion + ans = (dehyphenated_word,) + tuple(x for x in ans if x != dehyphenated_word) + return ans if __name__ == '__main__': dictionaries = Dictionaries() dictionaries.initialize() print (dictionaries.recognized('recognized', parse_lang_code('en'))) + print (dictionaries.suggestions('ade-quately', 
parse_lang_code('en')))