Spellcheck: When checking French text, list all words that share the same stem as a single word. For example, j'aime and aime are listed as a single word.

This commit is contained in:
Kovid Goyal 2014-04-21 12:16:19 +05:30
parent 643d305548
commit 16a5c262eb
2 changed files with 26 additions and 9 deletions

View File

@@ -19,7 +19,7 @@ _patterns = None
class Patterns(object): class Patterns(object):
__slots__ = ('sanitize_invisible_pat', 'split_pat', 'digit_pat') __slots__ = ('sanitize_invisible_pat', 'split_pat', 'digit_pat', 'fr_elision_pat')
def __init__(self): def __init__(self):
import regex import regex
@@ -30,6 +30,10 @@ class Patterns(object):
r'\W+', flags=regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE) r'\W+', flags=regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE)
self.digit_pat = regex.compile( self.digit_pat = regex.compile(
r'^\d+$', flags=regex.VERSION1 | regex.WORD | regex.UNICODE) r'^\d+$', flags=regex.VERSION1 | regex.WORD | regex.UNICODE)
# French words with prefixes are reduced to the stem word, so that the
# words appear only once in the word list
self.fr_elision_pat = regex.compile(
u"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)[']", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
def patterns(): def patterns():
global _patterns global _patterns
@@ -39,16 +43,19 @@ def patterns():
class Location(object): class Location(object):
__slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item') __slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item', 'elided_prefix')
def __init__(self, file_name=None, sourceline=None, original_word=None, location_node=None, node_item=(None, None)): def __init__(self, file_name=None, elided_prefix='', original_word=None, location_node=None, node_item=(None, None)):
self.file_name, self.sourceline, self.original_word = file_name, sourceline, original_word self.file_name, self.elided_prefix, self.original_word = file_name, elided_prefix, original_word
self.location_node, self.node_item = location_node, node_item self.location_node, self.node_item, self.sourceline = location_node, node_item, location_node.sourceline
def __repr__(self): def __repr__(self):
return '%s:%s' % (self.file_name, self.sourceline) return '%s @ %s:%s' % (self.original_word, self.file_name, self.sourceline)
__str__ = __repr__ __str__ = __repr__
def replace(self, new_word):
self.original_word = self.elided_prefix + new_word
def filter_words(word): def filter_words(word):
if not word: if not word:
return False return False
@@ -68,9 +75,16 @@ def add_words(text, node, words, file_name, locale, node_item):
candidates = get_words(text, locale.langcode) candidates = get_words(text, locale.langcode)
if candidates: if candidates:
p = patterns() p = patterns()
is_fr = locale.langcode == 'fra'
for word in candidates: for word in candidates:
sword = p.sanitize_invisible_pat.sub('', word) sword = p.sanitize_invisible_pat.sub('', word)
loc = Location(file_name, node.sourceline, word, node, node_item) elided_prefix = ''
if is_fr:
m = p.fr_elision_pat.match(sword)
if m is not None and len(sword) > len(elided_prefix):
elided_prefix = m.group()
sword = sword[len(elided_prefix):]
loc = Location(file_name, elided_prefix, word, node, node_item)
words[(sword, locale)].append(loc) words[(sword, locale)].append(loc)
def add_words_from_attr(node, attr, words, file_name, locale): def add_words_from_attr(node, attr, words, file_name, locale):
@@ -184,7 +198,8 @@ def replace_word(container, new_word, locations, locale):
text = node.get(attr) text = node.get(attr)
else: else:
text = getattr(node, attr) text = getattr(node, attr)
text, replaced = replace(text, loc.original_word, new_word, locale.langcode) replacement = loc.elided_prefix + new_word
text, replaced = replace(text, loc.original_word, replacement, locale.langcode)
if replaced: if replaced:
if is_attr: if is_attr:
node.set(attr, text) node.set(attr, text)

View File

@@ -702,7 +702,7 @@ class WordsModel(QAbstractTableModel):
def replace_word(self, w, new_word): def replace_word(self, w, new_word):
for location in self.words[w]: for location in self.words[w]:
location.original_word = new_word location.replace(new_word)
if w[0] == new_word: if w[0] == new_word:
return w return w
new_key = (new_word, w[1]) new_key = (new_word, w[1])
@@ -1164,6 +1164,7 @@ class SpellCheck(Dialog):
d.exec_() d.exec_()
# }}} # }}}
# Find next occurrence {{{
def find_next(word, locations, current_editor, current_editor_name, def find_next(word, locations, current_editor, current_editor_name,
gui_parent, show_editor, edit_file): gui_parent, show_editor, edit_file):
files = OrderedDict() files = OrderedDict()
@@ -1196,6 +1197,7 @@ def find_next(word, locations, current_editor, current_editor_name,
show_editor(file_name) show_editor(file_name)
return True return True
return False return False
# }}}
if __name__ == '__main__': if __name__ == '__main__':
app = QApplication([]) app = QApplication([])