mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Spellcheck: When checking French text, list all words that share the same stem as a single entry, so that j'aime and aime are listed as a single word.
This commit is contained in:
parent
643d305548
commit
16a5c262eb
@ -19,7 +19,7 @@ _patterns = None
|
||||
|
||||
class Patterns(object):
|
||||
|
||||
__slots__ = ('sanitize_invisible_pat', 'split_pat', 'digit_pat')
|
||||
__slots__ = ('sanitize_invisible_pat', 'split_pat', 'digit_pat', 'fr_elision_pat')
|
||||
|
||||
def __init__(self):
|
||||
import regex
|
||||
@ -30,6 +30,10 @@ class Patterns(object):
|
||||
r'\W+', flags=regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE)
|
||||
self.digit_pat = regex.compile(
|
||||
r'^\d+$', flags=regex.VERSION1 | regex.WORD | regex.UNICODE)
|
||||
# French words with elided prefixes (l', d', qu', ...) are reduced to the stem word, so that the
|
||||
# words appear only once in the word list
|
||||
self.fr_elision_pat = regex.compile(
|
||||
u"^(?:l|d|m|t|s|j|c|ç|lorsqu|puisqu|quoiqu|qu)['’]", flags=regex.UNICODE | regex.VERSION1 | regex.IGNORECASE)
|
||||
|
||||
def patterns():
|
||||
global _patterns
|
||||
@ -39,16 +43,19 @@ def patterns():
|
||||
|
||||
class Location(object):
    """One occurrence of a word recorded by the spell checker.

    Attributes:
        file_name: Name of the file the word was found in.
        elided_prefix: Prefix (for example ``l'``) that was stripped from the
            word before it was added to the word list; empty when nothing
            was stripped.
        original_word: The word as it currently appears in the text,
            including any elided prefix.
        location_node: The node the word was found in. Its ``sourceline``
            attribute, when present, is copied to ``self.sourceline``.
        node_item: Pair stored as given; it is not interpreted by this
            class. NOTE(review): presumably identifies the attribute/text
            slot on the node -- confirm against the callers.
        sourceline: Line number taken from ``location_node``, or None when
            no node (or no line information) is available.
    """

    __slots__ = ('file_name', 'sourceline', 'original_word', 'location_node', 'node_item', 'elided_prefix')

    def __init__(self, file_name=None, elided_prefix='', original_word=None, location_node=None, node_item=(None, None)):
        self.file_name, self.elided_prefix, self.original_word = file_name, elided_prefix, original_word
        self.location_node, self.node_item = location_node, node_item
        # location_node defaults to None, so read sourceline defensively
        # instead of raising AttributeError for a default-constructed object.
        self.sourceline = getattr(location_node, 'sourceline', None)

    def __repr__(self):
        return '%s @ %s:%s' % (self.original_word, self.file_name, self.sourceline)

    __str__ = __repr__

    def replace(self, new_word):
        """Record that this occurrence now reads ``new_word``.

        The elided prefix is put back in front of ``new_word`` so that
        ``original_word`` keeps matching the full text in the document.
        """
        self.original_word = self.elided_prefix + new_word
|
||||
|
||||
def filter_words(word):
|
||||
if not word:
|
||||
return False
|
||||
@ -68,9 +75,16 @@ def add_words(text, node, words, file_name, locale, node_item):
|
||||
candidates = get_words(text, locale.langcode)
|
||||
if candidates:
|
||||
p = patterns()
|
||||
is_fr = locale.langcode == 'fra'
|
||||
for word in candidates:
|
||||
sword = p.sanitize_invisible_pat.sub('', word)
|
||||
loc = Location(file_name, node.sourceline, word, node, node_item)
|
||||
elided_prefix = ''
|
||||
if is_fr:
|
||||
m = p.fr_elision_pat.match(sword)
|
||||
if m is not None and len(sword) > len(elided_prefix):
|
||||
elided_prefix = m.group()
|
||||
sword = sword[len(elided_prefix):]
|
||||
loc = Location(file_name, elided_prefix, word, node, node_item)
|
||||
words[(sword, locale)].append(loc)
|
||||
|
||||
def add_words_from_attr(node, attr, words, file_name, locale):
|
||||
@ -184,7 +198,8 @@ def replace_word(container, new_word, locations, locale):
|
||||
text = node.get(attr)
|
||||
else:
|
||||
text = getattr(node, attr)
|
||||
text, replaced = replace(text, loc.original_word, new_word, locale.langcode)
|
||||
replacement = loc.elided_prefix + new_word
|
||||
text, replaced = replace(text, loc.original_word, replacement, locale.langcode)
|
||||
if replaced:
|
||||
if is_attr:
|
||||
node.set(attr, text)
|
||||
|
@ -702,7 +702,7 @@ class WordsModel(QAbstractTableModel):
|
||||
|
||||
def replace_word(self, w, new_word):
|
||||
for location in self.words[w]:
|
||||
location.original_word = new_word
|
||||
location.replace(new_word)
|
||||
if w[0] == new_word:
|
||||
return w
|
||||
new_key = (new_word, w[1])
|
||||
@ -1164,6 +1164,7 @@ class SpellCheck(Dialog):
|
||||
d.exec_()
|
||||
# }}}
|
||||
|
||||
# Find next occurrence {{{
|
||||
def find_next(word, locations, current_editor, current_editor_name,
|
||||
gui_parent, show_editor, edit_file):
|
||||
files = OrderedDict()
|
||||
@ -1196,6 +1197,7 @@ def find_next(word, locations, current_editor, current_editor_name,
|
||||
show_editor(file_name)
|
||||
return True
|
||||
return False
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__':
|
||||
app = QApplication([])
|
||||
|
Loading…
x
Reference in New Issue
Block a user