mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Better fix for ignoring soft-hyphens when checking spelling live
This commit is contained in:
parent
ae962ae3b1
commit
511d3c6ba4
@ -11,7 +11,7 @@ from collections import namedtuple
|
|||||||
|
|
||||||
from PyQt5.Qt import QFont, QTextBlockUserData, QTextCharFormat
|
from PyQt5.Qt import QFont, QTextBlockUserData, QTextCharFormat
|
||||||
|
|
||||||
from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags
|
from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags, patterns
|
||||||
from calibre.spell.dictionary import parse_lang_code
|
from calibre.spell.dictionary import parse_lang_code
|
||||||
from calibre.spell.break_iterator import split_into_words_and_positions
|
from calibre.spell.break_iterator import split_into_words_and_positions
|
||||||
from calibre.gui2.tweak_book import dictionaries, tprefs, verify_link
|
from calibre.gui2.tweak_book import dictionaries, tprefs, verify_link
|
||||||
@ -76,7 +76,18 @@ def spell_property(sfmt, locale):
|
|||||||
return s
|
return s
|
||||||
|
|
||||||
|
|
||||||
_speedup.init(spell_property, dictionaries.recognized, split_into_words_and_positions)
|
def sanitizing_recognizer():
    """Build a word-recognition callback that cleans each word before lookup.

    The compiled pattern's ``sub`` method and ``dictionaries.recognized`` are
    looked up once, when this factory is called, and captured by the returned
    closure.

    Returns:
        A callable ``recognized(word, locale=None)`` that removes every match
        of ``patterns().sanitize_invisible_pat`` from ``word``, strips leading
        and trailing whitespace, and returns the result of
        ``dictionaries.recognized`` for the cleaned word and ``locale``.
    """
    # NOTE(review): presumably the pattern matches soft hyphens and similar
    # invisible characters -- confirm in calibre.ebooks.oeb.polish.spell.
    strip_invisible = patterns().sanitize_invisible_pat.sub
    lookup = dictionaries.recognized

    def recognized(word, locale=None):
        # Clean the word first so the lookup sees the sanitized text.
        cleaned = strip_invisible('', word)
        return lookup(cleaned.strip(), locale)

    return recognized
|
||||||
|
|
||||||
|
|
||||||
|
_speedup.init(spell_property, sanitizing_recognizer(), split_into_words_and_positions)
|
||||||
del spell_property
|
del spell_property
|
||||||
check_spelling = _speedup.check_spelling
|
check_spelling = _speedup.check_spelling
|
||||||
|
|
||||||
|
@ -34,11 +34,6 @@ dprefs.defaults['user_dictionaries'] = [{'name':_('Default'), 'is_active':True,
|
|||||||
not_present = object()
|
not_present = object()
|
||||||
|
|
||||||
|
|
||||||
def normalize_word(word):
|
|
||||||
# remove soft hyphens
|
|
||||||
return unicode_type(word).replace('\u00ad', '')
|
|
||||||
|
|
||||||
|
|
||||||
class UserDictionary(object):
|
class UserDictionary(object):
|
||||||
|
|
||||||
__slots__ = ('name', 'is_active', 'words')
|
__slots__ = ('name', 'is_active', 'words')
|
||||||
@ -231,17 +226,14 @@ class Dictionaries(object):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def ignore_word(self, word, locale):
|
def ignore_word(self, word, locale):
|
||||||
word = normalize_word(word)
|
|
||||||
self.ignored_words.add((word, locale.langcode))
|
self.ignored_words.add((word, locale.langcode))
|
||||||
self.word_cache[(word, locale)] = True
|
self.word_cache[(word, locale)] = True
|
||||||
|
|
||||||
def unignore_word(self, word, locale):
|
def unignore_word(self, word, locale):
|
||||||
word = normalize_word(word)
|
|
||||||
self.ignored_words.discard((word, locale.langcode))
|
self.ignored_words.discard((word, locale.langcode))
|
||||||
self.word_cache.pop((word, locale), None)
|
self.word_cache.pop((word, locale), None)
|
||||||
|
|
||||||
def is_word_ignored(self, word, locale):
|
def is_word_ignored(self, word, locale):
|
||||||
word = normalize_word(word)
|
|
||||||
return (word, locale.langcode) in self.ignored_words
|
return (word, locale.langcode) in self.ignored_words
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -275,14 +267,12 @@ class Dictionaries(object):
|
|||||||
for d in itervalues(self.dictionaries):
|
for d in itervalues(self.dictionaries):
|
||||||
if d and getattr(d.primary_locale, 'langcode', None) == langcode:
|
if d and getattr(d.primary_locale, 'langcode', None) == langcode:
|
||||||
for word in words:
|
for word in words:
|
||||||
word = normalize_word(word)
|
|
||||||
d.obj.add(word)
|
d.obj.add(word)
|
||||||
|
|
||||||
def remove_user_words(self, words, langcode):
|
def remove_user_words(self, words, langcode):
|
||||||
for d in itervalues(self.dictionaries):
|
for d in itervalues(self.dictionaries):
|
||||||
if d and d.primary_locale.langcode == langcode:
|
if d and d.primary_locale.langcode == langcode:
|
||||||
for word in words:
|
for word in words:
|
||||||
word = normalize_word(word)
|
|
||||||
d.obj.remove(word)
|
d.obj.remove(word)
|
||||||
|
|
||||||
def add_to_user_dictionary(self, name, word, locale):
|
def add_to_user_dictionary(self, name, word, locale):
|
||||||
@ -291,11 +281,9 @@ class Dictionaries(object):
|
|||||||
raise ValueError('Cannot add to the dictionary named: %s as no such dictionary exists' % name)
|
raise ValueError('Cannot add to the dictionary named: %s as no such dictionary exists' % name)
|
||||||
wl = len(ud.words)
|
wl = len(ud.words)
|
||||||
if isinstance(word, (set, frozenset)):
|
if isinstance(word, (set, frozenset)):
|
||||||
word = frozenset(map(normalize_word, word))
|
|
||||||
ud.words |= word
|
ud.words |= word
|
||||||
self.add_user_words(word, locale.langcode)
|
self.add_user_words(word, locale.langcode)
|
||||||
else:
|
else:
|
||||||
word = normalize_word(word)
|
|
||||||
ud.words.add((word, locale.langcode))
|
ud.words.add((word, locale.langcode))
|
||||||
self.add_user_words((word,), locale.langcode)
|
self.add_user_words((word,), locale.langcode)
|
||||||
if len(ud.words) > wl:
|
if len(ud.words) > wl:
|
||||||
@ -308,7 +296,6 @@ class Dictionaries(object):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def remove_from_user_dictionaries(self, word, locale):
|
def remove_from_user_dictionaries(self, word, locale):
|
||||||
word = normalize_word(word)
|
|
||||||
key = (word, locale.langcode)
|
key = (word, locale.langcode)
|
||||||
changed = False
|
changed = False
|
||||||
for ud in self.active_user_dictionaries:
|
for ud in self.active_user_dictionaries:
|
||||||
@ -324,7 +311,7 @@ class Dictionaries(object):
|
|||||||
def remove_from_user_dictionary(self, name, words):
|
def remove_from_user_dictionary(self, name, words):
|
||||||
changed = False
|
changed = False
|
||||||
removals = defaultdict(set)
|
removals = defaultdict(set)
|
||||||
keys = [(normalize_word(w), l.langcode) for w, l in words]
|
keys = [(w, l.langcode) for w, l in words]
|
||||||
for d in self.all_user_dictionaries:
|
for d in self.all_user_dictionaries:
|
||||||
if d.name == name:
|
if d.name == name:
|
||||||
for key in keys:
|
for key in keys:
|
||||||
@ -341,7 +328,6 @@ class Dictionaries(object):
|
|||||||
return changed
|
return changed
|
||||||
|
|
||||||
def word_in_user_dictionary(self, word, locale):
|
def word_in_user_dictionary(self, word, locale):
|
||||||
word = normalize_word(word)
|
|
||||||
key = (word, locale.langcode)
|
key = (word, locale.langcode)
|
||||||
for ud in self.active_user_dictionaries:
|
for ud in self.active_user_dictionaries:
|
||||||
if key in ud.words:
|
if key in ud.words:
|
||||||
@ -377,7 +363,6 @@ class Dictionaries(object):
|
|||||||
return changed
|
return changed
|
||||||
|
|
||||||
def recognized(self, word, locale=None):
|
def recognized(self, word, locale=None):
|
||||||
word = normalize_word(word)
|
|
||||||
locale = locale or self.default_locale
|
locale = locale or self.default_locale
|
||||||
key = (word, locale)
|
key = (word, locale)
|
||||||
ans = self.word_cache.get(key, None)
|
ans = self.word_cache.get(key, None)
|
||||||
@ -406,7 +391,6 @@ class Dictionaries(object):
|
|||||||
return ans
|
return ans
|
||||||
|
|
||||||
def suggestions(self, word, locale=None):
|
def suggestions(self, word, locale=None):
|
||||||
word = normalize_word(word)
|
|
||||||
locale = locale or self.default_locale
|
locale = locale or self.default_locale
|
||||||
d = self.dictionary_for_locale(locale)
|
d = self.dictionary_for_locale(locale)
|
||||||
has_unicode_hyphen = '\u2010' in word
|
has_unicode_hyphen = '\u2010' in word
|
||||||
|
Loading…
x
Reference in New Issue
Block a user