mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Spell check: Fix 'Show next occurrence' sometimes showing the word in an incorrect location, for example in an attribute where spell check is not performed.
This commit is contained in:
parent
36c937c6ba
commit
b1a45f3147
@ -83,28 +83,33 @@ def add_words_from_text(node, attr, words, file_name, locale):
|
||||
|
||||
_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']
|
||||
|
||||
opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}
|
||||
|
||||
# We can only use barename() for tag names and simple attribute checks so that
|
||||
# this code matches up with the syntax highlighter based spell checking
|
||||
|
||||
def read_words_from_opf(root, words, file_name, book_locale):
|
||||
for tag in root.xpath('//*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']):
|
||||
tagname = barename(tag.tag)
|
||||
if not tag.text or tagname in {'identifier', 'language', 'date'}:
|
||||
continue
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
for tag in root.iterdescendants('*'):
|
||||
if tag.text is not None and barename(tag.tag) in opf_spell_tags:
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
add_words_from_attr(tag, _opf_file_as, words, file_name, book_locale)
|
||||
|
||||
ncx_spell_tags = {'text'}
|
||||
xml_spell_tags = opf_spell_tags | ncx_spell_tags
|
||||
|
||||
def read_words_from_ncx(root, words, file_name, book_locale):
|
||||
for tag in root.xpath('//*[local-name()="text"]'):
|
||||
if not tag.text:
|
||||
continue
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
if tag.text is not None:
|
||||
add_words_from_text(tag, 'text', words, file_name, book_locale)
|
||||
|
||||
html_spell_tags = {'script', 'style', 'link'}
|
||||
|
||||
def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
|
||||
tagname = barename(tag.tag)
|
||||
if tagname not in {'script', 'style', 'link', 'head'}:
|
||||
if tag.text is not None:
|
||||
add_words_from_text(tag, 'text', words, file_name, locale)
|
||||
for attr in {'alt', 'title'}:
|
||||
add_words_from_attr(tag, attr, words, file_name, locale)
|
||||
if tag.tail is not None:
|
||||
if tag.text is not None and barename(tag.tag) not in html_spell_tags:
|
||||
add_words_from_text(tag, 'text', words, file_name, locale)
|
||||
for attr in {'alt', 'title'}:
|
||||
add_words_from_attr(tag, attr, words, file_name, locale)
|
||||
if tag.tail is not None and tag.getparent() is not None and barename(tag.getparent().tag) not in html_spell_tags:
|
||||
add_words_from_text(tag, 'tail', words, file_name, parent_locale)
|
||||
|
||||
def locale_from_tag(tag):
|
||||
|
@ -17,3 +17,6 @@ class NullSmarts(object):
|
||||
def get_smart_selection(self, editor, update=True):
|
||||
return editor.selected_text
|
||||
|
||||
def verify_for_spellcheck(self, cursor, highlighter):
    ''' Default implementation: always report that the cursor is not at a
    spell-checkable location. '''
    return False
|
||||
|
||||
|
@ -14,6 +14,7 @@ from PyQt4.Qt import QTextEdit
|
||||
|
||||
from calibre import prepare_string_for_xml
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2.tweak_book.editor.syntax.html import ATTR_NAME, ATTR_END
|
||||
|
||||
get_offset = itemgetter(0)
|
||||
PARAGRAPH_SEPARATOR = '\u2029'
|
||||
@ -43,6 +44,20 @@ def next_tag_boundary(block, offset, forward=True):
|
||||
offset = -1 if forward else sys.maxint
|
||||
return None, None
|
||||
|
||||
def next_attr_boundary(block, offset, forward=True):
    ''' Find the nearest attribute boundary record, starting at (block, offset).

    When forward is True the first record whose offset is >= offset is
    returned, otherwise the first record (scanning in reverse offset order)
    whose offset is <= offset. Blocks are walked with block.next() /
    block.previous() until one is no longer valid.

    Returns a (block, boundary) tuple, or (None, None) if nothing matched. '''
    while block.isValid():
        user_data = block.userData()
        if user_data is not None:
            ordered = sorted(user_data.attributes, key=get_offset, reverse=not forward)
            for candidate in ordered:
                if forward:
                    matched = candidate.offset >= offset
                else:
                    matched = candidate.offset <= offset
                if matched:
                    return block, candidate
        # Nothing suitable in this block: move on and reset the offset so that
        # every record of the neighbouring block qualifies.
        if forward:
            block, offset = block.next(), -1
        else:
            block, offset = block.previous(), sys.maxint
    return None, None
|
||||
|
||||
def find_closest_containing_tag(block, offset, max_tags=sys.maxint):
|
||||
''' Find the closest containing tag. To find it, we search for the first
|
||||
opening tag that does not have a matching closing tag before the specified
|
||||
@ -79,6 +94,29 @@ def find_closest_containing_tag(block, offset, max_tags=sys.maxint):
|
||||
max_tags -= 1
|
||||
return None # Could not find a containing tag
|
||||
|
||||
def find_tag_definition(block, offset):
    ''' Return the <tag | > definition, if any that (block, offset) is inside.

    The result is a (tag, closing) tuple. tag is the tag name, written as
    prefix:name when the boundary carries both a prefix and a name, and closing
    is the boundary's closing flag. (None, False) is returned when the boundary
    found by next_tag_boundary(..., forward=False) is not the start of a tag,
    or when no boundary is found at all. '''
    block, boundary = next_tag_boundary(block, offset, forward=False)
    # next_tag_boundary() returns (None, None) when there is no boundary to
    # find, so guard against None before reading any attribute from it.
    if boundary is None or not boundary.is_start:
        return None, False
    tag = boundary.name or boundary.prefix
    if boundary.name and boundary.prefix:
        tag = boundary.prefix + ':' + tag
    return tag, boundary.closing
|
||||
|
||||
def find_containing_attribute(block, offset):
    ''' Return the name of the attribute whose value contains (block, offset),
    or None if the position is not inside an attribute value. '''
    block, nearest = next_attr_boundary(block, offset, forward=False)
    if block is None:
        return None
    inside_value = nearest.type is not ATTR_NAME and nearest.data is not ATTR_END
    if not inside_value:
        return None  # offset is not inside an attribute value
    # The record just before the start of the value should be the attribute
    # name; its data is the name itself.
    block, preceding = next_attr_boundary(block, nearest.offset - 1, forward=False)
    if block is None or preceding.type != ATTR_NAME:
        return None
    return preceding.data
|
||||
|
||||
def find_closing_tag(tag, max_tags=sys.maxint):
|
||||
''' Find the closing tag corresponding to the specified tag. To find it we
|
||||
search for the first closing tag after the specified tag that does not
|
||||
@ -241,3 +279,33 @@ class HTMLSmarts(NullSmarts):
|
||||
c.setPosition(pos + 1 + len(name))
|
||||
editor.setTextCursor(c)
|
||||
|
||||
def verify_for_spellcheck(self, cursor, highlighter):
    ''' Return True iff the cursor selection is in a location where spelling
    is checked: either normal text whose containing tag is accepted by
    highlighter.tag_ok_for_spell(), or the value of an attribute listed in
    highlighter.spell_attributes. '''
    block = cursor.block()
    block_start = block.position()
    start_pos = cursor.anchor() - block_start
    end_pos = cursor.position() - block_start

    # Either end of the selection being inside a closing tag rules it out
    start_tag, closing = find_tag_definition(block, start_pos)
    if closing:
        return False
    end_tag, closing = find_tag_definition(block, end_pos)
    if closing:
        return False

    if start_tag is None and end_tag is None:
        # We are in normal text, check that the containing tag is
        # allowed for spell checking.
        container = find_closest_containing_tag(block, start_pos)
        if container is not None:
            bare_name = container.name.split(':')[-1]
            if highlighter.tag_ok_for_spell(bare_name):
                return True

    if start_tag != end_tag:
        return False

    # Now we check if we are in an allowed attribute
    start_attr = find_containing_attribute(block, start_pos)
    end_attr = find_containing_attribute(block, end_pos)
    return start_attr == end_attr and start_attr in highlighter.spell_attributes
|
||||
|
||||
|
@ -31,6 +31,8 @@ class SyntaxHighlighter(QSyntaxHighlighter):
|
||||
|
||||
state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
|
||||
create_formats_func = lambda highlighter: {}
|
||||
spell_attributes = ()
|
||||
tag_ok_for_spell = lambda x: False
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
QSyntaxHighlighter.__init__(self, *args, **kwargs)
|
||||
|
@ -12,6 +12,7 @@ from collections import namedtuple
|
||||
|
||||
from PyQt4.Qt import QFont, QTextBlockUserData
|
||||
|
||||
from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags
|
||||
from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat
|
||||
from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_loop
|
||||
from calibre.gui2.tweak_book.editor.syntax.css import create_formats as create_css_formats, state_map as css_state_map, State as CSSState
|
||||
@ -46,6 +47,7 @@ CSS = 11
|
||||
|
||||
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
|
||||
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
|
||||
Attr = namedtuple('Attr', 'offset type data')
|
||||
|
||||
class Tag(object):
|
||||
|
||||
@ -76,13 +78,14 @@ class State(object):
|
||||
|
||||
__slots__ = ('tag_being_defined', 'tags', 'is_bold', 'is_italic',
|
||||
'current_lang', 'parse', 'get_user_data', 'set_user_data',
|
||||
'css_formats', 'stack', 'sub_parser_state', 'default_lang')
|
||||
'css_formats', 'stack', 'sub_parser_state', 'default_lang',
|
||||
'attribute_name',)
|
||||
|
||||
def __init__(self):
|
||||
self.tags = []
|
||||
self.is_bold = self.is_italic = False
|
||||
self.tag_being_defined = self.current_lang = self.get_user_data = self.set_user_data = \
|
||||
self.css_formats = self.stack = self.sub_parser_state = self.default_lang = None
|
||||
self.css_formats = self.stack = self.sub_parser_state = self.default_lang = self.attribute_name = None
|
||||
self.parse = NORMAL
|
||||
|
||||
def copy(self):
|
||||
@ -101,13 +104,14 @@ class State(object):
|
||||
return self.stack.index_for(self)
|
||||
|
||||
def __hash__(self):
|
||||
return hash((self.parse, self.sub_parser_state, self.tag_being_defined, tuple(self.tags)))
|
||||
return hash((self.parse, self.sub_parser_state, self.tag_being_defined, self.attribute_name, tuple(self.tags)))
|
||||
|
||||
def __eq__(self, other):
|
||||
return (
|
||||
self.parse == getattr(other, 'parse', -1) and
|
||||
self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and
|
||||
self.tag_being_defined == getattr(other, 'tag_being_defined', False) and
|
||||
self.attribute_name == getattr(other, 'attribute_name', False) and
|
||||
self.tags == getattr(other, 'tags', None)
|
||||
)
|
||||
|
||||
@ -194,6 +198,7 @@ class HTMLUserData(QTextBlockUserData):
|
||||
def __init__(self):
|
||||
QTextBlockUserData.__init__(self)
|
||||
self.tags = []
|
||||
self.attributes = []
|
||||
|
||||
def add_tag_data(state, tag):
|
||||
ud = q = state.get_user_data()
|
||||
@ -203,6 +208,16 @@ def add_tag_data(state, tag):
|
||||
if q is None:
|
||||
state.set_user_data(ud)
|
||||
|
||||
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
|
||||
|
||||
def add_attr_data(state, data_type, data, offset):
    ''' Append an Attr(offset, data_type, data) record to the user data
    obtained from state.get_user_data(). If there is no user data yet, a new
    HTMLUserData is created and stored via state.set_user_data(). '''
    existing = state.get_user_data()
    user_data = HTMLUserData() if existing is None else existing
    user_data.attributes.append(Attr(offset, data_type, data))
    if existing is None:
        state.set_user_data(user_data)
|
||||
|
||||
def css(state, text, i, formats):
|
||||
' Inside a <style> tag '
|
||||
pat = cdata_close_pats['style']
|
||||
@ -320,7 +335,9 @@ def opening_tag(cdata_tags, state, text, i, formats):
|
||||
if m is None:
|
||||
return [(1, formats['?'])]
|
||||
state.parse = ATTRIBUTE_NAME
|
||||
prefix, name = m.group().partition(':')[0::2]
|
||||
attrname = state.attribute_name = m.group()
|
||||
add_attr_data(state, ATTR_NAME, attrname, m.start())
|
||||
prefix, name = attrname.partition(':')[0::2]
|
||||
if prefix and name:
|
||||
return [(len(prefix) + 1, formats['nsprefix']), (len(name), formats['attr'])]
|
||||
return [(len(prefix), formats['attr'])]
|
||||
@ -333,11 +350,9 @@ def attribute_name(state, text, i, formats):
|
||||
if ch == '=':
|
||||
state.parse = ATTRIBUTE_VALUE
|
||||
return [(1, formats['attr'])]
|
||||
# Standalone attribute with no value
|
||||
state.parse = IN_OPENING_TAG
|
||||
if ch in {'>', '/'}:
|
||||
# Standalone attribute with no value
|
||||
return [(0, None)]
|
||||
return [(1, formats['no-attr-value'])]
|
||||
return [(0, None)]
|
||||
|
||||
def attribute_value(state, text, i, formats):
|
||||
' After attribute = '
|
||||
@ -356,12 +371,14 @@ def attribute_value(state, text, i, formats):
|
||||
def quoted_val(state, text, i, formats):
|
||||
' A quoted attribute value '
|
||||
quote = '"' if state.parse is DQ_VAL else "'"
|
||||
add_attr_data(state, ATTR_VALUE, ATTR_START, i)
|
||||
pos = text.find(quote, i)
|
||||
if pos == -1:
|
||||
num = len(text) - i
|
||||
else:
|
||||
num = pos - i + 1
|
||||
state.parse = IN_OPENING_TAG
|
||||
add_attr_data(state, ATTR_VALUE, ATTR_END, i + num)
|
||||
return [(num, formats['string'])]
|
||||
|
||||
def closing_tag(state, text, i, formats):
|
||||
@ -447,6 +464,7 @@ class HTMLHighlighter(SyntaxHighlighter):
|
||||
|
||||
state_map = state_map
|
||||
create_formats_func = create_formats
|
||||
spell_attributes = ('alt', 'title')
|
||||
|
||||
def create_formats(self):
|
||||
super(HTMLHighlighter, self).create_formats()
|
||||
@ -460,9 +478,16 @@ class HTMLHighlighter(SyntaxHighlighter):
|
||||
ans = self.default_state.stack.state_for(val) or self.default_state
|
||||
return ans.copy()
|
||||
|
||||
def tag_ok_for_spell(self, name):
|
||||
return name not in html_spell_tags
|
||||
|
||||
class XMLHighlighter(HTMLHighlighter):
|
||||
|
||||
state_map = xml_state_map
|
||||
spell_attributes = ('opf:file-as',)
|
||||
|
||||
def tag_ok_for_spell(self, name):
|
||||
return name in xml_spell_tags
|
||||
|
||||
if __name__ == '__main__':
|
||||
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
||||
|
@ -378,28 +378,35 @@ class TextEdit(PlainTextEdit):
|
||||
self.saved_matches[save_match] = (pat, m)
|
||||
return True
|
||||
|
||||
def find_word_from_line(self, word, lang, lnum, from_cursor=True):
|
||||
def find_spell_word(self, original_words, lang, from_cursor=True):
|
||||
c = self.textCursor()
|
||||
c.setPosition(c.position())
|
||||
if not from_cursor or c.blockNumber() != lnum - 1:
|
||||
lnum = max(1, min(self.blockCount(), lnum))
|
||||
if not from_cursor:
|
||||
c.movePosition(c.Start)
|
||||
c.movePosition(c.NextBlock, n=lnum - 1)
|
||||
c.movePosition(c.StartOfLine)
|
||||
offset = c.block().position()
|
||||
c.movePosition(c.End, c.KeepAnchor)
|
||||
|
||||
def find_word(haystack):
|
||||
for w in original_words:
|
||||
idx = index_of(w, haystack, lang=lang)
|
||||
if idx > -1:
|
||||
return idx, w
|
||||
return -1, None
|
||||
|
||||
while True:
|
||||
text = unicode(c.selectedText()).rstrip('\0')
|
||||
idx, word = find_word(text)
|
||||
if idx == -1:
|
||||
return False
|
||||
c.setPosition(c.anchor() + idx)
|
||||
c.setPosition(c.position() + string_length(word), c.KeepAnchor)
|
||||
if self.smarts.verify_for_spellcheck(c, self.highlighter):
|
||||
self.setTextCursor(c)
|
||||
self.centerCursor()
|
||||
return True
|
||||
c.setPosition(c.position())
|
||||
c.movePosition(c.End, c.KeepAnchor)
|
||||
else:
|
||||
offset = c.block().position() + c.positionInBlock()
|
||||
c.movePosition(c.End, c.KeepAnchor)
|
||||
text = unicode(c.selectedText()).rstrip('\0')
|
||||
idx = index_of(word, text, lang=lang)
|
||||
if idx == -1:
|
||||
return False
|
||||
c.setPosition(offset + idx)
|
||||
c.setPosition(c.position() + string_length(word), c.KeepAnchor)
|
||||
self.setTextCursor(c)
|
||||
self.centerCursor()
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def replace(self, pat, template, saved_match='gui'):
|
||||
c = self.textCursor()
|
||||
|
@ -189,8 +189,8 @@ class Editor(QMainWindow):
|
||||
def find(self, *args, **kwargs):
|
||||
return self.editor.find(*args, **kwargs)
|
||||
|
||||
def find_word_from_line(self, *args, **kwargs):
|
||||
return self.editor.find_word_from_line(*args, **kwargs)
|
||||
def find_spell_word(self, *args, **kwargs):
|
||||
return self.editor.find_spell_word(*args, **kwargs)
|
||||
|
||||
def replace(self, *args, **kwargs):
|
||||
return self.editor.replace(*args, **kwargs)
|
||||
|
@ -1036,10 +1036,10 @@ def find_next(word, locations, current_editor, current_editor_name,
|
||||
files[l.file_name].append(l)
|
||||
except KeyError:
|
||||
files[l.file_name] = [l]
|
||||
start_locations = set()
|
||||
|
||||
if current_editor_name not in files:
|
||||
current_editor = current_editor_name = None
|
||||
current_editor_name = None
|
||||
locations = [(fname, {l.original_word for l in _locations}, False) for fname, _locations in files.iteritems()]
|
||||
else:
|
||||
# Re-order the list of locations to search so that we search in the
|
||||
# current editor first
|
||||
@ -1047,20 +1047,17 @@ def find_next(word, locations, current_editor, current_editor_name,
|
||||
idx = lfiles.index(current_editor_name)
|
||||
before, after = lfiles[:idx], lfiles[idx+1:]
|
||||
lfiles = after + before + [current_editor_name]
|
||||
lnum = current_editor.current_line + 1
|
||||
start_locations = [l for l in files[current_editor_name] if l.sourceline >= lnum]
|
||||
locations = list(start_locations)
|
||||
locations = [(current_editor_name, {l.original_word for l in files[current_editor_name]}, True)]
|
||||
for fname in lfiles:
|
||||
locations.extend(files[fname])
|
||||
start_locations = set(start_locations)
|
||||
locations.append((fname, {l.original_word for l in files[fname]}, False))
|
||||
|
||||
for location in locations:
|
||||
ed = editors.get(location.file_name, None)
|
||||
for file_name, original_words, from_cursor in locations:
|
||||
ed = editors.get(file_name, None)
|
||||
if ed is None:
|
||||
edit_file(location.file_name)
|
||||
ed = editors[location.file_name]
|
||||
if ed.find_word_from_line(location.original_word, word[1].langcode, location.sourceline, from_cursor=location in start_locations):
|
||||
show_editor(location.file_name)
|
||||
edit_file(file_name)
|
||||
ed = editors[file_name]
|
||||
if ed.find_spell_word(original_words, word[1].langcode, from_cursor=from_cursor):
|
||||
show_editor(file_name)
|
||||
return True
|
||||
return False
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user