# ---- src/calibre/ebooks/oeb/polish/spell.py (post-patch excerpt) ----

_opf_file_as = '{%s}file-as' % OPF_NAMESPACES['opf']

# Bare element names in an OPF file whose text is spell checked.
opf_spell_tags = {'title', 'creator', 'subject', 'description', 'publisher'}

# We can only use barename() for tag names and simple attribute checks so that
# this code matches up with the syntax highlighter based spell checking


def read_words_from_opf(root, words, file_name, book_locale):
    ''' Feed the spell-checkable parts of an OPF tree to the word collectors.

    Every descendant element of ``root`` is visited. Its text is handed to
    add_words_from_text() only when the text is not None and the bare tag name
    is in ``opf_spell_tags``; the file-as attribute is handed to
    add_words_from_attr() for every visited element. '''
    for element in root.iterdescendants('*'):
        has_text = element.text is not None
        if has_text and barename(element.tag) in opf_spell_tags:
            add_words_from_text(element, 'text', words, file_name, book_locale)
        # Not restricted to opf_spell_tags: this call sits at loop level.
        add_words_from_attr(element, _opf_file_as, words, file_name, book_locale)


# Bare element names in an NCX file whose text is spell checked.
ncx_spell_tags = {'text'}
xml_spell_tags = opf_spell_tags | ncx_spell_tags


def read_words_from_ncx(root, words, file_name, book_locale):
    ''' Hand the text of every element whose local name is "text" to
    add_words_from_text(), skipping elements whose text is None. '''
    text_nodes = root.xpath('//*[local-name()="text"]')
    for node in text_nodes:
        if node.text is None:
            continue
        add_words_from_text(node, 'text', words, file_name, book_locale)


# Despite the name, these are the HTML tags whose text is NOT spell checked
# (the set is only ever used with "not in").
html_spell_tags = {'script', 'style', 'link'}


def read_words_from_html_tag(tag, words, file_name, parent_locale, locale):
    ''' Collect words from one HTML element.

    The element text is collected with ``locale`` unless the element is one of
    ``html_spell_tags``; the alt and title attributes are always offered to
    add_words_from_attr(); the tail text is collected with ``parent_locale``
    unless the element has no parent or the parent is one of
    ``html_spell_tags``. '''
    if tag.text is not None and barename(tag.tag) not in html_spell_tags:
        add_words_from_text(tag, 'text', words, file_name, locale)

    for attr_name in {'alt', 'title'}:
        add_words_from_attr(tag, attr_name, words, file_name, locale)

    if tag.tail is None:
        return
    # The tail belongs to the parent's content, so it is the parent that
    # decides whether it is checked.
    parent = tag.getparent()
    if parent is not None and barename(parent.tag) not in html_spell_tags:
        add_words_from_text(tag, 'tail', words, file_name, parent_locale)


# ---- src/calibre/gui2/tweak_book/editor/smart/__init__.py (post-patch excerpt) ----

class NullSmarts(object):
    # Only the members visible in the hunk are shown here.

    def get_smart_selection(self, editor, update=True):
        return editor.selected_text

    def verify_for_spellcheck(self, cursor, highlighter):
        ''' Default implementation: no cursor location is spell checked. '''
        return False


# ---- src/calibre/gui2/tweak_book/editor/smart/html.py (post-patch excerpt) ----

from PyQt4.Qt import QTextEdit

from calibre import prepare_string_for_xml
from calibre.gui2 import error_dialog
from calibre.gui2.tweak_book.editor.syntax.html import ATTR_NAME, ATTR_END

get_offset = itemgetter(0)
PARAGRAPH_SEPARATOR = '\u2029'


def next_attr_boundary(block, offset, forward=True):
    ''' Find the nearest attribute boundary at or after ``offset`` (at or
    before it when ``forward`` is False), walking on to neighbouring blocks
    when the current one has no match.

    Returns ``(block, boundary)``, or ``(None, None)`` once the blocks run
    out. Boundaries are read from the ``attributes`` list of each block's
    user data. '''
    while block.isValid():
        user_data = block.userData()
        if user_data is not None:
            ordered = sorted(user_data.attributes, key=get_offset, reverse=not forward)
            for candidate in ordered:
                if forward:
                    if candidate.offset >= offset:
                        return block, candidate
                elif candidate.offset <= offset:
                    return block, candidate
        # Nothing in this block: move on and accept any offset in the next one.
        if forward:
            block, offset = block.next(), -1
        else:
            block, offset = block.previous(), sys.maxint
    return None, None
# ---- src/calibre/gui2/tweak_book/editor/smart/html.py (post-patch excerpt, continued) ----
# find_closest_containing_tag() and find_closing_tag() appear on this line only
# as partial hunk context and are not reproduced.


def find_tag_definition(block, offset):
    ''' Return the definition, if any that (block, offset) is inside.

    The result is a ``(tag_name, closing)`` tuple. ``tag_name`` is
    ``prefix:name`` when the tag has both parts, otherwise whichever of the
    two is set. ``(None, False)`` is returned when the nearest tag boundary
    before ``offset`` is not the start of a tag, or when there is no such
    boundary at all. '''
    block, boundary = next_tag_boundary(block, offset, forward=False)
    # next_tag_boundary() returns (None, None) when it runs out of blocks, so
    # the boundary has to be checked before its fields are read.
    if boundary is None or not boundary.is_start:
        return None, False
    tag_start = boundary
    tag = tag_start.name or tag_start.prefix
    if tag_start.name and tag_start.prefix:
        tag = tag_start.prefix + ':' + tag_start.name
    return tag, tag_start.closing


def find_containing_attribute(block, offset):
    ''' Return the name of the attribute whose value (block, offset) is
    inside, or None when the position is not inside an attribute value. '''
    block, boundary = next_attr_boundary(block, offset, forward=False)
    if block is None:
        return None
    if boundary.type is ATTR_NAME or boundary.data is ATTR_END:
        return None  # offset is not inside an attribute value
    # Step back once more: the boundary before the value should be its name.
    block, boundary = next_attr_boundary(block, boundary.offset - 1, forward=False)
    # ATTR_NAME is a sentinel object, so compare by identity as above.
    if block is not None and boundary.type is ATTR_NAME:
        return boundary.data
    return None
# ---- src/calibre/gui2/tweak_book/editor/smart/html.py (post-patch excerpt, continued) ----

class HTMLSmarts(NullSmarts):
    # Only the method added by the hunk is shown here.

    def verify_for_spellcheck(self, cursor, highlighter):
        ''' Return True iff the cursor is in a location where spelling is
        checked (inside a tag or inside a checked attribute).

        ``cursor`` supplies the block and the anchor/position offsets;
        ``highlighter`` supplies ``tag_ok_for_spell()`` and
        ``spell_attributes``. '''
        block = cursor.block()
        start_pos = cursor.anchor() - block.position()
        end_pos = cursor.position() - block.position()

        # Neither end of the selection may lie inside a closing tag.
        start_tag, closing = find_tag_definition(block, start_pos)
        if closing:
            return False
        end_tag, closing = find_tag_definition(block, end_pos)
        if closing:
            return False

        if start_tag is None and end_tag is None:
            # We are in normal text, check that the containing tag is
            # allowed for spell checking.
            tag = find_closest_containing_tag(block, start_pos)
            if tag is not None and highlighter.tag_ok_for_spell(tag.name.split(':')[-1]):
                return True
        if start_tag != end_tag:
            return False

        # Now we check if we are in an allowed attribute
        sa = find_containing_attribute(block, start_pos)
        ea = find_containing_attribute(block, end_pos)
        return sa == ea and sa in highlighter.spell_attributes


# ---- src/calibre/gui2/tweak_book/editor/syntax/base.py (post-patch excerpt) ----

class SyntaxHighlighter(QSyntaxHighlighter):
    # Only the class attributes visible in the hunk are shown here.

    state_map = {0:lambda state, text, i, formats:[(len(text), None)]}
    create_formats_func = lambda highlighter: {}
    # Attribute names whose values are spell checked (none by default).
    spell_attributes = ()
    # Called as highlighter.tag_ok_for_spell(name). A bare one-argument lambda
    # stored on the class would be bound to the instance and receive two
    # arguments, so it is wrapped in staticmethod to stay callable with just
    # the tag name.
    tag_ok_for_spell = staticmethod(lambda x: False)
a/src/calibre/gui2/tweak_book/editor/syntax/html.py +++ b/src/calibre/gui2/tweak_book/editor/syntax/html.py @@ -12,6 +12,7 @@ from collections import namedtuple from PyQt4.Qt import QFont, QTextBlockUserData +from calibre.ebooks.oeb.polish.spell import html_spell_tags, xml_spell_tags from calibre.gui2.tweak_book.editor import SyntaxTextCharFormat from calibre.gui2.tweak_book.editor.syntax.base import SyntaxHighlighter, run_loop from calibre.gui2.tweak_book.editor.syntax.css import create_formats as create_css_formats, state_map as css_state_map, State as CSSState @@ -46,6 +47,7 @@ CSS = 11 TagStart = namedtuple('TagStart', 'offset prefix name closing is_start') TagEnd = namedtuple('TagEnd', 'offset self_closing is_start') +Attr = namedtuple('Attr', 'offset type data') class Tag(object): @@ -76,13 +78,14 @@ class State(object): __slots__ = ('tag_being_defined', 'tags', 'is_bold', 'is_italic', 'current_lang', 'parse', 'get_user_data', 'set_user_data', - 'css_formats', 'stack', 'sub_parser_state', 'default_lang') + 'css_formats', 'stack', 'sub_parser_state', 'default_lang', + 'attribute_name',) def __init__(self): self.tags = [] self.is_bold = self.is_italic = False self.tag_being_defined = self.current_lang = self.get_user_data = self.set_user_data = \ - self.css_formats = self.stack = self.sub_parser_state = self.default_lang = None + self.css_formats = self.stack = self.sub_parser_state = self.default_lang = self.attribute_name = None self.parse = NORMAL def copy(self): @@ -101,13 +104,14 @@ class State(object): return self.stack.index_for(self) def __hash__(self): - return hash((self.parse, self.sub_parser_state, self.tag_being_defined, tuple(self.tags))) + return hash((self.parse, self.sub_parser_state, self.tag_being_defined, self.attribute_name, tuple(self.tags))) def __eq__(self, other): return ( self.parse == getattr(other, 'parse', -1) and self.sub_parser_state == getattr(other, 'sub_parser_state', -1) and self.tag_being_defined == getattr(other, 
'tag_being_defined', False) and + self.attribute_name == getattr(other, 'attribute_name', False) and self.tags == getattr(other, 'tags', None) ) @@ -194,6 +198,7 @@ class HTMLUserData(QTextBlockUserData): def __init__(self): QTextBlockUserData.__init__(self) self.tags = [] + self.attributes = [] def add_tag_data(state, tag): ud = q = state.get_user_data() @@ -203,6 +208,16 @@ def add_tag_data(state, tag): if q is None: state.set_user_data(ud) +ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object() + +def add_attr_data(state, data_type, data, offset): + ud = q = state.get_user_data() + if ud is None: + ud = HTMLUserData() + ud.attributes.append(Attr(offset, data_type, data)) + if q is None: + state.set_user_data(ud) + def css(state, text, i, formats): ' Inside a