Edit Book: Fix the "Search ignoring markup" tool not ignoring comments, processing instructions, etc. Fixes #1651160 [Search ignoring HTML markup finds within <?xml directive and HTML comments](https://bugs.launchpad.net/calibre/+bug/1651160)

This commit is contained in:
Kovid Goyal 2016-12-20 11:23:20 +05:30
parent 7199d30fa1
commit 306bb0ff2f
2 changed files with 24 additions and 2 deletions

View File

@ -8,6 +8,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re import sys, re
from operator import itemgetter from operator import itemgetter
from itertools import chain
from cssutils import parseStyle from cssutils import parseStyle
from PyQt5.Qt import QTextEdit, Qt, QTextCursor from PyQt5.Qt import QTextEdit, Qt, QTextCursor
@ -700,6 +701,16 @@ class Smarts(NullSmarts):
c.setPosition(cstart) c.setPosition(cstart)
block = c.block() block = c.block()
in_text = find_tag_definition(block, 0)[0] is None in_text = find_tag_definition(block, 0)[0] is None
if in_text:
# Check if we are in comment/PI/etc.
pb = block.previous()
while pb.isValid():
boundaries = pb.userData().non_tag_structures
if boundaries:
if boundaries[-1].is_start:
in_text = False
break
pb = pb.previous()
def append(text, start): def append(text, start):
text = text.replace(PARAGRAPH_SEPARATOR, '\n') text = text.replace(PARAGRAPH_SEPARATOR, '\n')
@ -714,7 +725,8 @@ class Smarts(NullSmarts):
chunks.append((text, start + max(extra, 0))) chunks.append((text, start + max(extra, 0)))
while block.isValid() and block.position() <= cend: while block.isValid() and block.position() <= cend:
boundaries = sorted(block.userData().tags, key=get_offset) ud = block.userData()
boundaries = sorted(chain(ud.tags, ud.non_tag_structures), key=get_offset)
if not boundaries: if not boundaries:
# Add the whole line # Add the whole line
if in_text: if in_text:

View File

@ -51,6 +51,7 @@ CSS = 11
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start') TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start') TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
NonTagBoundary = namedtuple('NonTagBoundary', 'offset is_start type')
Attr = namedtuple('Attr', 'offset type data') Attr = namedtuple('Attr', 'offset type data')
LINK_ATTRS = frozenset(('href', 'src', 'poster', 'xlink:href')) LINK_ATTRS = frozenset(('href', 'src', 'poster', 'xlink:href'))
@ -62,6 +63,7 @@ def refresh_spell_check_status():
global do_spell_check global do_spell_check
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries') do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
from calibre.constants import plugins from calibre.constants import plugins
_speedup = plugins['html'][0] _speedup = plugins['html'][0]
@ -223,12 +225,13 @@ class HTMLUserData(QTextBlockUserData):
QTextBlockUserData.__init__(self) QTextBlockUserData.__init__(self)
self.tags = [] self.tags = []
self.attributes = [] self.attributes = []
self.non_tag_structures = []
self.state = State() self.state = State()
self.css_user_data = None self.css_user_data = None
self.doc_name = None self.doc_name = None
def clear(self, state=None, doc_name=None): def clear(self, state=None, doc_name=None):
self.tags, self.attributes = [], [] self.tags, self.attributes, self.non_tag_structures = [], [], []
self.state = State() if state is None else state self.state = State() if state is None else state
self.doc_name = doc_name self.doc_name = doc_name
@ -247,6 +250,7 @@ class XMLUserData(HTMLUserData):
def add_tag_data(user_data, tag): def add_tag_data(user_data, tag):
user_data.tags.append(tag) user_data.tags.append(tag)
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object() ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
@ -333,14 +337,17 @@ def normal(state, text, i, formats, user_data):
if ch == '<': if ch == '<':
if text[i:i+4] == '<!--': if text[i:i+4] == '<!--':
state.parse, fmt = IN_COMMENT, formats['comment'] state.parse, fmt = IN_COMMENT, formats['comment']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_COMMENT))
return [(4, fmt)] return [(4, fmt)]
if text[i:i+2] == '<?': if text[i:i+2] == '<?':
state.parse, fmt = IN_PI, formats['preproc'] state.parse, fmt = IN_PI, formats['preproc']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_PI))
return [(2, fmt)] return [(2, fmt)]
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'): if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
state.parse, fmt = IN_DOCTYPE, formats['preproc'] state.parse, fmt = IN_DOCTYPE, formats['preproc']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_DOCTYPE))
return [(2, fmt)] return [(2, fmt)]
m = tag_name_pat.match(text, i + 1) m = tag_name_pat.match(text, i + 1)
@ -497,10 +504,12 @@ def in_comment(state, text, i, formats, user_data):
if pos == -1: if pos == -1:
num = len(text) - i num = len(text) - i
else: else:
user_data.non_tag_structures.append(NonTagBoundary(pos, False, state.parse))
num = pos - i + len(end) num = pos - i + len(end)
state.parse = NORMAL state.parse = NORMAL
return [(num, fmt)] return [(num, fmt)]
state_map = { state_map = {
NORMAL:normal, NORMAL:normal,
IN_OPENING_TAG: partial(opening_tag, cdata_tags), IN_OPENING_TAG: partial(opening_tag, cdata_tags),
@ -616,6 +625,7 @@ def profile():
del doc del doc
del app del app
if __name__ == '__main__': if __name__ == '__main__':
from calibre.gui2.tweak_book.editor.widget import launch_editor from calibre.gui2.tweak_book.editor.widget import launch_editor
launch_editor('''\ launch_editor('''\