Edit Book: Fix the "Search ignoring markup" tool not ignoring comments, processing instructions, etc. Fixes #1651160 [Search ignoring HTML markup finds within <?xml directive and HTML comments](https://bugs.launchpad.net/calibre/+bug/1651160)

This commit is contained in:
Kovid Goyal 2016-12-20 11:23:20 +05:30
parent 7199d30fa1
commit 306bb0ff2f
2 changed files with 24 additions and 2 deletions

View File

@ -8,6 +8,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, re
from operator import itemgetter
from itertools import chain
from cssutils import parseStyle
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
@ -700,6 +701,16 @@ class Smarts(NullSmarts):
c.setPosition(cstart)
block = c.block()
in_text = find_tag_definition(block, 0)[0] is None
if in_text:
# Check if we are in comment/PI/etc.
pb = block.previous()
while pb.isValid():
boundaries = pb.userData().non_tag_structures
if boundaries:
if boundaries[-1].is_start:
in_text = False
break
pb = pb.previous()
def append(text, start):
text = text.replace(PARAGRAPH_SEPARATOR, '\n')
@ -714,7 +725,8 @@ class Smarts(NullSmarts):
chunks.append((text, start + max(extra, 0)))
while block.isValid() and block.position() <= cend:
boundaries = sorted(block.userData().tags, key=get_offset)
ud = block.userData()
boundaries = sorted(chain(ud.tags, ud.non_tag_structures), key=get_offset)
if not boundaries:
# Add the whole line
if in_text:

View File

@ -51,6 +51,7 @@ CSS = 11
# Lightweight immutable records used to annotate positions inside a line of
# markup. Every record carries an ``offset`` field as its first element.
TagStart = namedtuple(
    'TagStart', ['offset', 'prefix', 'name', 'closing', 'is_start'])
TagEnd = namedtuple(
    'TagEnd', ['offset', 'self_closing', 'is_start'])
# Boundary of a structure that is not a tag. ``is_start`` is True where the
# structure opens and False where it closes; ``type`` is the parser state
# constant for the structure (e.g. IN_COMMENT, IN_PI, IN_DOCTYPE).
NonTagBoundary = namedtuple(
    'NonTagBoundary', ['offset', 'is_start', 'type'])
Attr = namedtuple(
    'Attr', ['offset', 'type', 'data'])
# Attribute names collected as link attributes (presumably consulted when
# detecting links in tags -- verify against the users of this constant).
LINK_ATTRS = frozenset({'href', 'src', 'poster', 'xlink:href'})
@ -62,6 +63,7 @@ def refresh_spell_check_status():
global do_spell_check
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
from calibre.constants import plugins
_speedup = plugins['html'][0]
@ -223,12 +225,13 @@ class HTMLUserData(QTextBlockUserData):
QTextBlockUserData.__init__(self)
self.tags = []
self.attributes = []
self.non_tag_structures = []
self.state = State()
self.css_user_data = None
self.doc_name = None
def clear(self, state=None, doc_name=None):
    """Reset the per-block bookkeeping so the block can be re-highlighted.

    The ``tags``, ``attributes`` and ``non_tag_structures`` lists are each
    replaced by a fresh empty list. This method does not assign
    ``css_user_data``.

    :param state: parser state to store on this object; when ``None`` a
        fresh ``State()`` is created instead.
    :param doc_name: value stored as ``self.doc_name``.
    """
    # One assignment resets all three lists; each gets its own new list
    # object, so they never alias one another.
    self.tags, self.attributes, self.non_tag_structures = [], [], []
    self.state = State() if state is None else state
    self.doc_name = doc_name
@ -247,6 +250,7 @@ class XMLUserData(HTMLUserData):
def add_tag_data(user_data, tag):
    """Append *tag* to the ``tags`` list held by *user_data*.

    :param user_data: object exposing a mutable ``tags`` list.
    :param tag: the tag record to store; it is appended as-is.
    """
    user_data.tags.append(tag)
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
@ -333,14 +337,17 @@ def normal(state, text, i, formats, user_data):
if ch == '<':
if text[i:i+4] == '<!--':
state.parse, fmt = IN_COMMENT, formats['comment']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_COMMENT))
return [(4, fmt)]
if text[i:i+2] == '<?':
state.parse, fmt = IN_PI, formats['preproc']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_PI))
return [(2, fmt)]
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
state.parse, fmt = IN_DOCTYPE, formats['preproc']
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_DOCTYPE))
return [(2, fmt)]
m = tag_name_pat.match(text, i + 1)
@ -497,10 +504,12 @@ def in_comment(state, text, i, formats, user_data):
if pos == -1:
num = len(text) - i
else:
user_data.non_tag_structures.append(NonTagBoundary(pos, False, state.parse))
num = pos - i + len(end)
state.parse = NORMAL
return [(num, fmt)]
state_map = {
NORMAL:normal,
IN_OPENING_TAG: partial(opening_tag, cdata_tags),
@ -616,6 +625,7 @@ def profile():
del doc
del app
if __name__ == '__main__':
from calibre.gui2.tweak_book.editor.widget import launch_editor
launch_editor('''\