mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Edit Book: Fix the "Search ignoring markup" tool not ignoring comments, processing instructions, etc. Fixes #1651160 [Search ignoring HTML markup finds within <?xml directive and HTML comments](https://bugs.launchpad.net/calibre/+bug/1651160)
This commit is contained in:
parent
7199d30fa1
commit
306bb0ff2f
@ -8,6 +8,7 @@ __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
import sys, re
|
import sys, re
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
from cssutils import parseStyle
|
from cssutils import parseStyle
|
||||||
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
|
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
|
||||||
@ -700,6 +701,16 @@ class Smarts(NullSmarts):
|
|||||||
c.setPosition(cstart)
|
c.setPosition(cstart)
|
||||||
block = c.block()
|
block = c.block()
|
||||||
in_text = find_tag_definition(block, 0)[0] is None
|
in_text = find_tag_definition(block, 0)[0] is None
|
||||||
|
if in_text:
|
||||||
|
# Check if we are in comment/PI/etc.
|
||||||
|
pb = block.previous()
|
||||||
|
while pb.isValid():
|
||||||
|
boundaries = pb.userData().non_tag_structures
|
||||||
|
if boundaries:
|
||||||
|
if boundaries[-1].is_start:
|
||||||
|
in_text = False
|
||||||
|
break
|
||||||
|
pb = pb.previous()
|
||||||
|
|
||||||
def append(text, start):
|
def append(text, start):
|
||||||
text = text.replace(PARAGRAPH_SEPARATOR, '\n')
|
text = text.replace(PARAGRAPH_SEPARATOR, '\n')
|
||||||
@ -714,7 +725,8 @@ class Smarts(NullSmarts):
|
|||||||
chunks.append((text, start + max(extra, 0)))
|
chunks.append((text, start + max(extra, 0)))
|
||||||
|
|
||||||
while block.isValid() and block.position() <= cend:
|
while block.isValid() and block.position() <= cend:
|
||||||
boundaries = sorted(block.userData().tags, key=get_offset)
|
ud = block.userData()
|
||||||
|
boundaries = sorted(chain(ud.tags, ud.non_tag_structures), key=get_offset)
|
||||||
if not boundaries:
|
if not boundaries:
|
||||||
# Add the whole line
|
# Add the whole line
|
||||||
if in_text:
|
if in_text:
|
||||||
|
@ -51,6 +51,7 @@ CSS = 11
|
|||||||
|
|
||||||
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
|
TagStart = namedtuple('TagStart', 'offset prefix name closing is_start')
|
||||||
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
|
TagEnd = namedtuple('TagEnd', 'offset self_closing is_start')
|
||||||
|
NonTagBoundary = namedtuple('NonTagBoundary', 'offset is_start type')
|
||||||
Attr = namedtuple('Attr', 'offset type data')
|
Attr = namedtuple('Attr', 'offset type data')
|
||||||
|
|
||||||
LINK_ATTRS = frozenset(('href', 'src', 'poster', 'xlink:href'))
|
LINK_ATTRS = frozenset(('href', 'src', 'poster', 'xlink:href'))
|
||||||
@ -62,6 +63,7 @@ def refresh_spell_check_status():
|
|||||||
global do_spell_check
|
global do_spell_check
|
||||||
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
|
do_spell_check = tprefs['inline_spell_check'] and hasattr(dictionaries, 'active_user_dictionaries')
|
||||||
|
|
||||||
|
|
||||||
from calibre.constants import plugins
|
from calibre.constants import plugins
|
||||||
|
|
||||||
_speedup = plugins['html'][0]
|
_speedup = plugins['html'][0]
|
||||||
@ -223,12 +225,13 @@ class HTMLUserData(QTextBlockUserData):
|
|||||||
QTextBlockUserData.__init__(self)
|
QTextBlockUserData.__init__(self)
|
||||||
self.tags = []
|
self.tags = []
|
||||||
self.attributes = []
|
self.attributes = []
|
||||||
|
self.non_tag_structures = []
|
||||||
self.state = State()
|
self.state = State()
|
||||||
self.css_user_data = None
|
self.css_user_data = None
|
||||||
self.doc_name = None
|
self.doc_name = None
|
||||||
|
|
||||||
def clear(self, state=None, doc_name=None):
|
def clear(self, state=None, doc_name=None):
|
||||||
self.tags, self.attributes = [], []
|
self.tags, self.attributes, self.non_tag_structures = [], [], []
|
||||||
self.state = State() if state is None else state
|
self.state = State() if state is None else state
|
||||||
self.doc_name = doc_name
|
self.doc_name = doc_name
|
||||||
|
|
||||||
@ -247,6 +250,7 @@ class XMLUserData(HTMLUserData):
|
|||||||
def add_tag_data(user_data, tag):
|
def add_tag_data(user_data, tag):
|
||||||
user_data.tags.append(tag)
|
user_data.tags.append(tag)
|
||||||
|
|
||||||
|
|
||||||
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
|
ATTR_NAME, ATTR_VALUE, ATTR_START, ATTR_END = object(), object(), object(), object()
|
||||||
|
|
||||||
|
|
||||||
@ -333,14 +337,17 @@ def normal(state, text, i, formats, user_data):
|
|||||||
if ch == '<':
|
if ch == '<':
|
||||||
if text[i:i+4] == '<!--':
|
if text[i:i+4] == '<!--':
|
||||||
state.parse, fmt = IN_COMMENT, formats['comment']
|
state.parse, fmt = IN_COMMENT, formats['comment']
|
||||||
|
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_COMMENT))
|
||||||
return [(4, fmt)]
|
return [(4, fmt)]
|
||||||
|
|
||||||
if text[i:i+2] == '<?':
|
if text[i:i+2] == '<?':
|
||||||
state.parse, fmt = IN_PI, formats['preproc']
|
state.parse, fmt = IN_PI, formats['preproc']
|
||||||
|
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_PI))
|
||||||
return [(2, fmt)]
|
return [(2, fmt)]
|
||||||
|
|
||||||
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
|
if text[i:i+2] == '<!' and text[i+2:].lstrip().lower().startswith('doctype'):
|
||||||
state.parse, fmt = IN_DOCTYPE, formats['preproc']
|
state.parse, fmt = IN_DOCTYPE, formats['preproc']
|
||||||
|
user_data.non_tag_structures.append(NonTagBoundary(i, True, IN_DOCTYPE))
|
||||||
return [(2, fmt)]
|
return [(2, fmt)]
|
||||||
|
|
||||||
m = tag_name_pat.match(text, i + 1)
|
m = tag_name_pat.match(text, i + 1)
|
||||||
@ -497,10 +504,12 @@ def in_comment(state, text, i, formats, user_data):
|
|||||||
if pos == -1:
|
if pos == -1:
|
||||||
num = len(text) - i
|
num = len(text) - i
|
||||||
else:
|
else:
|
||||||
|
user_data.non_tag_structures.append(NonTagBoundary(pos, False, state.parse))
|
||||||
num = pos - i + len(end)
|
num = pos - i + len(end)
|
||||||
state.parse = NORMAL
|
state.parse = NORMAL
|
||||||
return [(num, fmt)]
|
return [(num, fmt)]
|
||||||
|
|
||||||
|
|
||||||
state_map = {
|
state_map = {
|
||||||
NORMAL:normal,
|
NORMAL:normal,
|
||||||
IN_OPENING_TAG: partial(opening_tag, cdata_tags),
|
IN_OPENING_TAG: partial(opening_tag, cdata_tags),
|
||||||
@ -616,6 +625,7 @@ def profile():
|
|||||||
del doc
|
del doc
|
||||||
del app
|
del app
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
||||||
launch_editor('''\
|
launch_editor('''\
|
||||||
|
Loading…
x
Reference in New Issue
Block a user