mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement text search for HTML files
This commit is contained in:
parent
3c27f28fdb
commit
298b664669
@ -10,7 +10,7 @@ import sys, re
|
||||
from operator import itemgetter
|
||||
|
||||
from cssutils import parseStyle
|
||||
from PyQt5.Qt import QTextEdit, Qt
|
||||
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
|
||||
|
||||
from calibre import prepare_string_for_xml, xml_entity_to_unicode
|
||||
from calibre.ebooks.oeb.polish.container import OEB_DOCS
|
||||
@ -672,9 +672,62 @@ class Smarts(NullSmarts):
|
||||
|
||||
return 'complete_names', (names_type, doc_name, c.root), query
|
||||
|
||||
def find_text(self, pat, cursor):
    """Search for ``pat`` in the text portions of the range covered by ``cursor``.

    The document is walked block by block. Each block's ``userData().tags``
    entries are used as boundaries: text lying between boundaries while
    ``in_text`` is true is collected as ``(text, document_position)`` chunks,
    and the chunks are handed to ``find_text_in_chunks``.

    :param pat: Compiled pattern, passed through to ``find_text_in_chunks``.
    :param cursor: Cursor whose position/anchor delimit the range to search.
        It is copied, never modified.
    :return: ``(found, start, end)``; ``found`` is true only when both
        ``start`` and ``end`` returned by ``find_text_in_chunks`` are not -1.
    """
    from calibre.gui2.tweak_book.text_search import find_text_in_chunks
    chunks = []
    # Work on a copy positioned at the top of the document, so that the
    # in_text state is tracked from the beginning.
    c = QTextCursor(cursor)
    c.setPosition(0)
    in_text = True
    block = c.block()

    cstart = min(cursor.position(), cursor.anchor())
    cend = max(cursor.position(), cursor.anchor())

    def append(text, start):
        # Record only the part of ``text`` (which begins at document
        # position ``start``) that overlaps the searched range.
        after = start + len(text)
        if start <= cend and cstart < after:
            # Trim whatever extends past position cend.
            extra = after - (cend + 1)
            if extra > 0:
                text = text[:-extra]
            # Trim whatever precedes cstart.
            extra = cstart - start
            if extra > 0:
                text = text[extra:]
            chunks.append((text, start + max(extra, 0)))

    # Scan every block from the start of the document up to cend. The loop
    # must not additionally require the block to reach cstart: the scan
    # begins at position 0, so such a condition would be false for the very
    # first block whenever the range starts in a later block, and nothing
    # would ever be searched. append() already discards text before cstart.
    while block.isValid() and block.position() <= cend:
        boundaries = sorted(block.userData().tags, key=get_offset)
        if not boundaries:
            # Add the whole line
            if in_text:
                text = block.text()
                if text:
                    append(text, block.position())
        else:
            start = block.position()
            c.setPosition(start)
            for b in boundaries:
                if in_text:
                    # Select from just after the previous boundary up to
                    # this boundary.
                    c.setPosition(start + b.offset, c.KeepAnchor)
                    if c.hasSelection():
                        append(c.selectedText(), c.anchor())
                # NOTE(review): presumably is_start marks the opening of a
                # tag, after which we are no longer in text -- confirm
                # against the highlighter's tag objects.
                in_text = not b.is_start
                c.setPosition(start + b.offset + 1)
            if in_text:
                # Add remaining text in block
                c.setPosition(block.position() + boundaries[-1].offset + 1)
                c.movePosition(c.EndOfBlock, c.KeepAnchor)
                if c.hasSelection():
                    append(c.selectedText(), c.anchor())
        block = block.next()
    s, e = find_text_in_chunks(pat, chunks)
    return s != -1 and e != -1, s, e
|
||||
|
||||
if __name__ == '__main__': # {{{
|
||||
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
||||
launch_editor('''\
|
||||
if sys.argv[-1].endswith('.html'):
|
||||
raw = lopen(sys.argv[-1], 'rb').read().decode('utf-8')
|
||||
else:
|
||||
raw = '''\
|
||||
<!DOCTYPE html>
|
||||
<html xml:lang="en" lang="en">
|
||||
<!--
|
||||
@ -703,5 +756,9 @@ if __name__ == '__main__': # {{{
|
||||
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
|
||||
</body>
|
||||
</html>
|
||||
''', path_is_raw=True, syntax='xml')
|
||||
'''
|
||||
def callback(ed):
|
||||
import regex
|
||||
ed.find_text(regex.compile('A bold word'))
|
||||
launch_editor(raw, path_is_raw=True, syntax='html', callback=callback)
|
||||
# }}}
|
||||
|
@ -380,6 +380,7 @@ class TextEdit(PlainTextEdit):
|
||||
pos = c.End if reverse else c.Start
|
||||
c.movePosition(pos, c.KeepAnchor)
|
||||
if hasattr(self.smarts, 'find_text'):
|
||||
self.highlighter.join()
|
||||
found, start, end = self.smarts.find_text(pat, c)
|
||||
if not found:
|
||||
return False
|
||||
|
@ -166,7 +166,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
|
||||
for fname, syntax in files.iteritems():
|
||||
ed = editors.get(fname, None)
|
||||
if ed is not None:
|
||||
if ed.find_text(pat, complete=True, save_match='gui'):
|
||||
if ed.find_text(pat, complete=True):
|
||||
show_editor(fname)
|
||||
return True
|
||||
else:
|
||||
@ -182,3 +182,33 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
|
||||
|
||||
msg = '<p>' + _('No matches were found for %s') % ('<pre style="font-style:italic">' + prepare_string_for_xml(search['find']) + '</pre>')
|
||||
return error_dialog(gui_parent, _('Not found'), msg, show=True)
|
||||
|
||||
def find_text_in_chunks(pat, chunks):
    """Search for ``pat`` in the concatenation of a sequence of text chunks.

    :param pat: Compiled pattern object providing ``search()``.
    :param chunks: Sequence of ``(text, start)`` pairs, where ``start`` is the
        position of ``text`` in some outer coordinate system. The sequence is
        iterated twice, so it must not be a one-shot iterator.
    :return: ``(start_pos, end_pos)`` of the first match, expressed in the
        outer coordinate system, with ``end_pos`` being one past the last
        matched character. ``(-1, -1)`` is returned when there is no match or
        when the match is zero-width.
    """
    text = ''.join(x[0] for x in chunks)
    m = pat.search(text)
    if m is None:
        return -1, -1
    start, after = m.span()
    if start == after:
        # A zero-width match selects nothing. Report it uniformly as "not
        # found" instead of letting the outcome depend on whether the match
        # happens to fall on a chunk boundary.
        return -1, -1

    offset = 0  # index in the joined text at which the current chunk begins
    start_pos = None
    for chunk, chunk_start in chunks:
        chunk_end = offset + len(chunk)
        if start_pos is None and offset <= start < chunk_end:
            start_pos = chunk_start + (start - offset)
        # The end is located via the last matched character (after - 1), so
        # that a match ending exactly at a chunk's end maps into that chunk
        # rather than into the following one.
        if start_pos is not None and offset <= after - 1 < chunk_end:
            return start_pos, chunk_start + (after - offset)
        offset = chunk_end
    return -1, -1
|
||||
|
Loading…
x
Reference in New Issue
Block a user