mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement text search for HTML files
This commit is contained in:
parent
3c27f28fdb
commit
298b664669
@ -10,7 +10,7 @@ import sys, re
|
|||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
|
||||||
from cssutils import parseStyle
|
from cssutils import parseStyle
|
||||||
from PyQt5.Qt import QTextEdit, Qt
|
from PyQt5.Qt import QTextEdit, Qt, QTextCursor
|
||||||
|
|
||||||
from calibre import prepare_string_for_xml, xml_entity_to_unicode
|
from calibre import prepare_string_for_xml, xml_entity_to_unicode
|
||||||
from calibre.ebooks.oeb.polish.container import OEB_DOCS
|
from calibre.ebooks.oeb.polish.container import OEB_DOCS
|
||||||
@ -672,9 +672,62 @@ class Smarts(NullSmarts):
|
|||||||
|
|
||||||
return 'complete_names', (names_type, doc_name, c.root), query
|
return 'complete_names', (names_type, doc_name, c.root), query
|
||||||
|
|
||||||
|
def find_text(self, pat, cursor):
    '''
    Search for ``pat`` in the text (not the markup) covered by ``cursor``.

    The document is walked block by block from its beginning. The tag
    boundaries stored in each block's user data (``userData().tags``) are
    used to decide which stretches of a block are text; those stretches,
    clipped to the region delimited by the cursor's anchor and position,
    are collected as ``(text, document_position)`` chunks and handed to
    ``find_text_in_chunks()``.

    :param pat: Compiled pattern, passed unchanged to
        ``find_text_in_chunks()``.
    :param cursor: ``QTextCursor`` whose anchor and position delimit the
        region to search. It is copied, not modified.
    :return: ``(found, start, end)``. ``start`` and ``end`` are the values
        returned by ``find_text_in_chunks()``; ``found`` is False when
        either of them is -1.
    '''
    from calibre.gui2.tweak_book.text_search import find_text_in_chunks
    chunks = []
    # Work on a copy so the caller's cursor is left untouched
    c = QTextCursor(cursor)
    c.setPosition(0)
    # NOTE(review): assumes the document does not begin inside a tag
    in_text = True
    block = c.block()

    cstart = min(cursor.position(), cursor.anchor())
    cend = max(cursor.position(), cursor.anchor())

    def append(text, start):
        # Record the part of text (which begins at document position start)
        # that overlaps the search region; text entirely outside the region
        # is dropped.
        after = start + len(text)
        if start <= cend and cstart < after:
            # Trim whatever lies beyond cend (the character at cend is kept)
            extra = after - (cend + 1)
            if extra > 0:
                text = text[:-extra]
            # Trim whatever lies before cstart
            extra = cstart - start
            if extra > 0:
                text = text[extra:]
            chunks.append((text, start + max(extra, 0)))

    # Only an upper bound is tested here. The walk starts at the first block
    # of the document so that in_text is correct when the search region is
    # reached; also requiring the block to extend past cstart would end the
    # loop on the very first block whenever the region starts further down.
    # append() discards everything that lies before cstart.
    while block.isValid() and block.position() <= cend:
        # NOTE(review): assumes every block already carries user data with a
        # tags attribute -- confirm the highlighter has finished before this
        # is called.
        boundaries = sorted(block.userData().tags, key=get_offset)
        if not boundaries:
            # Add the whole line
            if in_text:
                text = block.text()
                if text:
                    append(text, block.position())
        else:
            start = block.position()
            c.setPosition(start)
            for b in boundaries:
                if in_text:
                    # Select from the end of the previous boundary up to
                    # this one
                    c.setPosition(start + b.offset, c.KeepAnchor)
                    if c.hasSelection():
                        append(c.selectedText(), c.anchor())
                # presumably is_start marks the opening of a tag, after
                # which we are no longer in text -- verify against the
                # highlighter
                in_text = not b.is_start
                # Continue just past the boundary character
                c.setPosition(start + b.offset + 1)
            if in_text:
                # Add remaining text in block
                c.setPosition(block.position() + boundaries[-1].offset + 1)
                c.movePosition(c.EndOfBlock, c.KeepAnchor)
                if c.hasSelection():
                    append(c.selectedText(), c.anchor())
        block = block.next()
    s, e = find_text_in_chunks(pat, chunks)
    return s != -1 and e != -1, s, e
|
||||||
|
|
||||||
if __name__ == '__main__': # {{{
|
if __name__ == '__main__': # {{{
|
||||||
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
from calibre.gui2.tweak_book.editor.widget import launch_editor
|
||||||
launch_editor('''\
|
if sys.argv[-1].endswith('.html'):
|
||||||
|
raw = lopen(sys.argv[-1], 'rb').read().decode('utf-8')
|
||||||
|
else:
|
||||||
|
raw = '''\
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html xml:lang="en" lang="en">
|
<html xml:lang="en" lang="en">
|
||||||
<!--
|
<!--
|
||||||
@ -703,5 +756,9 @@ if __name__ == '__main__': # {{{
|
|||||||
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
|
<p>Some non-BMP unicode text:\U0001f431\U0001f431\U0001f431</p>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
''', path_is_raw=True, syntax='xml')
|
'''
|
||||||
|
def callback(ed):
|
||||||
|
import regex
|
||||||
|
ed.find_text(regex.compile('A bold word'))
|
||||||
|
launch_editor(raw, path_is_raw=True, syntax='html', callback=callback)
|
||||||
# }}}
|
# }}}
|
||||||
|
@ -380,6 +380,7 @@ class TextEdit(PlainTextEdit):
|
|||||||
pos = c.End if reverse else c.Start
|
pos = c.End if reverse else c.Start
|
||||||
c.movePosition(pos, c.KeepAnchor)
|
c.movePosition(pos, c.KeepAnchor)
|
||||||
if hasattr(self.smarts, 'find_text'):
|
if hasattr(self.smarts, 'find_text'):
|
||||||
|
self.highlighter.join()
|
||||||
found, start, end = self.smarts.find_text(pat, c)
|
found, start, end = self.smarts.find_text(pat, c)
|
||||||
if not found:
|
if not found:
|
||||||
return False
|
return False
|
||||||
|
@ -166,7 +166,7 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
|
|||||||
for fname, syntax in files.iteritems():
|
for fname, syntax in files.iteritems():
|
||||||
ed = editors.get(fname, None)
|
ed = editors.get(fname, None)
|
||||||
if ed is not None:
|
if ed is not None:
|
||||||
if ed.find_text(pat, complete=True, save_match='gui'):
|
if ed.find_text(pat, complete=True):
|
||||||
show_editor(fname)
|
show_editor(fname)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -182,3 +182,33 @@ def run_text_search(search, current_editor, current_editor_name, searchable_name
|
|||||||
|
|
||||||
msg = '<p>' + _('No matches were found for %s') % ('<pre style="font-style:italic">' + prepare_string_for_xml(search['find']) + '</pre>')
|
msg = '<p>' + _('No matches were found for %s') % ('<pre style="font-style:italic">' + prepare_string_for_xml(search['find']) + '</pre>')
|
||||||
return error_dialog(gui_parent, _('Not found'), msg, show=True)
|
return error_dialog(gui_parent, _('Not found'), msg, show=True)
|
||||||
|
|
||||||
|
def find_text_in_chunks(pat, chunks):
    '''
    Search the concatenation of several text chunks and map the match back
    to the positions the chunks came from.

    :param pat: Object with a ``search(text)`` method returning ``None`` or
        a match object with ``span()`` (for example a compiled regular
        expression).
    :param chunks: Sequence of ``(text, position)`` pairs, where
        ``position`` is the position of the first character of ``text`` in
        the original document. The chunks are searched as if joined
        together with nothing in between.
    :return: ``(start, end)`` in document positions, ``end`` being the
        position just after the last matched character, or ``(-1, -1)``
        when nothing matches. A zero-length match is reported as
        ``(pos, pos)``.
    '''
    text = ''.join(chunk for chunk, _ in chunks)
    m = pat.search(text)
    if m is None:
        return -1, -1
    start, after = m.span()
    # The end of the match is located via its last character. A zero-length
    # match has no last character; looking for after - 1 would then point
    # into the previous chunk (or at -1), so anchor it on start instead.
    last = after - 1 if after > start else start

    offset = 0  # index in text of the first character of the current chunk
    start_pos = None
    last_chunk_end = None  # document position just after the last non-empty chunk seen

    for chunk, chunk_start in chunks:
        clen = len(chunk)
        chunk_after = offset + clen
        if chunk_after < start:
            offset = chunk_after
            continue  # this chunk ends before start
        if start_pos is None and offset <= start < chunk_after:
            start_pos = chunk_start + (start - offset)
        if start_pos is not None and offset <= last < chunk_after:
            return start_pos, chunk_start + (after - offset)
        offset = chunk_after
        if clen:
            last_chunk_end = chunk_start + clen
        if offset > after:
            break  # the next chunk starts after end
    if start == after == offset and last_chunk_end is not None:
        # Zero-length match at the very end of the text: no chunk contains
        # that index, so place it just after the last character searched.
        return last_chunk_end, last_chunk_end
    return -1, -1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user