mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Viewer: Fix searching in Regex and Whole words mode not working well.
Viewer: Fix searching for multiple words in fixed layout books not working. Fixes #1863464 [Private bug](https://bugs.launchpad.net/calibre/+bug/1863464)
This commit is contained in:
parent
90aba42b2a
commit
2f701318d2
@ -53,16 +53,21 @@ class BusySpinner(QWidget): # {{{
|
||||
|
||||
quote_map= {'"':'"“”', "'": "'‘’"}
|
||||
qpat = regex.compile(r'''(['"])''')
|
||||
spat = regex.compile(r'(\s+)')
|
||||
|
||||
|
||||
def text_to_regex(text):
|
||||
ans = []
|
||||
for part in qpat.split(text):
|
||||
r = quote_map.get(part)
|
||||
if r is not None:
|
||||
ans.append('[' + r + ']')
|
||||
for wpart in spat.split(text):
|
||||
if not wpart.strip():
|
||||
ans.append(r'\s+')
|
||||
else:
|
||||
ans.append(regex.escape(part))
|
||||
for part in qpat.split(wpart):
|
||||
r = quote_map.get(part)
|
||||
if r is not None:
|
||||
ans.append('[' + r + ']')
|
||||
else:
|
||||
ans.append(regex.escape(part))
|
||||
return ''.join(ans)
|
||||
|
||||
|
||||
@ -111,10 +116,11 @@ class SearchFinished(object):
|
||||
|
||||
class SearchResult(object):
|
||||
|
||||
__slots__ = ('search_query', 'before', 'text', 'after', 'spine_idx', 'index', 'file_name', '_static_text')
|
||||
__slots__ = ('search_query', 'before', 'text', 'after', 'q', 'spine_idx', 'index', 'file_name', '_static_text')
|
||||
|
||||
def __init__(self, search_query, before, text, after, name, spine_idx, index):
|
||||
def __init__(self, search_query, before, text, after, q, name, spine_idx, index):
|
||||
self.search_query = search_query
|
||||
self.q = q
|
||||
self.before, self.text, self.after = before, text, after
|
||||
self.spine_idx, self.index = spine_idx, index
|
||||
self.file_name = name
|
||||
@ -145,8 +151,8 @@ class SearchResult(object):
|
||||
'before': self.before, 'after': self.after, 'mode': self.search_query.mode
|
||||
}
|
||||
|
||||
def is_or_is_after(self, result_from_js):
|
||||
return result_from_js['spine_idx'] == self.spine_idx and self.index >= result_from_js['index'] and result_from_js['text'] == self.text
|
||||
def is_result(self, result_from_js):
|
||||
return result_from_js['spine_idx'] == self.spine_idx and self.index == result_from_js['index'] and result_from_js['text'] == self.text
|
||||
|
||||
def __str__(self):
|
||||
from collections import namedtuple
|
||||
@ -179,10 +185,7 @@ def searchable_text_for_name(name):
|
||||
stack.append(tail)
|
||||
if children:
|
||||
stack.extend(reversed(children))
|
||||
# Normalize whitespace to a single space, this will cause failures
|
||||
# when searching over spaces in pre nodes, but that is a lesser evil
|
||||
# since the DOM converts \n, \t etc to a single space
|
||||
return regex.sub(r'\s+', ' ', ''.join(ans))
|
||||
return ''.join(ans)
|
||||
|
||||
|
||||
def search_in_name(name, search_query, ctx_size=50):
|
||||
@ -383,23 +386,24 @@ class Results(QListWidget): # {{{
|
||||
self.item_activated()
|
||||
|
||||
def search_result_not_found(self, sr):
|
||||
remove = []
|
||||
remove = None
|
||||
for i in range(self.count()):
|
||||
item = self.item(i)
|
||||
r = item.data(Qt.UserRole)
|
||||
if r.is_or_is_after(sr):
|
||||
remove.append(i)
|
||||
if remove:
|
||||
last_i = remove[-1]
|
||||
if last_i < self.count() - 1:
|
||||
self.setCurrentRow(last_i + 1)
|
||||
if r.is_result(sr):
|
||||
remove = i
|
||||
if remove is not None:
|
||||
q = sr['spine_idx']
|
||||
for i in range(remove + 1, self.count()):
|
||||
item = self.item(i)
|
||||
r = item.data(Qt.UserRole)
|
||||
if r.spine_index != q:
|
||||
break
|
||||
r.index -= 1
|
||||
self.takeItem(remove)
|
||||
if remove < self.count():
|
||||
self.setCurrentRow(remove)
|
||||
self.item_activated()
|
||||
elif remove[0] > 0:
|
||||
self.setCurrentRow(remove[0] - 1)
|
||||
self.item_activated()
|
||||
for i in reversed(remove):
|
||||
self.takeItem(i)
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
@ -469,8 +473,9 @@ class SearchPanel(QWidget): # {{{
|
||||
try:
|
||||
for i, result in enumerate(search_in_name(name, search_query)):
|
||||
before, text, after = result
|
||||
self.results_found.emit(SearchResult(search_query, before, text, after, name, spine_idx, counter[text]))
|
||||
counter[text] += 1
|
||||
q = (before or '')[-5:] + text + (after or '')[:5]
|
||||
self.results_found.emit(SearchResult(search_query, before, text, after, q, name, spine_idx, counter[q]))
|
||||
counter[q] += 1
|
||||
except Exception:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
128
src/pyj/read_book/find.pyj
Normal file
128
src/pyj/read_book/find.pyj
Normal file
@ -0,0 +1,128 @@
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPL v3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __python__ import bound_methods, hash_literals
|
||||
|
||||
|
||||
def build_text_map():
    # Walk the DOM below document.body and concatenate the contents of every
    # text node that is reached into one flat string. For each such text node
    # an entry {node, offset, length} is recorded, where offset is the position
    # of the node's text inside the flat string.
    #
    # Returns a hash with the keys: timestamp (window.performance.now() at
    # build time), flat_text and node_list.
    entries = v'[]'
    combined = ''
    # Elements whose subtrees are never descended into
    skipped_tags = {
        'style': True, 'script': True, 'noscript': True, 'title': True, 'meta': True, 'head': True, 'link': True, 'html': True,
        'img': True
    }

    def visit(node):
        nonlocal combined
        kind = node.nodeType
        if kind is Node.TEXT_NODE:
            content = node.nodeValue
            if content and content.length:
                entries.push({'node': node, 'offset': combined.length, 'length': content.length})
                combined += content
            return
        if kind is not Node.ELEMENT_NODE or not node.hasChildNodes():
            return
        if skipped_tags[node.tagName.toLowerCase()]:
            return
        # Subtrees that are not displayed or are hidden contribute no text
        computed = window.getComputedStyle(node)
        if computed.display is 'none' or computed.visibility is 'hidden':
            return
        kids = node.childNodes
        for i in range(kids.length):
            visit(v'kids[i]')

    visit(document.body)
    return {'timestamp': window.performance.now(), 'flat_text': combined, 'node_list': entries}
|
||||
|
||||
|
||||
def find_node_for_index_binary(node_list, idx_in_flat_text, start):
    # Binary search node_list for the entry whose half-open span
    # [offset, offset + length) contains idx_in_flat_text. The search is
    # restricted to positions >= start (start defaults to 0). This assumes the
    # entries are ordered by increasing offset.
    #
    # Returns (node, offset of the index inside that node, position of the
    # entry in node_list), or (None, None, None) when no entry contains the
    # index.
    lo = start or 0
    hi = node_list.length - 1
    while lo <= hi:
        mid = Math.floor((lo + hi) / 2)
        entry = node_list[mid]
        span_end = entry.offset + entry.length
        if entry.offset <= idx_in_flat_text and span_end > idx_in_flat_text:
            return entry.node, idx_in_flat_text - entry.offset, mid
        if span_end <= idx_in_flat_text:
            # The wanted index lies after this entry
            lo = mid + 1
        else:
            hi = mid - 1
    return None, None, None
|
||||
|
||||
|
||||
def find_node_for_index_linear(node_list, idx_in_flat_text, start):
    # Scan node_list forwards, beginning at position start (default 0), for
    # the first entry whose half-open span [offset, offset + length) contains
    # idx_in_flat_text.
    #
    # Returns (node, offset of the index inside that node, position of the
    # entry in node_list), or (None, None, None) when no entry matches.
    i = start or 0
    while i < node_list.length:
        entry = node_list[i]
        span_end = entry.offset + entry.length
        if entry.offset <= idx_in_flat_text and span_end > idx_in_flat_text:
            return entry.node, idx_in_flat_text - entry.offset, i
        i += 1
    return None, None, None
|
||||
|
||||
|
||||
def find_specific_occurrence(q, num, before_len, after_len, text_map):
    # Locate occurrence number num (zero based, overlapping occurrences are
    # counted) of the string q inside text_map.flat_text and map it back to
    # DOM nodes.
    #
    # q is the text to look for; its first before_len and last after_len
    # characters are surrounding context and are excluded from the returned
    # range. text_map is a hash with flat_text and node_list keys.
    #
    # Returns a hash with start_node/start_offset/start_pos,
    # end_node/end_offset/end_pos and idx_in_flat_text, or undefined when q is
    # empty, has fewer than num + 1 occurrences, or cannot be mapped to nodes.
    if not q or not q.length:
        return
    from_idx = 0
    flat_text = text_map.flat_text
    pos = 0
    match_num = -1
    while True:
        idx = flat_text.indexOf(q, from_idx)
        if idx < 0:
            break
        match_num += 1
        # Advance by one character only, so overlapping occurrences count
        from_idx = idx + 1
        # Skip the occurrences that come before the requested one. The
        # comparison was previously inverted (num < match_num), which made the
        # first occurrence always be the one resolved, regardless of num.
        if match_num < num:
            continue
        start_node, start_offset, node_pos = find_node_for_index_binary(text_map.node_list, idx + before_len, pos)
        if start_node is not None:
            pos = node_pos
            # The end cannot be before the start, so scan forward from the
            # start entry.
            # NOTE(review): the end index is exclusive, and the lookup helper
            # only matches indices strictly inside an entry, so a match that
            # ends exactly at the end of flat_text is reported as not found.
            end_node, end_offset, node_pos = find_node_for_index_linear(text_map.node_list, idx + q.length - after_len, pos)
            if end_node is not None:
                return {
                    'start_node': start_node, 'start_offset': start_offset, 'start_pos': pos,
                    'end_node': end_node, 'end_offset': end_offset, 'end_pos': node_pos,
                    'idx_in_flat_text': idx
                }
        # The requested occurrence could not be mapped to nodes: give up
        break
|
||||
|
||||
|
||||
# Module level cache. select_search_result() stores the text map built by
# build_text_map() here under the text_map key, so it is built only once until
# the cache is reset.
cache = {}


def reset_find_caches():
    # Throw away everything that has been cached by replacing the module level
    # cache with a fresh empty hash.
    nonlocal cache
    cache = {}
|
||||
|
||||
|
||||
def select_find_result(match):
    # Make the window's selection span the range described by match, which
    # must have start_node, start_offset, end_node and end_offset.
    window.getSelection().setBaseAndExtent(
        match.start_node, match.start_offset, match.end_node, match.end_offset)
|
||||
|
||||
|
||||
def select_search_result(sr):
    # Select the text of the search result sr in the current document.
    #
    # The result is located by searching for sr.text together with up to five
    # characters of context on either side (the tail of sr.before and the head
    # of sr.after); sr.index says which occurrence of that combined string is
    # wanted. Any existing selection is cleared first.
    #
    # Returns True if the result was found and selected, False otherwise.
    window.getSelection().removeAllRanges()
    if not cache.text_map:
        # Built lazily and kept until the caches are reset
        cache.text_map = build_text_map()
    prefix = sr.before[-5:] if sr.before else ''
    suffix = sr.after[:5] if sr.after else ''
    needle = prefix + sr.text + suffix
    found = find_specific_occurrence(needle, int(sr.index), prefix.length, suffix.length, cache.text_map)
    if found:
        select_find_result(found)
        return True
    return False
|
@ -9,6 +9,7 @@ from fs_images import fix_fullscreen_svg_images
|
||||
from iframe_comm import IframeClient
|
||||
from read_book.cfi import scroll_to as scroll_to_cfi
|
||||
from read_book.extract import get_elements
|
||||
from read_book.find import reset_find_caches, select_search_result
|
||||
from read_book.flow_mode import (
|
||||
anchor_funcs as flow_anchor_funcs, auto_scroll_action as flow_auto_scroll_action,
|
||||
flow_onwheel, flow_to_scroll_fraction, handle_gesture as flow_handle_gesture,
|
||||
@ -49,9 +50,7 @@ from read_book.touch import (
|
||||
create_handlers as create_touch_handlers, reset_handlers as reset_touch_handlers
|
||||
)
|
||||
from read_book.viewport import scroll_viewport
|
||||
from utils import (
|
||||
apply_cloned_selection, clone_selection, debounce, html_escape, is_ios
|
||||
)
|
||||
from utils import debounce, html_escape, is_ios
|
||||
|
||||
FORCE_FLOW_MODE = False
|
||||
CALIBRE_VERSION = '__CALIBRE_VERSION__'
|
||||
@ -339,6 +338,7 @@ class IframeBoss:
|
||||
self.content_loaded_stage2()
|
||||
|
||||
def content_loaded_stage2(self):
|
||||
reset_find_caches()
|
||||
self.connect_links()
|
||||
self.content_ready = True
|
||||
# this is the loading styles used to suppress scrollbars during load
|
||||
@ -580,39 +580,9 @@ class IframeBoss:
|
||||
self.send_message('find_in_spine', text=data.text, backwards=data.backwards, searched_in_spine=data.searched_in_spine)
|
||||
|
||||
def show_search_result(self, data, from_load):
|
||||
sr = data.search_result
|
||||
idx = -1
|
||||
window.getSelection().removeAllRanges()
|
||||
while idx < sr.index:
|
||||
if not window.find(sr.text, True, False, False, False, False):
|
||||
self.send_message('search_result_not_found', search_result=sr)
|
||||
break
|
||||
if sr.mode is not 'normal':
|
||||
# verify we have the correct match since regexes can have
|
||||
# boundary conditions
|
||||
sel = window.getSelection()
|
||||
ranges = clone_selection(sel)
|
||||
r = ranges[0]
|
||||
if sr.before:
|
||||
p = r.cloneRange()
|
||||
p.collapse(True)
|
||||
sel = apply_cloned_selection(v'[p]')
|
||||
sel.modify('extend', 'left', 'character')
|
||||
if sel.toString() is not sr.before[-1]:
|
||||
apply_cloned_selection(ranges)
|
||||
continue
|
||||
if sr.after:
|
||||
p = r.cloneRange()
|
||||
p.collapse(False)
|
||||
sel = apply_cloned_selection(v'[p]')
|
||||
sel.modify('extend', 'right', 'character')
|
||||
if sel.toString() is not sr.after[0]:
|
||||
apply_cloned_selection(ranges)
|
||||
continue
|
||||
apply_cloned_selection(ranges)
|
||||
idx += 1
|
||||
if idx > -1 and current_layout_mode() is not 'flow':
|
||||
snap_to_selection()
|
||||
if select_search_result(data.search_result):
|
||||
if current_layout_mode() is not 'flow':
|
||||
snap_to_selection()
|
||||
|
||||
def reference_item_changed(self, ref_num_or_none):
|
||||
self.send_message('reference_item_changed', refnum=ref_num_or_none, index=current_spine_item().index)
|
||||
|
@ -252,21 +252,6 @@ def sandboxed_html(html, style, sandbox):
|
||||
return ans
|
||||
|
||||
|
||||
def clone_selection(sel):
|
||||
ans = v'[]'
|
||||
for i in range(sel.rangeCount):
|
||||
ans.push(sel.getRangeAt(i).cloneRange())
|
||||
return ans
|
||||
|
||||
|
||||
def apply_cloned_selection(ranges):
|
||||
sel = window.getSelection()
|
||||
sel.removeAllRanges()
|
||||
for r in ranges:
|
||||
sel.addRange(r)
|
||||
return sel
|
||||
|
||||
|
||||
if __name__ is '__main__':
|
||||
from pythonize import strings
|
||||
strings()
|
||||
|
Loading…
x
Reference in New Issue
Block a user