Handle search expressions with boundary conditions

2025-07-09 03:04:10 -04:00 · 2020-01-22 15:57:23 +05:30 · 2020-01-22 15:57:23 +05:30 · 7cdd0c10ef
commit 7cdd0c10ef
parent b0701cc4a6
3 changed files with 49 additions and 3 deletions
--- a/src/calibre/gui2/viewer/search.py
+++ b/src/calibre/gui2/viewer/search.py
@ -117,7 +117,10 @@ class SearchResult(object):
    @property
    def for_js(self):
-        return {'file_name': self.file_name, 'spine_idx': self.spine_idx, 'index': self.index, 'text': self.text}
+        return {
            'file_name': self.file_name, 'spine_idx': self.spine_idx, 'index': self.index, 'text': self.text,
            'before': self.before, 'after': self.after, 'mode': self.search_query.mode
        }
    def is_or_is_after(self, result_from_js):
        return result_from_js['spine_idx'] == self.spine_idx and self.index >= result_from_js['index'] and result_from_js['text'] == self.text
@ -148,7 +151,10 @@ def searchable_text_for_name(name):
            stack.append(tail)
        if children:
            stack.extend(reversed(children))
-    return ''.join(ans)
+    # Normalize whitespace to a single space, this will cause failures
    # when searching over spaces in pre nodes, but that is a lesser evil
    # since the DOM converts \n, \t etc to a single space
    return regex.sub(r'\s+', ' ', ''.join(ans))
 def search_in_name(name, search_query, ctx_size=50):
--- a/src/pyj/read_book/iframe.pyj
+++ b/src/pyj/read_book/iframe.pyj
@ -46,7 +46,9 @@ from read_book.shortcuts import (
 from read_book.toc import update_visible_toc_anchors
 from read_book.touch import create_handlers as create_touch_handlers
 from read_book.viewport import scroll_viewport
-from utils import debounce, html_escape, is_ios
+from utils import (
    apply_cloned_selection, clone_selection, debounce, html_escape, is_ios
 )
 FORCE_FLOW_MODE = False
 CALIBRE_VERSION = '__CALIBRE_VERSION__'
@ -537,6 +539,29 @@ class IframeBoss:
            if not window.find(sr.text, True, False, False, False, False):
                self.send_message('search_result_not_found', search_result=sr)
                break
            if sr.mode is not 'normal':
                # verify we have the correct match since regexes can have
                # boundary conditions
                sel = window.getSelection()
                ranges = clone_selection(sel)
                r = ranges[0]
                if sr.before:
                    p = r.cloneRange()
                    p.collapse(True)
                    sel = apply_cloned_selection(v'[p]')
                    sel.modify('extend', 'left', 'character')
                    if sel.toString() is not sr.before[-1]:
                        apply_cloned_selection(ranges)
                        continue
                if sr.after:
                    p = r.cloneRange()
                    p.collapse(False)
                    sel = apply_cloned_selection(v'[p]')
                    sel.modify('extend', 'right', 'character')
                    if sel.toString() is not sr.after[0]:
                        apply_cloned_selection(ranges)
                        continue
                apply_cloned_selection(ranges)
            idx += 1
        if idx > -1 and current_layout_mode() is not 'flow':
            snap_to_selection()
--- a/src/pyj/utils.pyj
+++ b/src/pyj/utils.pyj
@ -252,6 +252,21 @@ def sandboxed_html(html, style, sandbox):
    return ans
 def clone_selection(sel):
    # Snapshot a selection: return a JavaScript array holding a clone of
    # every range in sel (indices 0 .. sel.rangeCount - 1), in order.
    # The returned clones are new Range objects produced by cloneRange(),
    # not the ranges held by sel itself.
    # An empty array is returned when sel.rangeCount is 0.
    ans = v'[]'  # plain JS array literal, filled via .push() below
    for i in range(sel.rangeCount):
        ans.push(sel.getRangeAt(i).cloneRange())
    return ans
 def apply_cloned_selection(ranges):
    # Replace the window's current selection with the given ranges and
    # return the selection object so the caller can keep operating on it.
    # ranges is iterated and each item is passed to sel.addRange(); it is
    # expected to be an array of Range objects such as the one built by
    # clone_selection().
    sel = window.getSelection()
    # Drop whatever is currently selected before adding the new ranges
    sel.removeAllRanges()
    for r in ranges:
        sel.addRange(r)
    return sel
 if __name__ is '__main__':
    from pythonize import strings
    strings()