E-book viewer: Allow searching for nearby words using a new "Nearby words" search mode

2025-07-09 03:04:10 -04:00 · 2022-08-08 12:53:14 +05:30 · 2022-08-08 12:53:14 +05:30 · b474ba82b0
commit b474ba82b0
parent 3796f82a00
1 changed files with 56 additions and 6 deletions
--- a/src/calibre/gui2/viewer/search.py
+++ b/src/calibre/gui2/viewer/search.py
@ -82,26 +82,43 @@ def text_to_regex(text):
    return ''.join(ans)
 def words_and_interval_for_near(expr, default_interval=60):
    """Split a "nearby words" query into word patterns and a character interval.

    expr is split on whitespace. If the final token is a decimal number it is
    taken as the interval and removed from the word list; otherwise
    default_interval is used. Every remaining token is converted with
    text_to_regex().

    Returns a (words, interval) tuple where words is a list of regex
    fragments and interval is an int.
    """
    parts = expr.split()
    interval = default_interval
    # Decide by position, not by object identity: an earlier token can be the
    # very same str object as the last one (one-character strings are shared
    # in CPython), which would wrongly drop it from the words.
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() rejects with ValueError.
    if parts and parts[-1].isdecimal():
        interval = int(parts.pop())
    words = [text_to_regex(q) for q in parts]
    return words, interval
 class Search:
    def __init__(self, text, mode, case_sensitive, backwards):
        """Record the parameters of a single search request.

        text is the query text and mode selects how it is interpreted (the
        modes offered in this file are 'normal', 'word', 'near' and 'regex').
        case_sensitive controls whether regex.IGNORECASE is added to the
        compile flags. backwards is stored as given.
        """
        self.text, self.mode = text, mode
        self.case_sensitive = case_sensitive
        self.backwards = backwards
        # Caches for compiled patterns; they stay None until the regex and
        # near_search_data properties are first read.
-        self._regex = None
+        self._regex = self._nsd = None
    def __eq__(self, other):
        if not isinstance(other, Search):
            return False
        return self.text == other.text and self.mode == other.mode and self.case_sensitive == other.case_sensitive
    @property
    def regex_flags(self):
        """Flags for compiling this search: REGEX_FLAGS, with
        regex.IGNORECASE added when the search is not case sensitive."""
        if self.case_sensitive:
            return REGEX_FLAGS
        return REGEX_FLAGS | regex.IGNORECASE
    @property
    def regex(self):
        if self._regex is None:
            expr = self.text
            flags = REGEX_FLAGS
            if not self.case_sensitive:
                flags = regex.IGNORECASE
            if self.mode != 'regex':
                if self.mode == 'word':
                    words = []
@ -110,9 +127,23 @@ class Search:
                    expr = r'\s+'.join(words)
                else:
                    expr = text_to_regex(expr)
-            self._regex = regex.compile(expr, flags)
+            self._regex = regex.compile(expr, self.regex_flags)
        return self._regex
    @property
    def near_search_data(self):
        if self._nsd is None:
            words, interval = words_and_interval_for_near(self.text)
            interval = max(1, interval)
            flags = self.regex_flags
            flags |= regex.DOTALL
            match_any_word = r'(?:\b(?:' + '|'.join(words) + r')\b)'
            joiner = '.{1,%d}' % interval
            full_pat = regex.compile(joiner.join(match_any_word for x in words), flags=flags)
            word_pats = tuple(regex.compile(rf'\b{x}\b', flags) for x in words)
            self._nsd = word_pats, full_pat
        return self._nsd
    def __str__(self):
        from collections import namedtuple
        s = ('text', 'mode', 'case_sensitive', 'backwards')
@ -313,10 +344,23 @@ def toc_nodes_for_search_result(sr):
 def search_in_name(name, search_query, ctx_size=75):
    raw = searchable_text_for_name(name)[0]
-    if search_query.mode == 'regex' or search_query.case_sensitive:
+    if search_query.mode == 'near':
        word_pats, full_pat = search_query.near_search_data
        def miter():
            for match in full_pat.finditer(raw):
                text = match.group()
                for word_pat in word_pats:
                    if not word_pat.search(text):
                        break
                else:
                    yield match.span()
    elif search_query.mode == 'regex' or search_query.case_sensitive:
        def miter():
            for match in search_query.regex.finditer(raw):
                yield match.span()
    else:
        spans = []
        miter = lambda: spans
@ -376,6 +420,7 @@ class SearchInput(QWidget):  # {{{
        qt.setFocusPolicy(Qt.FocusPolicy.NoFocus)
        qt.addItem(_('Contains'), 'normal')
        qt.addItem(_('Whole words'), 'word')
        qt.addItem(_('Nearby words'), 'near')
        qt.addItem(_('Regex'), 'regex')
        qt.setToolTip('<p>' + _(
            'Choose the type of search: <ul>'
@ -383,6 +428,11 @@ class SearchInput(QWidget):  # {{{
            ' spaces and accents, unless Case sensitive searching is enabled.'
            '<li><b>Whole words</b> will search for whole words that equal the entered text. As with'
            ' "Contains" searches punctuation and accents are ignored.'
            '<li><b>Nearby words</b> will search for whole words that are near each other in the text.'
            ' For example: <i>calibre cool</i> will find places in the text where the words <i>calibre</i> and <i>cool</i>'
            ' occur within 60 characters of each other. To change the number of characters add the number to the end of'
            ' the list of words, for example: <i>calibre cool awesome 120</i> will search for <i>calibre</i>, <i>cool</i>'
            ' and <i>awesome</i> within 120 characters of each other.'
            '<li><b>Regex</b> will interpret the text as a regular expression.'
        ))
        qt.setCurrentIndex(qt.findData(vprefs.get(f'viewer-{self.panel_name}-mode', 'normal') or 'normal'))