mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
E-book viewer: Allow searching for nearby words using a new "Nearby words" search mode
This commit is contained in:
parent
3796f82a00
commit
b474ba82b0
@ -82,26 +82,43 @@ def text_to_regex(text):
|
|||||||
return ''.join(ans)
|
return ''.join(ans)
|
||||||
|
|
||||||
|
|
||||||
|
def words_and_interval_for_near(expr, default_interval=60):
    """Split a "Nearby words" query into word patterns and an interval.

    The query is split on whitespace. If the final token is a decimal
    number it is taken as the interval instead of as a word; every other
    token is converted with ``text_to_regex``.

    :param expr: The raw query text.
    :param default_interval: Interval returned when the query does not end
        in a number.
    :return: A ``(words, interval)`` tuple, where ``words`` is a list of the
        converted tokens and ``interval`` is an int.
    """
    parts = expr.split()
    interval = default_interval
    # Decide by position, not identity: CPython may hand back the very same
    # object for equal short strings, so an ``is parts[-1]`` test can also
    # swallow an earlier token such as the first "5" in "5 a 5".
    # isdecimal() (not isdigit()) is used because isdigit() accepts
    # characters like superscript two that int() refuses to parse.
    if parts and parts[-1].isdecimal():
        interval = int(parts.pop())
    words = [text_to_regex(q) for q in parts]
    return words, interval
|
||||||
|
|
||||||
|
|
||||||
class Search:
|
class Search:
|
||||||
|
|
||||||
def __init__(self, text, mode, case_sensitive, backwards):
|
def __init__(self, text, mode, case_sensitive, backwards):
|
||||||
self.text, self.mode = text, mode
|
self.text, self.mode = text, mode
|
||||||
self.case_sensitive = case_sensitive
|
self.case_sensitive = case_sensitive
|
||||||
self.backwards = backwards
|
self.backwards = backwards
|
||||||
self._regex = None
|
self._regex = self._nsd = None
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not isinstance(other, Search):
|
if not isinstance(other, Search):
|
||||||
return False
|
return False
|
||||||
return self.text == other.text and self.mode == other.mode and self.case_sensitive == other.case_sensitive
|
return self.text == other.text and self.mode == other.mode and self.case_sensitive == other.case_sensitive
|
||||||
|
|
||||||
|
@property
def regex_flags(self):
    """Flags used to compile this search's patterns.

    Returns ``REGEX_FLAGS`` unchanged for a case sensitive search, and
    ``REGEX_FLAGS`` combined with ``regex.IGNORECASE`` otherwise.
    """
    if self.case_sensitive:
        return REGEX_FLAGS
    return REGEX_FLAGS | regex.IGNORECASE
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def regex(self):
|
def regex(self):
|
||||||
if self._regex is None:
|
if self._regex is None:
|
||||||
expr = self.text
|
expr = self.text
|
||||||
flags = REGEX_FLAGS
|
|
||||||
if not self.case_sensitive:
|
|
||||||
flags = regex.IGNORECASE
|
|
||||||
if self.mode != 'regex':
|
if self.mode != 'regex':
|
||||||
if self.mode == 'word':
|
if self.mode == 'word':
|
||||||
words = []
|
words = []
|
||||||
@ -110,9 +127,23 @@ class Search:
|
|||||||
expr = r'\s+'.join(words)
|
expr = r'\s+'.join(words)
|
||||||
else:
|
else:
|
||||||
expr = text_to_regex(expr)
|
expr = text_to_regex(expr)
|
||||||
self._regex = regex.compile(expr, flags)
|
self._regex = regex.compile(expr, self.regex_flags)
|
||||||
return self._regex
|
return self._regex
|
||||||
|
|
||||||
|
@property
def near_search_data(self):
    """Compiled patterns for a "Nearby words" search, built once and cached.

    :return: A ``(word_pats, full_pat)`` tuple. ``word_pats`` holds one
        whole-word pattern per query word. ``full_pat`` matches as many
        whole-word occurrences of *any* query word as there are query
        words, with 1 to ``interval`` characters between consecutive
        occurrences. Because ``full_pat`` accepts repeats of the same
        word, callers should check a candidate match against every
        pattern in ``word_pats`` to confirm all words are present.
    """
    if self._nsd is None:
        words, interval = words_and_interval_for_near(self.text)
        interval = max(1, interval)
        # DOTALL lets the "." in the gap pattern cross line breaks.
        flags = self.regex_flags | regex.DOTALL
        if words:
            match_any_word = r'(?:\b(?:' + '|'.join(words) + r')\b)'
            joiner = '.{1,%d}' % interval
            full_pat = regex.compile(joiner.join([match_any_word] * len(words)), flags=flags)
        else:
            # With no words (empty query, or a query that is only the
            # interval number) the joined pattern would be the empty
            # string, which matches at every position in the text. Use a
            # pattern that can never match instead.
            full_pat = regex.compile(r'(?!)', flags=flags)
        word_pats = tuple(regex.compile(rf'\b{x}\b', flags) for x in words)
        self._nsd = word_pats, full_pat
    return self._nsd
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
from collections import namedtuple
|
from collections import namedtuple
|
||||||
s = ('text', 'mode', 'case_sensitive', 'backwards')
|
s = ('text', 'mode', 'case_sensitive', 'backwards')
|
||||||
@ -313,10 +344,23 @@ def toc_nodes_for_search_result(sr):
|
|||||||
def search_in_name(name, search_query, ctx_size=75):
|
def search_in_name(name, search_query, ctx_size=75):
|
||||||
raw = searchable_text_for_name(name)[0]
|
raw = searchable_text_for_name(name)[0]
|
||||||
|
|
||||||
if search_query.mode == 'regex' or search_query.case_sensitive:
|
if search_query.mode == 'near':
|
||||||
|
word_pats, full_pat = search_query.near_search_data
|
||||||
|
|
||||||
|
def miter():
|
||||||
|
for match in full_pat.finditer(raw):
|
||||||
|
text = match.group()
|
||||||
|
for word_pat in word_pats:
|
||||||
|
if not word_pat.search(text):
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
yield match.span()
|
||||||
|
|
||||||
|
elif search_query.mode == 'regex' or search_query.case_sensitive:
|
||||||
def miter():
|
def miter():
|
||||||
for match in search_query.regex.finditer(raw):
|
for match in search_query.regex.finditer(raw):
|
||||||
yield match.span()
|
yield match.span()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
spans = []
|
spans = []
|
||||||
miter = lambda: spans
|
miter = lambda: spans
|
||||||
@ -376,6 +420,7 @@ class SearchInput(QWidget): # {{{
|
|||||||
qt.setFocusPolicy(Qt.FocusPolicy.NoFocus)
|
qt.setFocusPolicy(Qt.FocusPolicy.NoFocus)
|
||||||
qt.addItem(_('Contains'), 'normal')
|
qt.addItem(_('Contains'), 'normal')
|
||||||
qt.addItem(_('Whole words'), 'word')
|
qt.addItem(_('Whole words'), 'word')
|
||||||
|
qt.addItem(_('Nearby words'), 'near')
|
||||||
qt.addItem(_('Regex'), 'regex')
|
qt.addItem(_('Regex'), 'regex')
|
||||||
qt.setToolTip('<p>' + _(
|
qt.setToolTip('<p>' + _(
|
||||||
'Choose the type of search: <ul>'
|
'Choose the type of search: <ul>'
|
||||||
@ -383,6 +428,11 @@ class SearchInput(QWidget): # {{{
|
|||||||
' spaces and accents, unless Case sensitive searching is enabled.'
|
' spaces and accents, unless Case sensitive searching is enabled.'
|
||||||
'<li><b>Whole words</b> will search for whole words that equal the entered text. As with'
|
'<li><b>Whole words</b> will search for whole words that equal the entered text. As with'
|
||||||
' "Contains" searches punctuation and accents are ignored.'
|
' "Contains" searches punctuation and accents are ignored.'
|
||||||
|
'<li><b>Nearby words</b> will search for whole words that are near each other in the text.'
|
||||||
|
' For example: <i>calibre cool</i> will find places in the text where the words <i>calibre</i> and <i>cool</i>'
|
||||||
|
' occur within 60 characters of each other. To change the number of characters add the number to the end of'
|
||||||
|
' the list of words, for example: <i>calibre cool awesome 120</i> will search for <i>calibre</i>, <i>cool</i>'
|
||||||
|
' and <i>awesome</i> within 120 characters of each other.'
|
||||||
'<li><b>Regex</b> will interpret the text as a regular expression.'
|
'<li><b>Regex</b> will interpret the text as a regular expression.'
|
||||||
))
|
))
|
||||||
qt.setCurrentIndex(qt.findData(vprefs.get(f'viewer-{self.panel_name}-mode', 'normal') or 'normal'))
|
qt.setCurrentIndex(qt.findData(vprefs.get(f'viewer-{self.panel_name}-mode', 'normal') or 'normal'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user