Use ICU whole word searching for the viewer's whole word search function

This commit is contained in:
Kovid Goyal 2022-04-24 19:57:49 +05:30
parent 0662d5cc77
commit 41b99c241d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 5 additions and 3 deletions

View File

@@ -313,14 +313,14 @@ def toc_nodes_for_search_result(sr):
def search_in_name(name, search_query, ctx_size=75):
raw = searchable_text_for_name(name)[0]
-if search_query.mode in ('word', 'regex') or search_query.case_sensitive:
+if search_query.mode == 'regex' or search_query.case_sensitive:
def miter():
for match in search_query.regex.finditer(raw):
yield match.span()
else:
spans = []
a = lambda s, l: spans.append((s, s + l))
-primary_collator_without_punctuation().find_all(search_query.text, raw, a)
+primary_collator_without_punctuation().find_all(search_query.text, raw, a, search_query.mode == 'word')
miter = lambda: spans
for (start, end) in miter():
@@ -380,7 +380,8 @@ class SearchInput(QWidget): # {{{
'Choose the type of search: <ul>'
'<li><b>Contains</b> will search for the entered text anywhere. It will ignore punctuation,'
' spaces and accents, unless Case sensitive searching is enabled.'
-'<li><b>Whole words</b> will search for whole words that equal the entered text.'
+'<li><b>Whole words</b> will search for whole words that equal the entered text. As with'
+' "Contains" searches punctuation and accents are ignored.'
'<li><b>Regex</b> will interpret the text as a regular expression.'
))
qt.setCurrentIndex(qt.findData(vprefs.get(f'viewer-{self.panel_name}-mode', 'normal') or 'normal'))

View File

@@ -127,6 +127,7 @@ class TestICU(unittest.TestCase):
c = icu.primary_collator_without_punctuation()
self.ae(c.find('a', 'abc a bc'), (0, 1))
self.ae(c.find('a', 'abc a bc', True), (4, 1))
+self.ae(c.find('pena', 'a peñaabc peña', True), (10, 4))
def test_collation_order(self):
'Testing collation ordering'