From aad20ae44740e780f7afe7cc7be0b8299fedae78 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 23 Apr 2022 12:51:54 +0530 Subject: [PATCH] Searching: Ignore punctuation when searching in the book list. So that, for example: Gravitys will match Gravity's. Fixes #1969926 [[Enhancement] Match both straight and curly apostrophe types when searching](https://bugs.launchpad.net/calibre/+bug/1969926) --- src/calibre/db/search.py | 4 ++-- src/calibre/db/tests/reading.py | 4 +++- src/calibre/gui2/preferences/search.ui | 10 +++++++--- src/calibre/utils/icu.py | 5 ++++- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/calibre/db/search.py b/src/calibre/db/search.py index bf15e060f2..301deab4a1 100644 --- a/src/calibre/db/search.py +++ b/src/calibre/db/search.py @@ -14,7 +14,7 @@ from calibre.constants import preferred_encoding, DEBUG from calibre.db.utils import force_to_bool from calibre.utils.config_base import prefs from calibre.utils.date import parse_date, UNDEFINED_DATE, now, dt_as_local -from calibre.utils.icu import primary_contains, sort_key +from calibre.utils.icu import primary_no_punc_contains, sort_key from calibre.utils.localization import lang_map, canonicalize_lang from calibre.utils.search_query_parser import SearchQueryParser, ParseException from polyglot.builtins import iteritems, string_or_bytes @@ -76,7 +76,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True, case_sensit return True elif matchkind == CONTAINS_MATCH: if not case_sensitive and use_primary_find_in_search: - if primary_contains(query, t): + if primary_no_punc_contains(query, t): return True elif query in t: return True diff --git a/src/calibre/db/tests/reading.py b/src/calibre/db/tests/reading.py index 2fc89a0144..7ffbdaefed 100644 --- a/src/calibre/db/tests/reading.py +++ b/src/calibre/db/tests/reading.py @@ -324,7 +324,6 @@ class ReadingTest(BaseTest): 'formats:#>1', 'formats:#=1', 'formats:=fmt1', 'formats:=fmt2', 'formats:=fmt1 or formats:fmt2', '#formats:true', '#formats:false', '#formats:fmt1', '#formats:fmt2', '#formats:fmt1 and #formats:fmt2', - )} old.conn.close() old = None @@ -375,6 +374,9 @@ class ReadingTest(BaseTest): self.assertEqual(cache.search('template:{series}#@#:b:true'), {1,2}) self.assertEqual(cache.search('template:{series}#@#:b:false'), {3}) + # test primary search + cache.set_field('title', {1: "Gravity’s Raiñbow"}) + self.assertEqual(cache.search('title:"Gravity\'s Rainbow"'), {1}) # Note that the old db searched uuid for un-prefixed searches, the new # db does not, for performance diff --git a/src/calibre/gui2/preferences/search.ui b/src/calibre/gui2/preferences/search.ui index 8dd6a546cc..c726abd808 100644 --- a/src/calibre/gui2/preferences/search.ui +++ b/src/calibre/gui2/preferences/search.ui @@ -135,8 +135,12 @@ + + Searching will ignore accents on characters as well as punctuation. So for example: +Penas will match Peña’s + - Unaccented characters match &accented characters + Unaccented characters match &accented characters and punctuation is ignored @@ -165,7 +169,7 @@ - + @@ -212,7 +216,7 @@ a search term by changing the value box then pressing Save. - + Enter a comma-separated list of lookup names of the columns diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index d39099c694..1096fde7eb 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -88,7 +88,10 @@ def primary_collator_without_punctuation(): if _primary_no_punc_collator is None: _primary_no_punc_collator = collator().clone() _primary_no_punc_collator.strength = _icu.UCOL_PRIMARY - _primary_no_punc_collator.set_attribute(_icu.UCOL_ALTERNATE_HANDLING, _icu.UCOL_SHIFTED) + try: + _primary_no_punc_collator.set_attribute(_icu.UCOL_ALTERNATE_HANDLING, _icu.UCOL_SHIFTED) + except AttributeError: + pass # people running from source without latest binary return _primary_no_punc_collator