mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ICU collator for searching ignoring punctuation
This commit is contained in:
parent
4412d3f615
commit
303006cb11
@ -12,7 +12,8 @@ from calibre.utils.config_base import tweaks, prefs
|
|||||||
from calibre_extensions import icu as _icu
|
from calibre_extensions import icu as _icu
|
||||||
from polyglot.builtins import cmp
|
from polyglot.builtins import cmp
|
||||||
|
|
||||||
_locale = _collator = _primary_collator = _sort_collator = _non_numeric_sort_collator = _numeric_collator = _case_sensitive_collator = None
|
_locale = _collator = _primary_collator = _sort_collator = _non_numeric_sort_collator = _numeric_collator = None
|
||||||
|
_case_sensitive_collator = _primary_no_punc_collator = None
|
||||||
cmp
|
cmp
|
||||||
|
|
||||||
_none = ''
|
_none = ''
|
||||||
@ -66,7 +67,9 @@ def collator():
|
|||||||
|
|
||||||
def change_locale(locale=None):
|
def change_locale(locale=None):
|
||||||
global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator, _non_numeric_sort_collator
|
global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator, _non_numeric_sort_collator
|
||||||
|
global _primary_no_punc_collator
|
||||||
_collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = _non_numeric_sort_collator = None
|
_collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = _non_numeric_sort_collator = None
|
||||||
|
_primary_no_punc_collator = None
|
||||||
_locale = locale
|
_locale = locale
|
||||||
|
|
||||||
|
|
||||||
@ -79,6 +82,16 @@ def primary_collator():
|
|||||||
return _primary_collator
|
return _primary_collator
|
||||||
|
|
||||||
|
|
||||||
|
def primary_collator_without_punctuation():
    'Collator for matching that disregards case, accents and punctuation'
    # The collator is built lazily on first use and cached in a module-level
    # global; change_locale() sets that global back to None so it is rebuilt.
    global _primary_no_punc_collator
    if _primary_no_punc_collator is not None:
        return _primary_no_punc_collator
    # Start from a private copy of the base collator so that the attribute
    # changes below do not affect the shared instance.
    _primary_no_punc_collator = collator().clone()
    # Primary strength: only base letters are significant when comparing.
    _primary_no_punc_collator.strength = _icu.UCOL_PRIMARY
    # Shifted alternate handling: this is the setting that makes the collator
    # skip punctuation at primary strength.
    _primary_no_punc_collator.set_attribute(
        _icu.UCOL_ALTERNATE_HANDLING, _icu.UCOL_SHIFTED)
    return _primary_no_punc_collator
|
||||||
|
|
||||||
|
|
||||||
def sort_collator():
|
def sort_collator():
|
||||||
'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
|
'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
|
||||||
global _sort_collator
|
global _sort_collator
|
||||||
@ -223,8 +236,10 @@ except AttributeError: # For people running from source
|
|||||||
|
|
||||||
find = make_two_arg_func(collator, 'find')
|
find = make_two_arg_func(collator, 'find')
|
||||||
primary_find = make_two_arg_func(primary_collator, 'find')
|
primary_find = make_two_arg_func(primary_collator, 'find')
|
||||||
|
primary_no_punc_find = make_two_arg_func(primary_collator_without_punctuation, 'find')
|
||||||
contains = make_two_arg_func(collator, 'contains')
|
contains = make_two_arg_func(collator, 'contains')
|
||||||
primary_contains = make_two_arg_func(primary_collator, 'contains')
|
primary_contains = make_two_arg_func(primary_collator, 'contains')
|
||||||
|
primary_no_punc_contains = make_two_arg_func(primary_collator_without_punctuation, 'contains')
|
||||||
startswith = make_two_arg_func(collator, 'startswith')
|
startswith = make_two_arg_func(collator, 'startswith')
|
||||||
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
|
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
|
||||||
safe_chr = _icu.chr
|
safe_chr = _icu.chr
|
||||||
|
@ -114,6 +114,8 @@ class TestICU(unittest.TestCase):
|
|||||||
self.assertTrue(icu.primary_contains('pena', 'peña'))
|
self.assertTrue(icu.primary_contains('pena', 'peña'))
|
||||||
x = icu.primary_collator()
|
x = icu.primary_collator()
|
||||||
self.ae(x.get_attribute(icu._icu.UCOL_STRENGTH), icu._icu.UCOL_PRIMARY),
|
self.ae(x.get_attribute(icu._icu.UCOL_STRENGTH), icu._icu.UCOL_PRIMARY),
|
||||||
|
self.ae((0, 4), icu.primary_no_punc_find('pena"', 'peña'))
|
||||||
|
self.ae((0, 13), icu.primary_no_punc_find("typographers", 'typographer’s'))
|
||||||
|
|
||||||
def test_collation_order(self):
|
def test_collation_order(self):
|
||||||
'Testing collation ordering'
|
'Testing collation ordering'
|
||||||
|
Loading…
x
Reference in New Issue
Block a user