From 06b598eb9df0bcd42d8e86aab69e416b671a6265 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Sun, 24 Oct 2021 17:07:50 +0100 Subject: [PATCH 1/3] Fix non_numeric_sort_collator. --- src/calibre/utils/icu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 7d4042610a..bf38ebd1ba 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -98,7 +98,7 @@ def non_numeric_sort_collator(): _non_numeric_sort_collator = collator().clone() _non_numeric_sort_collator.strength = _icu.UCOL_SECONDARY _non_numeric_sort_collator.numeric = False - return _sort_collator + return _non_numeric_sort_collator def numeric_collator(): From 82e4b04e2f84a2e68580418ced43975f6766e796 Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Sun, 24 Oct 2021 17:08:15 +0100 Subject: [PATCH 2/3] Include some tests for non_numeric_sort_collator --- src/calibre/utils/icu_test.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py index e628cb5725..d40a720b91 100644 --- a/src/calibre/utils/icu_test.py +++ b/src/calibre/utils/icu_test.py @@ -25,6 +25,7 @@ def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func class TestICU(unittest.TestCase): ae = unittest.TestCase.assertEqual + ane= unittest.TestCase.assertNotEqual def setUp(self): icu.change_locale('en') @@ -115,15 +116,38 @@ class TestICU(unittest.TestCase): def test_collation_order(self): 'Testing collation ordering' + from calibre.utils.icu import collation_order for group in [ - ('Šaa', 'Smith', 'Solženicyn', 'Štepánek'), - ('01', '1'), + (self.ae, ('Šaa', 'Smith', 'Solženicyn', 'Štepánek')), + (self.ae, ('11', '011')), + (self.ane, ('2', '1')), + (self.ae, ('100 Smith', '0100 Smith')), ]: last = None - for x in group: - order, length = icu.numeric_collator().collation_order(x) + assert_func = group[0] + for x in group[1]: + order, _ = icu.numeric_collator().collation_order(x) if last is not None: - self.ae(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) + assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) + last = order + + self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])), + {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']}) + + def test_collation_order_for_partitioning(self): + 'Testing collation ordering for partitioning' + for group in [ + (self.ae, ('Smith', 'Šaa', 'Solženicyn', 'Štepánek')), + (self.ane, ('11', '011')), + (self.ae, ('102 Smith', '100 Smith')), + (self.ane, ('100 Smith', '0100 Smith')), + ]: + last = None + assert_func = group[0] + for x in group[1]: + order, _ = icu.non_numeric_sort_collator().collation_order(x) + if last is not None: + assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) last = order self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])), From 21a4207ad77033a46f00ccdab15224ed7ea05a1b Mon Sep 17 00:00:00 2001 From: Charles Haley Date: Sun, 24 Oct 2021 17:18:02 +0100 Subject: [PATCH 3/3] Bug #1948560: Odd behaviour with digits in alphabetical partitioning This is fixed for the tag browser in the gui and the content server. I couldn't find anywhere else that used first letter partitioning. When using first letter partitioning get_categories() constructs the sort key as follows: - If the first character of the string is a digit (I hope isdigit() is localized) then use that digit otherwise use a string that is larger than any digit - The ICU collation order key - The sort key. As such, items that start with a digit will first sort by that digit, then if needed use the ICU key as the second level then if needed use the sort key as the third level. Items that do not start with a digit will sort by the ICU key then the sort key. --- src/calibre/db/categories.py | 11 ++++++++++- src/calibre/gui2/tag_browser/model.py | 6 +++--- src/calibre/srv/metadata.py | 4 ++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/calibre/db/categories.py b/src/calibre/db/categories.py index 685f303832..b9de1316f9 100644 --- a/src/calibre/db/categories.py +++ b/src/calibre/db/categories.py @@ -115,13 +115,22 @@ def clean_user_categories(dbcache): return new_cats +numeric_collation = tweaks['numeric_collation'] +def first_digit(x): + global numeric_collation + c = icu_upper(x.sort or x.name or ' ')[0] + # The idea is that '9999999999' is larger than any digit so all digits + # will sort in front. Non-digits will sort according to their ICU first letter + return c if numeric_collation and c.isdigit() else '9999999999' + + category_sort_keys = {True:{}, False: {}} category_sort_keys[True]['popularity'] = category_sort_keys[False]['popularity'] = \ lambda x:(-getattr(x, 'count', 0), sort_key(x.sort or x.name)) category_sort_keys[True]['rating'] = category_sort_keys[False]['rating'] = \ lambda x:(-getattr(x, 'avg_rating', 0.0), sort_key(x.sort or x.name)) category_sort_keys[True]['name'] = \ - lambda x:(collation_order(icu_upper(x.sort or x.name or ' ')), sort_key(x.sort or x.name)) + lambda x:(first_digit(x), collation_order(icu_upper(x.sort or x.name or ' ')), sort_key(x.sort or x.name)) category_sort_keys[False]['name'] = \ lambda x:sort_key(x.sort or x.name) diff --git a/src/calibre/gui2/tag_browser/model.py b/src/calibre/gui2/tag_browser/model.py index 210331f707..99ee9af50e 100644 --- a/src/calibre/gui2/tag_browser/model.py +++ b/src/calibre/gui2/tag_browser/model.py @@ -24,8 +24,8 @@ from calibre.library.field_metadata import category_icon_map from calibre.utils.config import prefs, tweaks from calibre.utils.formatter import EvalFormatter from calibre.utils.icu import ( - collation_order, contains, lower, primary_contains, primary_strcmp, sort_key, - strcmp + contains, lower, primary_contains, primary_strcmp, sort_key, + strcmp, collation_order_for_partitioning ) from calibre.utils.serialize import json_dumps, json_loads from polyglot.builtins import iteritems, itervalues @@ -556,7 +556,7 @@ class TagsModel(QAbstractItemModel): # {{{ # Deal with items that don't have sorts, such as formats t = tag.sort if tag.sort else tag.name c = icu_upper(t) if t else ' ' - ordnum, ordlen = collation_order(c) + ordnum, ordlen = collation_order_for_partitioning(c) if last_ordnum != ordnum: if fl_collapse and idx > 0: intervals.append(FL_Interval(last_c, last_c, idx-last_idx)) diff --git a/src/calibre/srv/metadata.py b/src/calibre/srv/metadata.py index 51e72b54c1..1487759110 100644 --- a/src/calibre/srv/metadata.py +++ b/src/calibre/srv/metadata.py @@ -17,7 +17,7 @@ from calibre.utils.date import isoformat, UNDEFINED_DATE, local_tz from calibre.utils.config import tweaks from calibre.utils.formatter import EvalFormatter from calibre.utils.file_type_icons import EXT_MAP -from calibre.utils.icu import collation_order +from calibre.utils.icu import collation_order_for_partitioning from calibre.utils.localization import calibre_langcode_to_name from calibre.library.comments import comments_to_html, markdown from calibre.library.field_metadata import category_icon_map @@ -291,7 +291,7 @@ def build_first_letter_list(category_items): c = ' ' else: c = icu_upper(tag.sort) - ordnum, ordlen = collation_order(c) + ordnum, ordlen = collation_order_for_partitioning(c) if last_ordnum != ordnum: last_c = c[0:ordlen] last_ordnum = ordnum