diff --git a/src/calibre/db/categories.py b/src/calibre/db/categories.py index 685f303832..b9de1316f9 100644 --- a/src/calibre/db/categories.py +++ b/src/calibre/db/categories.py @@ -115,13 +115,22 @@ def clean_user_categories(dbcache): return new_cats +numeric_collation = tweaks['numeric_collation'] +def first_digit(x): + global numeric_collation + c = icu_upper(x.sort or x.name or ' ')[0] + # The idea is that '9999999999' is larger than any digit so all digits + # will sort in front. Non-digits will sort according to their ICU first letter + return c if numeric_collation and c.isdigit() else '9999999999' + + category_sort_keys = {True:{}, False: {}} category_sort_keys[True]['popularity'] = category_sort_keys[False]['popularity'] = \ lambda x:(-getattr(x, 'count', 0), sort_key(x.sort or x.name)) category_sort_keys[True]['rating'] = category_sort_keys[False]['rating'] = \ lambda x:(-getattr(x, 'avg_rating', 0.0), sort_key(x.sort or x.name)) category_sort_keys[True]['name'] = \ - lambda x:(collation_order(icu_upper(x.sort or x.name or ' ')), sort_key(x.sort or x.name)) + lambda x:(first_digit(x), collation_order(icu_upper(x.sort or x.name or ' ')), sort_key(x.sort or x.name)) category_sort_keys[False]['name'] = \ lambda x:sort_key(x.sort or x.name) diff --git a/src/calibre/gui2/tag_browser/model.py b/src/calibre/gui2/tag_browser/model.py index 210331f707..99ee9af50e 100644 --- a/src/calibre/gui2/tag_browser/model.py +++ b/src/calibre/gui2/tag_browser/model.py @@ -24,8 +24,8 @@ from calibre.library.field_metadata import category_icon_map from calibre.utils.config import prefs, tweaks from calibre.utils.formatter import EvalFormatter from calibre.utils.icu import ( - collation_order, contains, lower, primary_contains, primary_strcmp, sort_key, - strcmp + contains, lower, primary_contains, primary_strcmp, sort_key, + strcmp, collation_order_for_partitioning ) from calibre.utils.serialize import json_dumps, json_loads from polyglot.builtins import iteritems, itervalues @@ -556,7 +556,7 @@ class TagsModel(QAbstractItemModel): # {{{ # Deal with items that don't have sorts, such as formats t = tag.sort if tag.sort else tag.name c = icu_upper(t) if t else ' ' - ordnum, ordlen = collation_order(c) + ordnum, ordlen = collation_order_for_partitioning(c) if last_ordnum != ordnum: if fl_collapse and idx > 0: intervals.append(FL_Interval(last_c, last_c, idx-last_idx)) diff --git a/src/calibre/srv/metadata.py b/src/calibre/srv/metadata.py index 51e72b54c1..1487759110 100644 --- a/src/calibre/srv/metadata.py +++ b/src/calibre/srv/metadata.py @@ -17,7 +17,7 @@ from calibre.utils.date import isoformat, UNDEFINED_DATE, local_tz from calibre.utils.config import tweaks from calibre.utils.formatter import EvalFormatter from calibre.utils.file_type_icons import EXT_MAP -from calibre.utils.icu import collation_order +from calibre.utils.icu import collation_order_for_partitioning from calibre.utils.localization import calibre_langcode_to_name from calibre.library.comments import comments_to_html, markdown from calibre.library.field_metadata import category_icon_map @@ -291,7 +291,7 @@ def build_first_letter_list(category_items): c = ' ' else: c = icu_upper(tag.sort) - ordnum, ordlen = collation_order(c) + ordnum, ordlen = collation_order_for_partitioning(c) if last_ordnum != ordnum: last_c = c[0:ordlen] last_ordnum = ordnum diff --git a/src/calibre/utils/icu.py b/src/calibre/utils/icu.py index 7d4042610a..bf38ebd1ba 100644 --- a/src/calibre/utils/icu.py +++ b/src/calibre/utils/icu.py @@ -98,7 +98,7 @@ def non_numeric_sort_collator(): _non_numeric_sort_collator = collator().clone() _non_numeric_sort_collator.strength = _icu.UCOL_SECONDARY _non_numeric_sort_collator.numeric = False - return _sort_collator + return _non_numeric_sort_collator def numeric_collator(): diff --git a/src/calibre/utils/icu_test.py b/src/calibre/utils/icu_test.py index e628cb5725..d40a720b91 100644 --- a/src/calibre/utils/icu_test.py +++ b/src/calibre/utils/icu_test.py @@ -25,6 +25,7 @@ def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func class TestICU(unittest.TestCase): ae = unittest.TestCase.assertEqual + ane= unittest.TestCase.assertNotEqual def setUp(self): icu.change_locale('en') @@ -115,15 +116,38 @@ class TestICU(unittest.TestCase): def test_collation_order(self): 'Testing collation ordering' + from calibre.utils.icu import collation_order for group in [ - ('Šaa', 'Smith', 'Solženicyn', 'Štepánek'), - ('01', '1'), + (self.ae, ('Šaa', 'Smith', 'Solženicyn', 'Štepánek')), + (self.ae, ('11', '011')), + (self.ane, ('2', '1')), + (self.ae, ('100 Smith', '0100 Smith')), ]: last = None - for x in group: - order, length = icu.numeric_collator().collation_order(x) + assert_func = group[0] + for x in group[1]: + order, _ = icu.numeric_collator().collation_order(x) if last is not None: - self.ae(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) + assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) + last = order + + self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])), + {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']}) + + def test_collation_order_for_partitioning(self): + 'Testing collation ordering for partitioning' + for group in [ + (self.ae, ('Smith', 'Šaa', 'Solženicyn', 'Štepánek')), + (self.ane, ('11', '011')), + (self.ae, ('102 Smith', '100 Smith')), + (self.ane, ('100 Smith', '0100 Smith')), + ]: + last = None + assert_func = group[0] + for x in group[1]: + order, _ = icu.non_numeric_sort_collator().collation_order(x) + if last is not None: + assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order)) last = order self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),