mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
A utility function to partition a list of items by their first letter, using ICU collation ordering
This commit is contained in:
parent
a761e39317
commit
3c971aa472
@ -7,6 +7,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# True when the interpreter's largest representable code point
# (sys.maxunicode) is below 0x10ffff, the top of the Unicode range.
is_narrow_build = sys.maxunicode < 0x10ffff
|
||||||
|
|
||||||
# Setup code {{{
|
# Setup code {{{
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
@ -250,6 +254,26 @@ def contractions(col=None):
|
|||||||
_cmap[col] = ans
|
_cmap[col] = ans
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def partition_by_first_letter(items, reverse=False, key=lambda x:x):
    """Group ``items`` by their leading letter, in collation order.

    The items are first sorted by ``sort_key(key(item))`` (descending when
    ``reverse`` is true). Each item's key is then upper-cased with
    ``icu_upper`` (an empty/falsy key is treated as a single space) and
    passed to ``collation_order``, which yields an ``(ordinal, length)``
    pair. A new group is started whenever the ordinal differs from the
    ordinal of the previous item; otherwise the item joins the current
    group.

    :param items: iterable of items to partition
    :param reverse: passed through to ``sorted``
    :param key: callable mapping an item to the text it is grouped by
    :return: an ``OrderedDict`` mapping the group's leading text to the list
             of items in that group, in sorted order
    """
    from collections import OrderedDict

    ordered = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
    groups = OrderedDict()
    # Until an item with a non-zero ordinal is seen, items land under ' '.
    current_letter = ' '
    current_ordinal = 0

    for entry in ordered:
        text = icu_upper(key(entry) or ' ')
        ordinal, length = collation_order(text)
        if ordinal != current_ordinal:
            # Only narrow builds use the length reported by collation_order;
            # otherwise the group label is exactly one character.
            prefix_len = length if is_narrow_build else 1
            current_letter = text[:prefix_len]
            current_ordinal = ordinal
        if current_letter in groups:
            groups[current_letter].append(entry)
        else:
            groups[current_letter] = [entry]

    return groups
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
|
|
||||||
|
@ -125,6 +125,9 @@ class TestICU(unittest.TestCase):
|
|||||||
self.ae(last, order)
|
self.ae(last, order)
|
||||||
last = order
|
last = order
|
||||||
|
|
||||||
|
self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
|
||||||
|
{' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})
|
||||||
|
|
||||||
def test_roundtrip(self):
|
def test_roundtrip(self):
|
||||||
for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
|
for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
|
||||||
self.ae(r, icu._icu.roundtrip(r))
|
self.ae(r, icu._icu.roundtrip(r))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user