A utility function to partition a list of items by their first letter, using ICU collation ordering

This commit is contained in:
Kovid Goyal 2014-03-31 18:06:21 +05:30
parent a761e39317
commit 3c971aa472
2 changed files with 27 additions and 0 deletions

View File

@@ -7,6 +7,10 @@ __license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys
# True when the largest code point this interpreter supports
# (sys.maxunicode) is below 0x10ffff, i.e. on a "narrow" Unicode build.
is_narrow_build = sys.maxunicode < 0x10ffff
# Setup code {{{ # Setup code {{{
import sys import sys
@@ -250,6 +254,26 @@ def contractions(col=None):
_cmap[col] = ans _cmap[col] = ans
return ans return ans
def partition_by_first_letter(items, reverse=False, key=lambda x:x):
    '''
    Group ``items`` by their first letter, using ICU collation ordering.

    The items are sorted with ``sort_key`` and then walked in order; a new
    group is started whenever the ordinal that ``collation_order`` reports
    for an item's upper-cased text differs from the previous item's.

    :param items: Iterable of the items to partition.
    :param reverse: Passed to ``sorted()``; when True the items (and hence
        the groups) are produced in descending order.
    :param key: Callable returning the text to sort and group an item by.
        A falsy result is treated as a single space.
    :return: An ``OrderedDict`` mapping each first letter to the list of
        items that fall under it, in sorted order.
    '''
    from collections import OrderedDict

    ordered_items = sorted(
        items, key=lambda entry: sort_key(key(entry)), reverse=reverse)
    groups = OrderedDict()
    current_letter, current_ordinal = ' ', 0

    for entry in ordered_items:
        text = icu_upper(key(entry) or ' ')
        ordinal, length = collation_order(text)
        if ordinal != current_ordinal:
            # The length reported by collation_order is only used on narrow
            # builds; otherwise exactly one character is taken as the letter.
            # NOTE(review): presumably the reported length counts UTF-16
            # units -- confirm against collation_order.
            prefix_length = length if is_narrow_build else 1
            current_letter, current_ordinal = text[:prefix_length], ordinal
        groups.setdefault(current_letter, []).append(entry)

    return groups
################################################################################ ################################################################################

View File

@@ -125,6 +125,9 @@ class TestICU(unittest.TestCase):
self.ae(last, order) self.ae(last, order)
last = order last = order
self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
{' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})
def test_roundtrip(self): def test_roundtrip(self):
for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'): for r in (u'xxx\0\u2219\U0001f431xxx', u'\0', u'', u'simple'):
self.ae(r, icu._icu.roundtrip(r)) self.ae(r, icu._icu.roundtrip(r))