Don't use eval() to generate ICU functions

Instead, use the Python 3 nonlocal feature to generate closure (cell-variable) functions.
Performance is better and the code is simpler.
This commit is contained in:
Kovid Goyal 2021-03-11 09:13:42 +05:30
parent 611613681f
commit da779f6f2f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 95 additions and 117 deletions

View File

@ -110,109 +110,94 @@ def case_sensitive_collator():
_case_sensitive_collator.upper_first = True _case_sensitive_collator.upper_first = True
return _case_sensitive_collator return _case_sensitive_collator
# Templates that will be used to generate various concrete
# function implementations based on different collators, to allow lazy loading
# of collators, with maximum runtime performance
def make_sort_key_func(collator_function, func_name='sort_key'):
func = None
def sort_key(a):
nonlocal func
if func is None:
func = getattr(collator_function(), func_name)
_sort_key_template = '''
def {name}(obj):
try:
try: try:
return {collator}.{func}(obj) return func(a)
except AttributeError: except TypeError:
pass if isinstance(a, bytes):
return {collator_func}().{func}(obj) try:
except TypeError: a = a.decode(sys.getdefaultencoding())
if isinstance(obj, bytes): except ValueError:
try: return a
obj = obj.decode(sys.getdefaultencoding()) return func(a)
except ValueError: return b''
return obj
return {collator}.{func}(obj) return sort_key
return b''
'''
def make_two_arg_func(collator_function, func_name='strcmp'):
func = None
def two_args(a, b):
nonlocal func
if func is None:
func = getattr(collator_function(), func_name)
_strcmp_template = '''
def {name}(a, b):
try:
try: try:
return {collator}.{func}(a, b) return func(a, b)
except AttributeError: except TypeError:
pass if isinstance(a, bytes):
return {collator_func}().{func}(a, b) try:
except TypeError: a = a.decode(sys.getdefaultencoding())
if isinstance(a, bytes): except Exception:
try: return cmp(a, b)
a = a.decode(sys.getdefaultencoding()) elif a is None:
except ValueError: a = ''
return cmp(a, b) if isinstance(b, bytes):
elif a is None: try:
a = u'' b = b.decode(sys.getdefaultencoding())
if isinstance(b, bytes): except Exception:
try: return cmp(a, b)
b = b.decode(sys.getdefaultencoding()) elif b is None:
except ValueError: b = ''
return cmp(a, b) return func(a, b)
elif b is None:
b = u''
return {collator}.{func}(a, b)
'''
_change_case_template = ''' return two_args
def {name}(x):
try:
def make_change_case_func(which):
def change_case(x):
try: try:
return _icu.change_case(x, _icu.{which}, _locale)
except NotImplementedError:
pass
collator() # sets _locale
return _icu.change_case(x, _icu.{which}, _locale)
except TypeError:
if isinstance(x, bytes):
try: try:
x = x.decode(sys.getdefaultencoding()) return _icu.change_case(x, which, _locale)
except ValueError: except NotImplementedError:
return x pass
return _icu.change_case(x, _icu.{which}, _locale) collator() # sets _locale
raise return _icu.change_case(x, which, _locale)
''' except TypeError:
if isinstance(x, bytes):
try:
def _make_func(template, name, **kwargs): x = x.decode(sys.getdefaultencoding())
l = globals() except ValueError:
kwargs['name'] = name return x
kwargs['func'] = kwargs.get('func', 'sort_key') return _icu.change_case(x, which, _locale)
exec(template.format(**kwargs), l) raise
return l[name] return change_case
# }}} # }}}
# ################ The string functions ######################################## # ################ The string functions ########################################
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator') sort_key = make_sort_key_func(sort_collator)
numeric_sort_key = make_sort_key_func(numeric_collator)
primary_sort_key = make_sort_key_func(primary_collator)
case_sensitive_sort_key = make_sort_key_func(case_sensitive_collator)
collation_order = make_sort_key_func(sort_collator, 'collation_order')
numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator') strcmp = make_two_arg_func(sort_collator)
case_sensitive_strcmp = make_two_arg_func(case_sensitive_collator)
primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator') primary_strcmp = make_two_arg_func(primary_collator)
upper = make_change_case_func(_icu.UPPER_CASE)
case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key', lower = make_change_case_func(_icu.LOWER_CASE)
collator='_case_sensitive_collator', collator_func='case_sensitive_collator') title_case = make_change_case_func(_icu.TITLE_CASE)
collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')
strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')
case_sensitive_strcmp = _make_func(
_strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')
primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')
upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')
lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')
title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')
def capitalize(x): def capitalize(x):
@ -227,20 +212,13 @@ try:
except AttributeError: # For people running from source except AttributeError: # For people running from source
swapcase = lambda x:x.swapcase() swapcase = lambda x:x.swapcase()
find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find') find = make_two_arg_func(collator, 'find')
primary_find = make_two_arg_func(primary_collator, 'find')
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find') contains = make_two_arg_func(collator, 'contains')
primary_contains = make_two_arg_func(primary_collator, 'contains')
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains') startswith = make_two_arg_func(collator, 'startswith')
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')
safe_chr = _icu.chr safe_chr = _icu.chr
ord_string = _icu.ord_string ord_string = _icu.ord_string

View File

@ -13,13 +13,13 @@ from polyglot.builtins import iteritems, unicode_type, cmp
@contextmanager @contextmanager
def make_collation_func(name, locale, numeric=True, template='_sort_key_template', func='strcmp'): def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func, func='strcmp'):
c = icu._icu.Collator(locale) def coll():
cname = '%s_test_collator%s' % (name, template) ans = icu._icu.Collator(locale)
setattr(icu, cname, c) ans.numeric = numeric
c.numeric = numeric return ans
yield icu._make_func(getattr(icu, template), name, collator=cname, collator_func='not_used_xxx', func=func)
delattr(icu, cname) yield maker(coll, func)
class TestICU(unittest.TestCase): class TestICU(unittest.TestCase):
@ -46,23 +46,23 @@ class TestICU(unittest.TestCase):
self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding()))) self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))
# Test locales # Test locales
with make_collation_func('dsk', 'de', func='sort_key') as dsk: with make_collation_func('dsk', 'de', maker=icu.make_sort_key_func, func='sort_key') as dsk:
self.ae(german_good, sorted(german, key=dsk)) self.ae(german_good, sorted(german, key=dsk))
with make_collation_func('dcmp', 'de', template='_strcmp_template') as dcmp: with make_collation_func('dcmp', 'de', maker=icu.make_two_arg_func, func='strcmp') as dcmp:
for x in german: for x in german:
for y in german: for y in german:
self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y)) self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))
with make_collation_func('fsk', 'fr', func='sort_key') as fsk: with make_collation_func('fsk', 'fr', maker=icu.make_sort_key_func, func='sort_key') as fsk:
self.ae(french_good, sorted(french, key=fsk)) self.ae(french_good, sorted(french, key=fsk))
with make_collation_func('fcmp', 'fr', template='_strcmp_template') as fcmp: with make_collation_func('fcmp', 'fr', maker=icu.make_two_arg_func) as fcmp:
for x in french: for x in french:
for y in french: for y in french:
self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y)) self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))
with make_collation_func('ssk', 'es', func='sort_key') as ssk: with make_collation_func('ssk', 'es', maker=icu.make_sort_key_func, func='sort_key') as ssk:
self.assertNotEqual(ssk('peña'), ssk('pena')) self.assertNotEqual(ssk('peña'), ssk('pena'))
with make_collation_func('scmp', 'es', template='_strcmp_template') as scmp: with make_collation_func('scmp', 'es', maker=icu.make_two_arg_func) as scmp:
self.assertNotEqual(0, scmp('pena', 'peña')) self.assertNotEqual(0, scmp('pena', 'peña'))
for k, v in iteritems({u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}): for k, v in iteritems({u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}):