mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Don't use eval() to generate ICU functions
Instead, use the Python 3 nonlocal feature to generate closure (cell) functions. Performance is better and the code is simpler.
This commit is contained in:
parent
611613681f
commit
da779f6f2f
@ -110,109 +110,94 @@ def case_sensitive_collator():
|
||||
_case_sensitive_collator.upper_first = True
|
||||
return _case_sensitive_collator
|
||||
|
||||
# Templates that will be used to generate various concrete
|
||||
# function implementations based on different collators, to allow lazy loading
|
||||
# of collators, with maximum runtime performance
|
||||
|
||||
def make_sort_key_func(collator_function, func_name='sort_key'):
|
||||
func = None
|
||||
|
||||
def sort_key(a):
|
||||
nonlocal func
|
||||
if func is None:
|
||||
func = getattr(collator_function(), func_name)
|
||||
|
||||
_sort_key_template = '''
|
||||
def {name}(obj):
|
||||
try:
|
||||
try:
|
||||
return {collator}.{func}(obj)
|
||||
except AttributeError:
|
||||
pass
|
||||
return {collator_func}().{func}(obj)
|
||||
except TypeError:
|
||||
if isinstance(obj, bytes):
|
||||
try:
|
||||
obj = obj.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return obj
|
||||
return {collator}.{func}(obj)
|
||||
return b''
|
||||
'''
|
||||
return func(a)
|
||||
except TypeError:
|
||||
if isinstance(a, bytes):
|
||||
try:
|
||||
a = a.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return a
|
||||
return func(a)
|
||||
return b''
|
||||
|
||||
return sort_key
|
||||
|
||||
|
||||
def make_two_arg_func(collator_function, func_name='strcmp'):
|
||||
func = None
|
||||
|
||||
def two_args(a, b):
|
||||
nonlocal func
|
||||
if func is None:
|
||||
func = getattr(collator_function(), func_name)
|
||||
|
||||
_strcmp_template = '''
|
||||
def {name}(a, b):
|
||||
try:
|
||||
try:
|
||||
return {collator}.{func}(a, b)
|
||||
except AttributeError:
|
||||
pass
|
||||
return {collator_func}().{func}(a, b)
|
||||
except TypeError:
|
||||
if isinstance(a, bytes):
|
||||
try:
|
||||
a = a.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return cmp(a, b)
|
||||
elif a is None:
|
||||
a = u''
|
||||
if isinstance(b, bytes):
|
||||
try:
|
||||
b = b.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return cmp(a, b)
|
||||
elif b is None:
|
||||
b = u''
|
||||
return {collator}.{func}(a, b)
|
||||
'''
|
||||
return func(a, b)
|
||||
except TypeError:
|
||||
if isinstance(a, bytes):
|
||||
try:
|
||||
a = a.decode(sys.getdefaultencoding())
|
||||
except Exception:
|
||||
return cmp(a, b)
|
||||
elif a is None:
|
||||
a = ''
|
||||
if isinstance(b, bytes):
|
||||
try:
|
||||
b = b.decode(sys.getdefaultencoding())
|
||||
except Exception:
|
||||
return cmp(a, b)
|
||||
elif b is None:
|
||||
b = ''
|
||||
return func(a, b)
|
||||
|
||||
_change_case_template = '''
|
||||
def {name}(x):
|
||||
try:
|
||||
return two_args
|
||||
|
||||
|
||||
def make_change_case_func(which):
|
||||
|
||||
def change_case(x):
|
||||
try:
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
collator() # sets _locale
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
except TypeError:
|
||||
if isinstance(x, bytes):
|
||||
try:
|
||||
x = x.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return x
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
raise
|
||||
'''
|
||||
|
||||
|
||||
def _make_func(template, name, **kwargs):
|
||||
l = globals()
|
||||
kwargs['name'] = name
|
||||
kwargs['func'] = kwargs.get('func', 'sort_key')
|
||||
exec(template.format(**kwargs), l)
|
||||
return l[name]
|
||||
|
||||
|
||||
return _icu.change_case(x, which, _locale)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
collator() # sets _locale
|
||||
return _icu.change_case(x, which, _locale)
|
||||
except TypeError:
|
||||
if isinstance(x, bytes):
|
||||
try:
|
||||
x = x.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return x
|
||||
return _icu.change_case(x, which, _locale)
|
||||
raise
|
||||
return change_case
|
||||
# }}}
|
||||
|
||||
|
||||
# ################ The string functions ########################################
|
||||
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator')
|
||||
sort_key = make_sort_key_func(sort_collator)
|
||||
numeric_sort_key = make_sort_key_func(numeric_collator)
|
||||
primary_sort_key = make_sort_key_func(primary_collator)
|
||||
case_sensitive_sort_key = make_sort_key_func(case_sensitive_collator)
|
||||
collation_order = make_sort_key_func(sort_collator, 'collation_order')
|
||||
|
||||
numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator')
|
||||
|
||||
primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator')
|
||||
|
||||
case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key',
|
||||
collator='_case_sensitive_collator', collator_func='case_sensitive_collator')
|
||||
|
||||
collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')
|
||||
|
||||
strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')
|
||||
|
||||
case_sensitive_strcmp = _make_func(
|
||||
_strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')
|
||||
|
||||
primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')
|
||||
|
||||
upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')
|
||||
|
||||
lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')
|
||||
|
||||
title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')
|
||||
strcmp = make_two_arg_func(sort_collator)
|
||||
case_sensitive_strcmp = make_two_arg_func(case_sensitive_collator)
|
||||
primary_strcmp = make_two_arg_func(primary_collator)
|
||||
upper = make_change_case_func(_icu.UPPER_CASE)
|
||||
lower = make_change_case_func(_icu.LOWER_CASE)
|
||||
title_case = make_change_case_func(_icu.TITLE_CASE)
|
||||
|
||||
|
||||
def capitalize(x):
|
||||
@ -227,20 +212,13 @@ try:
|
||||
except AttributeError: # For people running from source
|
||||
swapcase = lambda x:x.swapcase()
|
||||
|
||||
find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find')
|
||||
|
||||
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')
|
||||
|
||||
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')
|
||||
|
||||
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
|
||||
|
||||
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
|
||||
|
||||
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')
|
||||
|
||||
find = make_two_arg_func(collator, 'find')
|
||||
primary_find = make_two_arg_func(primary_collator, 'find')
|
||||
contains = make_two_arg_func(collator, 'contains')
|
||||
primary_contains = make_two_arg_func(primary_collator, 'contains')
|
||||
startswith = make_two_arg_func(collator, 'startswith')
|
||||
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
|
||||
safe_chr = _icu.chr
|
||||
|
||||
ord_string = _icu.ord_string
|
||||
|
||||
|
||||
|
@ -13,13 +13,13 @@ from polyglot.builtins import iteritems, unicode_type, cmp
|
||||
|
||||
|
||||
@contextmanager
|
||||
def make_collation_func(name, locale, numeric=True, template='_sort_key_template', func='strcmp'):
|
||||
c = icu._icu.Collator(locale)
|
||||
cname = '%s_test_collator%s' % (name, template)
|
||||
setattr(icu, cname, c)
|
||||
c.numeric = numeric
|
||||
yield icu._make_func(getattr(icu, template), name, collator=cname, collator_func='not_used_xxx', func=func)
|
||||
delattr(icu, cname)
|
||||
def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func, func='strcmp'):
|
||||
def coll():
|
||||
ans = icu._icu.Collator(locale)
|
||||
ans.numeric = numeric
|
||||
return ans
|
||||
|
||||
yield maker(coll, func)
|
||||
|
||||
|
||||
class TestICU(unittest.TestCase):
|
||||
@ -46,23 +46,23 @@ class TestICU(unittest.TestCase):
|
||||
self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))
|
||||
|
||||
# Test locales
|
||||
with make_collation_func('dsk', 'de', func='sort_key') as dsk:
|
||||
with make_collation_func('dsk', 'de', maker=icu.make_sort_key_func, func='sort_key') as dsk:
|
||||
self.ae(german_good, sorted(german, key=dsk))
|
||||
with make_collation_func('dcmp', 'de', template='_strcmp_template') as dcmp:
|
||||
with make_collation_func('dcmp', 'de', maker=icu.make_two_arg_func, func='strcmp') as dcmp:
|
||||
for x in german:
|
||||
for y in german:
|
||||
self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))
|
||||
|
||||
with make_collation_func('fsk', 'fr', func='sort_key') as fsk:
|
||||
with make_collation_func('fsk', 'fr', maker=icu.make_sort_key_func, func='sort_key') as fsk:
|
||||
self.ae(french_good, sorted(french, key=fsk))
|
||||
with make_collation_func('fcmp', 'fr', template='_strcmp_template') as fcmp:
|
||||
with make_collation_func('fcmp', 'fr', maker=icu.make_two_arg_func) as fcmp:
|
||||
for x in french:
|
||||
for y in french:
|
||||
self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))
|
||||
|
||||
with make_collation_func('ssk', 'es', func='sort_key') as ssk:
|
||||
with make_collation_func('ssk', 'es', maker=icu.make_sort_key_func, func='sort_key') as ssk:
|
||||
self.assertNotEqual(ssk('peña'), ssk('pena'))
|
||||
with make_collation_func('scmp', 'es', template='_strcmp_template') as scmp:
|
||||
with make_collation_func('scmp', 'es', maker=icu.make_two_arg_func) as scmp:
|
||||
self.assertNotEqual(0, scmp('pena', 'peña'))
|
||||
|
||||
for k, v in iteritems({u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}):
|
||||
|
Loading…
x
Reference in New Issue
Block a user