mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Don't use eval() to generate ICU functions
Instead, use the Python 3 nonlocal feature to generate cell functions. Performance is better and the code is simpler.
This commit is contained in:
parent
611613681f
commit
da779f6f2f
@ -110,109 +110,94 @@ def case_sensitive_collator():
|
|||||||
_case_sensitive_collator.upper_first = True
|
_case_sensitive_collator.upper_first = True
|
||||||
return _case_sensitive_collator
|
return _case_sensitive_collator
|
||||||
|
|
||||||
# Templates that will be used to generate various concrete
|
|
||||||
# function implementations based on different collators, to allow lazy loading
|
|
||||||
# of collators, with maximum runtime performance
|
|
||||||
|
|
||||||
|
def make_sort_key_func(collator_function, func_name='sort_key'):
|
||||||
|
func = None
|
||||||
|
|
||||||
|
def sort_key(a):
|
||||||
|
nonlocal func
|
||||||
|
if func is None:
|
||||||
|
func = getattr(collator_function(), func_name)
|
||||||
|
|
||||||
_sort_key_template = '''
|
|
||||||
def {name}(obj):
|
|
||||||
try:
|
|
||||||
try:
|
try:
|
||||||
return {collator}.{func}(obj)
|
return func(a)
|
||||||
except AttributeError:
|
except TypeError:
|
||||||
pass
|
if isinstance(a, bytes):
|
||||||
return {collator_func}().{func}(obj)
|
try:
|
||||||
except TypeError:
|
a = a.decode(sys.getdefaultencoding())
|
||||||
if isinstance(obj, bytes):
|
except ValueError:
|
||||||
try:
|
return a
|
||||||
obj = obj.decode(sys.getdefaultencoding())
|
return func(a)
|
||||||
except ValueError:
|
return b''
|
||||||
return obj
|
|
||||||
return {collator}.{func}(obj)
|
return sort_key
|
||||||
return b''
|
|
||||||
'''
|
|
||||||
|
def make_two_arg_func(collator_function, func_name='strcmp'):
|
||||||
|
func = None
|
||||||
|
|
||||||
|
def two_args(a, b):
|
||||||
|
nonlocal func
|
||||||
|
if func is None:
|
||||||
|
func = getattr(collator_function(), func_name)
|
||||||
|
|
||||||
_strcmp_template = '''
|
|
||||||
def {name}(a, b):
|
|
||||||
try:
|
|
||||||
try:
|
try:
|
||||||
return {collator}.{func}(a, b)
|
return func(a, b)
|
||||||
except AttributeError:
|
except TypeError:
|
||||||
pass
|
if isinstance(a, bytes):
|
||||||
return {collator_func}().{func}(a, b)
|
try:
|
||||||
except TypeError:
|
a = a.decode(sys.getdefaultencoding())
|
||||||
if isinstance(a, bytes):
|
except Exception:
|
||||||
try:
|
return cmp(a, b)
|
||||||
a = a.decode(sys.getdefaultencoding())
|
elif a is None:
|
||||||
except ValueError:
|
a = ''
|
||||||
return cmp(a, b)
|
if isinstance(b, bytes):
|
||||||
elif a is None:
|
try:
|
||||||
a = u''
|
b = b.decode(sys.getdefaultencoding())
|
||||||
if isinstance(b, bytes):
|
except Exception:
|
||||||
try:
|
return cmp(a, b)
|
||||||
b = b.decode(sys.getdefaultencoding())
|
elif b is None:
|
||||||
except ValueError:
|
b = ''
|
||||||
return cmp(a, b)
|
return func(a, b)
|
||||||
elif b is None:
|
|
||||||
b = u''
|
|
||||||
return {collator}.{func}(a, b)
|
|
||||||
'''
|
|
||||||
|
|
||||||
_change_case_template = '''
|
return two_args
|
||||||
def {name}(x):
|
|
||||||
try:
|
|
||||||
|
def make_change_case_func(which):
|
||||||
|
|
||||||
|
def change_case(x):
|
||||||
try:
|
try:
|
||||||
return _icu.change_case(x, _icu.{which}, _locale)
|
|
||||||
except NotImplementedError:
|
|
||||||
pass
|
|
||||||
collator() # sets _locale
|
|
||||||
return _icu.change_case(x, _icu.{which}, _locale)
|
|
||||||
except TypeError:
|
|
||||||
if isinstance(x, bytes):
|
|
||||||
try:
|
try:
|
||||||
x = x.decode(sys.getdefaultencoding())
|
return _icu.change_case(x, which, _locale)
|
||||||
except ValueError:
|
except NotImplementedError:
|
||||||
return x
|
pass
|
||||||
return _icu.change_case(x, _icu.{which}, _locale)
|
collator() # sets _locale
|
||||||
raise
|
return _icu.change_case(x, which, _locale)
|
||||||
'''
|
except TypeError:
|
||||||
|
if isinstance(x, bytes):
|
||||||
|
try:
|
||||||
def _make_func(template, name, **kwargs):
|
x = x.decode(sys.getdefaultencoding())
|
||||||
l = globals()
|
except ValueError:
|
||||||
kwargs['name'] = name
|
return x
|
||||||
kwargs['func'] = kwargs.get('func', 'sort_key')
|
return _icu.change_case(x, which, _locale)
|
||||||
exec(template.format(**kwargs), l)
|
raise
|
||||||
return l[name]
|
return change_case
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
# ################ The string functions ########################################
|
# ################ The string functions ########################################
|
||||||
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator')
|
sort_key = make_sort_key_func(sort_collator)
|
||||||
|
numeric_sort_key = make_sort_key_func(numeric_collator)
|
||||||
|
primary_sort_key = make_sort_key_func(primary_collator)
|
||||||
|
case_sensitive_sort_key = make_sort_key_func(case_sensitive_collator)
|
||||||
|
collation_order = make_sort_key_func(sort_collator, 'collation_order')
|
||||||
|
|
||||||
numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator')
|
strcmp = make_two_arg_func(sort_collator)
|
||||||
|
case_sensitive_strcmp = make_two_arg_func(case_sensitive_collator)
|
||||||
primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator')
|
primary_strcmp = make_two_arg_func(primary_collator)
|
||||||
|
upper = make_change_case_func(_icu.UPPER_CASE)
|
||||||
case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key',
|
lower = make_change_case_func(_icu.LOWER_CASE)
|
||||||
collator='_case_sensitive_collator', collator_func='case_sensitive_collator')
|
title_case = make_change_case_func(_icu.TITLE_CASE)
|
||||||
|
|
||||||
collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')
|
|
||||||
|
|
||||||
strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')
|
|
||||||
|
|
||||||
case_sensitive_strcmp = _make_func(
|
|
||||||
_strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')
|
|
||||||
|
|
||||||
primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')
|
|
||||||
|
|
||||||
upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')
|
|
||||||
|
|
||||||
lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')
|
|
||||||
|
|
||||||
title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')
|
|
||||||
|
|
||||||
|
|
||||||
def capitalize(x):
|
def capitalize(x):
|
||||||
@ -227,20 +212,13 @@ try:
|
|||||||
except AttributeError: # For people running from source
|
except AttributeError: # For people running from source
|
||||||
swapcase = lambda x:x.swapcase()
|
swapcase = lambda x:x.swapcase()
|
||||||
|
|
||||||
find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find')
|
find = make_two_arg_func(collator, 'find')
|
||||||
|
primary_find = make_two_arg_func(primary_collator, 'find')
|
||||||
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')
|
contains = make_two_arg_func(collator, 'contains')
|
||||||
|
primary_contains = make_two_arg_func(primary_collator, 'contains')
|
||||||
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')
|
startswith = make_two_arg_func(collator, 'startswith')
|
||||||
|
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
|
||||||
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
|
|
||||||
|
|
||||||
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
|
|
||||||
|
|
||||||
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')
|
|
||||||
|
|
||||||
safe_chr = _icu.chr
|
safe_chr = _icu.chr
|
||||||
|
|
||||||
ord_string = _icu.ord_string
|
ord_string = _icu.ord_string
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,13 +13,13 @@ from polyglot.builtins import iteritems, unicode_type, cmp
|
|||||||
|
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def make_collation_func(name, locale, numeric=True, template='_sort_key_template', func='strcmp'):
|
def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func, func='strcmp'):
|
||||||
c = icu._icu.Collator(locale)
|
def coll():
|
||||||
cname = '%s_test_collator%s' % (name, template)
|
ans = icu._icu.Collator(locale)
|
||||||
setattr(icu, cname, c)
|
ans.numeric = numeric
|
||||||
c.numeric = numeric
|
return ans
|
||||||
yield icu._make_func(getattr(icu, template), name, collator=cname, collator_func='not_used_xxx', func=func)
|
|
||||||
delattr(icu, cname)
|
yield maker(coll, func)
|
||||||
|
|
||||||
|
|
||||||
class TestICU(unittest.TestCase):
|
class TestICU(unittest.TestCase):
|
||||||
@ -46,23 +46,23 @@ class TestICU(unittest.TestCase):
|
|||||||
self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))
|
self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))
|
||||||
|
|
||||||
# Test locales
|
# Test locales
|
||||||
with make_collation_func('dsk', 'de', func='sort_key') as dsk:
|
with make_collation_func('dsk', 'de', maker=icu.make_sort_key_func, func='sort_key') as dsk:
|
||||||
self.ae(german_good, sorted(german, key=dsk))
|
self.ae(german_good, sorted(german, key=dsk))
|
||||||
with make_collation_func('dcmp', 'de', template='_strcmp_template') as dcmp:
|
with make_collation_func('dcmp', 'de', maker=icu.make_two_arg_func, func='strcmp') as dcmp:
|
||||||
for x in german:
|
for x in german:
|
||||||
for y in german:
|
for y in german:
|
||||||
self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))
|
self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))
|
||||||
|
|
||||||
with make_collation_func('fsk', 'fr', func='sort_key') as fsk:
|
with make_collation_func('fsk', 'fr', maker=icu.make_sort_key_func, func='sort_key') as fsk:
|
||||||
self.ae(french_good, sorted(french, key=fsk))
|
self.ae(french_good, sorted(french, key=fsk))
|
||||||
with make_collation_func('fcmp', 'fr', template='_strcmp_template') as fcmp:
|
with make_collation_func('fcmp', 'fr', maker=icu.make_two_arg_func) as fcmp:
|
||||||
for x in french:
|
for x in french:
|
||||||
for y in french:
|
for y in french:
|
||||||
self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))
|
self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))
|
||||||
|
|
||||||
with make_collation_func('ssk', 'es', func='sort_key') as ssk:
|
with make_collation_func('ssk', 'es', maker=icu.make_sort_key_func, func='sort_key') as ssk:
|
||||||
self.assertNotEqual(ssk('peña'), ssk('pena'))
|
self.assertNotEqual(ssk('peña'), ssk('pena'))
|
||||||
with make_collation_func('scmp', 'es', template='_strcmp_template') as scmp:
|
with make_collation_func('scmp', 'es', maker=icu.make_two_arg_func) as scmp:
|
||||||
self.assertNotEqual(0, scmp('pena', 'peña'))
|
self.assertNotEqual(0, scmp('pena', 'peña'))
|
||||||
|
|
||||||
for k, v in iteritems({u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}):
|
for k, v in iteritems({u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user