Use unicode keys for the ISO lang code maps

This means the same stored (msgpack-serialized) maps can be used on both Python 2 and Python 3, since their keys and values are now always unicode strings.
This commit is contained in:
Kovid Goyal 2019-04-10 14:10:06 +05:30
parent 4775fc780c
commit d6b6d4c892
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -746,14 +746,23 @@ class ISO639(Command): # {{{
m3bto3t = {} m3bto3t = {}
nm = {} nm = {}
codes2, codes3t, codes3b = set(), set(), set() codes2, codes3t, codes3b = set(), set(), set()
unicode_type = type(u'')
for x in root.xpath('//iso_639_3_entry'): for x in root.xpath('//iso_639_3_entry'):
two = x.get('part1_code', None) two = x.get('part1_code', None)
if two:
two = unicode_type(two)
threet = x.get('id') threet = x.get('id')
if threet:
threet = unicode_type(threet)
threeb = x.get('part2_code', None) threeb = x.get('part2_code', None)
if threeb:
threeb = unicode_type(threeb)
if threeb is None: if threeb is None:
# Only recognize languages in ISO-639-2 # Only recognize languages in ISO-639-2
continue continue
name = x.get('name') name = x.get('name')
if name:
name = unicode_type(name)
if two is not None: if two is not None:
by_2[two] = name by_2[two] = name
@ -769,9 +778,9 @@ class ISO639(Command): # {{{
base_name = name.lower() base_name = name.lower()
nm[base_name] = threet nm[base_name] = threet
x = {'by_2':by_2, 'by_3b':by_3b, 'by_3t':by_3t, 'codes2':codes2, x = {u'by_2':by_2, u'by_3b':by_3b, u'by_3t':by_3t, u'codes2':codes2,
'codes3b':codes3b, 'codes3t':codes3t, '2to3':m2to3, u'codes3b':codes3b, u'codes3t':codes3t, u'2to3':m2to3,
'3to2':m3to2, '3bto3t':m3bto3t, 'name_map':nm} u'3to2':m3to2, u'3bto3t':m3bto3t, u'name_map':nm}
from calibre.utils.serialize import msgpack_dumps from calibre.utils.serialize import msgpack_dumps
with open(dest, 'wb') as f: with open(dest, 'wb') as f:
f.write(msgpack_dumps(x)) f.write(msgpack_dumps(x))
@ -806,14 +815,19 @@ class ISO3166(ISO639): # {{{
codes = set() codes = set()
three_map = {} three_map = {}
name_map = {} name_map = {}
unicode_type = type(u'')
for x in root.xpath('//iso_3166_entry'): for x in root.xpath('//iso_3166_entry'):
two = x.get('alpha_2_code') two = x.get('alpha_2_code')
three = x.get('alpha_3_code') if two:
two = unicode_type(two)
codes.add(two) codes.add(two)
name_map[two] = x.get('name') name_map[two] = x.get('name')
if name_map[two]:
name_map[two] = unicode_type(name_map[two])
three = x.get('alpha_3_code')
if three: if three:
three_map[three] = two three_map[unicode_type(three)] = two
x = {'names':name_map, 'codes':frozenset(codes), 'three_map':three_map} x = {u'names':name_map, u'codes':frozenset(codes), u'three_map':three_map}
from calibre.utils.serialize import msgpack_dumps from calibre.utils.serialize import msgpack_dumps
with open(dest, 'wb') as f: with open(dest, 'wb') as f:
f.write(msgpack_dumps(x)) f.write(msgpack_dumps(x))