mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 02:34:06 -04:00
Update the ISO 639-3 language database
This commit is contained in:
parent
6cccd18be8
commit
1b93d540b7
48688
setup/iso_639-3.json
Normal file
48688
setup/iso_639-3.json
Normal file
File diff suppressed because it is too large
Load Diff
39178
setup/iso_639_3.xml
39178
setup/iso_639_3.xml
File diff suppressed because it is too large
Load Diff
@ -726,7 +726,7 @@ class ISO639(Command): # {{{
|
|||||||
'iso639.calibre_msgpack')
|
'iso639.calibre_msgpack')
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
src = self.j(self.d(self.SRC), 'setup', 'iso_639_3.xml')
|
src = self.j(self.d(self.SRC), 'setup', 'iso_639-3.json')
|
||||||
if not os.path.exists(src):
|
if not os.path.exists(src):
|
||||||
raise Exception(src + ' does not exist')
|
raise Exception(src + ' does not exist')
|
||||||
dest = self.DEST
|
dest = self.DEST
|
||||||
@ -737,29 +737,24 @@ class ISO639(Command): # {{{
|
|||||||
self.info('Packed code is up to date')
|
self.info('Packed code is up to date')
|
||||||
return
|
return
|
||||||
self.info('Packing ISO-639 codes to', dest)
|
self.info('Packing ISO-639 codes to', dest)
|
||||||
from lxml import etree
|
with open(src, 'rb') as f:
|
||||||
root = etree.fromstring(open(src, 'rb').read())
|
root = json.load(f)
|
||||||
|
entries = root['639-3']
|
||||||
by_2 = {}
|
by_2 = {}
|
||||||
by_3b = {}
|
by_3 = {}
|
||||||
by_3t = {}
|
|
||||||
m2to3 = {}
|
m2to3 = {}
|
||||||
m3to2 = {}
|
m3to2 = {}
|
||||||
m3bto3t = {}
|
|
||||||
nm = {}
|
nm = {}
|
||||||
codes2, codes3t, codes3b = set(), set(), set()
|
codes2, codes3 = set(), set()
|
||||||
unicode_type = type(u'')
|
unicode_type = type(u'')
|
||||||
for x in root.xpath('//iso_639_3_entry'):
|
for x in entries:
|
||||||
two = x.get('part1_code', None)
|
two = x.get('alpha_2')
|
||||||
if two:
|
if two:
|
||||||
two = unicode_type(two)
|
two = unicode_type(two)
|
||||||
threet = x.get('id')
|
threeb = x.get('alpha_3')
|
||||||
if threet:
|
|
||||||
threet = unicode_type(threet)
|
|
||||||
threeb = x.get('part2_code', None)
|
|
||||||
if threeb:
|
if threeb:
|
||||||
threeb = unicode_type(threeb)
|
threeb = unicode_type(threeb)
|
||||||
if threeb is None:
|
if threeb is None:
|
||||||
# Only recognize languages in ISO-639-2
|
|
||||||
continue
|
continue
|
||||||
name = x.get('name')
|
name = x.get('name')
|
||||||
if name:
|
if name:
|
||||||
@ -768,20 +763,16 @@ class ISO639(Command): # {{{
|
|||||||
if two is not None:
|
if two is not None:
|
||||||
by_2[two] = name
|
by_2[two] = name
|
||||||
codes2.add(two)
|
codes2.add(two)
|
||||||
m2to3[two] = threet
|
m2to3[two] = threeb
|
||||||
m3to2[threeb] = m3to2[threet] = two
|
m3to2[threeb] = two
|
||||||
by_3b[threeb] = name
|
codes3.add(threeb)
|
||||||
by_3t[threet] = name
|
by_3[threeb] = name
|
||||||
if threeb != threet:
|
|
||||||
m3bto3t[threeb] = threet
|
|
||||||
codes3b.add(threeb)
|
|
||||||
codes3t.add(threet)
|
|
||||||
base_name = name.lower()
|
base_name = name.lower()
|
||||||
nm[base_name] = threet
|
nm[base_name] = threeb
|
||||||
|
|
||||||
x = {u'by_2':by_2, u'by_3b':by_3b, u'by_3t':by_3t, u'codes2':codes2,
|
x = {u'by_2':by_2, u'by_3':by_3, u'codes2':codes2,
|
||||||
u'codes3b':codes3b, u'codes3t':codes3t, u'2to3':m2to3,
|
u'codes3':codes3, u'2to3':m2to3,
|
||||||
u'3to2':m3to2, u'3bto3t':m3bto3t, u'name_map':nm}
|
u'3to2':m3to2, u'name_map':nm}
|
||||||
from calibre.utils.serialize import msgpack_dumps
|
from calibre.utils.serialize import msgpack_dumps
|
||||||
with open(dest, 'wb') as f:
|
with open(dest, 'wb') as f:
|
||||||
f.write(msgpack_dumps(x))
|
f.write(msgpack_dumps(x))
|
||||||
|
@ -369,6 +369,8 @@ def _load_iso639():
|
|||||||
ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True)
|
ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True)
|
||||||
from calibre.utils.serialize import msgpack_loads
|
from calibre.utils.serialize import msgpack_loads
|
||||||
_iso639 = msgpack_loads(ip)
|
_iso639 = msgpack_loads(ip)
|
||||||
|
if 'by_3' not in _iso639:
|
||||||
|
_iso639['by_3'] = _iso639['by_3t']
|
||||||
return _iso639
|
return _iso639
|
||||||
|
|
||||||
|
|
||||||
@ -379,10 +381,8 @@ def get_iso_language(lang_trans, lang):
|
|||||||
if len(lang) == 2:
|
if len(lang) == 2:
|
||||||
ans = iso639['by_2'].get(lang, ans)
|
ans = iso639['by_2'].get(lang, ans)
|
||||||
elif len(lang) == 3:
|
elif len(lang) == 3:
|
||||||
if lang in iso639['by_3b']:
|
if lang in iso639['by_3']:
|
||||||
ans = iso639['by_3b'][lang]
|
ans = iso639['by_3'][lang]
|
||||||
else:
|
|
||||||
ans = iso639['by_3t'].get(lang, ans)
|
|
||||||
return lang_trans(ans)
|
return lang_trans(ans)
|
||||||
|
|
||||||
|
|
||||||
@ -401,7 +401,7 @@ def calibre_langcode_to_name(lc, localize=True):
|
|||||||
iso639 = _load_iso639()
|
iso639 = _load_iso639()
|
||||||
translate = _ if localize else lambda x: x
|
translate = _ if localize else lambda x: x
|
||||||
try:
|
try:
|
||||||
return translate(iso639['by_3t'][lc])
|
return translate(iso639['by_3'][lc])
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return lc
|
return lc
|
||||||
@ -426,10 +426,8 @@ def canonicalize_lang(raw):
|
|||||||
if ans is not None:
|
if ans is not None:
|
||||||
return ans
|
return ans
|
||||||
elif len(raw) == 3:
|
elif len(raw) == 3:
|
||||||
if raw in iso639['by_3t']:
|
if raw in iso639['by_3']:
|
||||||
return raw
|
return raw
|
||||||
if raw in iso639['3bto3t']:
|
|
||||||
return iso639['3bto3t'][raw]
|
|
||||||
|
|
||||||
return iso639['name_map'].get(raw, None)
|
return iso639['name_map'].get(raw, None)
|
||||||
|
|
||||||
@ -443,7 +441,7 @@ def lang_map():
|
|||||||
translate = _
|
translate = _
|
||||||
global _lang_map
|
global _lang_map
|
||||||
if _lang_map is None:
|
if _lang_map is None:
|
||||||
_lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3t'])}
|
_lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3'])}
|
||||||
return _lang_map
|
return _lang_map
|
||||||
|
|
||||||
|
|
||||||
@ -467,7 +465,7 @@ def langnames_to_langcodes(names):
|
|||||||
translate = _
|
translate = _
|
||||||
ans = {}
|
ans = {}
|
||||||
names = set(names)
|
names = set(names)
|
||||||
for k, v in iteritems(iso639['by_3t']):
|
for k, v in iteritems(iso639['by_3']):
|
||||||
tv = translate(v)
|
tv = translate(v)
|
||||||
if tv in names:
|
if tv in names:
|
||||||
names.remove(tv)
|
names.remove(tv)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user