Mirror of https://github.com/kovidgoyal/calibre.git

Commit: d9d2e798ca (parent: 9f02e0a91c)

    Use the translations for language names from the iso-codes project
@@ -1,10 +1,14 @@
 #!/usr/bin/env python
 # License: GPLv3 Copyright: 2023, Kovid Goyal <kovid at kovidgoyal.net>
 
-from setup import download_securely
+import fnmatch
+import os
+import shutil, time
 import zipfile
 from io import BytesIO
 
+from setup import download_securely
+
 
 class ISOData:
     URL = 'https://salsa.debian.org/iso-codes-team/iso-codes/-/archive/main/iso-codes-main.zip'
@@ -23,5 +27,16 @@ class ISOData:
         with zf.open(f'iso-codes-main/data/{name}') as f:
             return f.read()
 
+    def extract_po_files(self, name: str, output_dir: str) -> None:
+        name = name.split('.', 1)[0]
+        pat = f'iso-codes-main/{name}/*.po'
+        with zipfile.ZipFile(self.zip_data) as zf:
+            for name in fnmatch.filter(zf.namelist(), pat):
+                dest = os.path.join(output_dir, name.split('/')[-1])
+                zi = zf.getinfo(name)
+                with zf.open(zi) as src, open(dest, 'wb') as d:
+                    shutil.copyfileobj(src, d)
+                date_time = time.mktime(zi.date_time + (0, 0, -1))
+                os.utime(dest, (date_time, date_time))
+
 iso_data = ISOData()
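Note on the new ISOData.extract_po_files(): it copies every catalog matching iso-codes-main/<name>/*.po out of the cached iso-codes zip and restores each file's upstream modification time. ZipInfo.date_time is a (year, month, day, hour, minute, second) tuple; appending (0, 0, -1) pads it to the 9-tuple time.mktime() expects, the trailing -1 meaning "DST unknown". A minimal usage sketch (the temporary directory is illustrative):

    import tempfile

    with tempfile.TemporaryDirectory() as tdir:
        # Extracts iso-codes-main/iso_639-3/*.po into tdir,
        # preserving each file's upstream mtime via os.utime().
        iso_data.extract_po_files('iso_639-3', tdir)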
@@ -91,32 +91,6 @@ class POT(Command): # {{{
 
         return '\n'.join(ans)
 
-    def get_iso639_strings(self):
-        self.info('Generating translation template for iso639')
-        root = json.loads(iso_data.db_data('iso_639-3.json'))
-        entries = root['639-3']
-        ans = []
-
-        def name_getter(x):
-            return x.get('inverted_name') or x.get('name')
-
-        for x in sorted(entries, key=lambda x:name_getter(x).lower()):
-            name = name_getter(x)
-            if name:
-                ans.append(f'msgid "{name}"')
-                ans.append('msgstr ""')
-                ans.append('')
-        pot = self.pot_header() + '\n\n' + '\n'.join(ans)
-        dest = self.j(self.TRANSLATIONS, 'iso_639', 'iso_639_3.pot')
-        try:
-            os.makedirs(os.path.dirname(dest))
-        except OSError:
-            pass
-        with open(dest, 'wb') as f:
-            f.write(pot.encode('utf-8'))
-        self.upload_pot(resource='iso639')
-        self.git(['add', dest])
-
     def get_content_server_strings(self):
         self.info('Generating translation template for content_server')
         from calibre import walk
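For context: the deleted get_iso639_strings() generated calibre's own gettext template from the ISO 639-3 language names and uploaded it for translation via upload_pot(resource='iso639'), emitting one entry per name of the form below (the name is illustrative). With this commit that template is retired in favor of the already-translated catalogs shipped by the iso-codes project.

    msgid "Abkhazian"
    msgstr ""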
@@ -208,7 +182,6 @@
     def run(self, opts):
         if not is_ci:
             require_git_master()
-        self.get_iso639_strings()
         if not is_ci:
             self.get_website_strings()
         self.get_content_server_strings()
@@ -318,12 +291,12 @@ class Translations(POT): # {{{
         self.compile_website_translations()
         self.compile_changelog_translations()
 
-    def compile_group(self, files, handle_stats=None, file_ok=None, action_per_file=None):
+    def compile_group(self, files, handle_stats=None, action_per_file=None, make_translated_strings_unique=False, keyfunc=lambda x: x):
         ok_files = []
         hashmap = {}
 
         def stats_cache(src, data=None):
-            cname = self.cache_name(src) + '.stats.json'
+            cname = self.cache_name(keyfunc(src)) + '.stats.json'
             with open(self.j(self.cache_dir, cname), ('rb' if data is None else 'wb')) as f:
                 if data is None:
                     return json.loads(f.read())
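The new keyfunc parameter gives compile_group() a stable cache key for files whose on-disk location changes between runs: the ISO 639 catalogs are now extracted into a fresh temporary directory each time, so the transient path cannot identify a cache entry. A toy illustration of the idea (the paths are made up):

    import os

    keyfunc = lambda x: os.path.join('/stable/iso639', os.path.basename(x))
    keyfunc('/tmp/tmpab12cd/de.po')  # -> '/stable/iso639/de.po', same on every run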
@@ -336,42 +309,39 @@
             base = os.path.dirname(dest)
             if not os.path.exists(base):
                 os.makedirs(base)
-            data, h = self.hash_and_data(src)
+            data, h = self.hash_and_data(src, keyfunc)
             current_hash = h.digest()
-            saved_hash, saved_data = self.read_cache(src)
+            saved_hash, saved_data = self.read_cache(keyfunc(src))
             if current_hash == saved_hash:
                 with open(dest, 'wb') as d:
                     d.write(saved_data)
                 if handle_stats is not None:
                     handle_stats(src, stats_cache(src))
             else:
-                if file_ok is None or file_ok(data, src):
-                    # self.info('\t' + os.path.relpath(src, self.j(self.d(self.SRC), 'translations')))
-                    ok_files.append((src, dest))
-                    hashmap[src] = current_hash
+                ok_files.append((src, dest))
+                hashmap[keyfunc(src)] = current_hash
             if action_per_file is not None:
                 action_per_file(src)
 
         self.info(f'\tCompiling {len(ok_files)} files')
         items = []
         results = batched_parallel_jobs(
-            [sys.executable, self.j(self.SRC, 'calibre', 'translations', 'msgfmt.py'), 'STDIN'],
+            [sys.executable, self.j(self.SRC, 'calibre', 'translations', 'msgfmt.py'), 'STDIN', 'uniqify' if make_translated_strings_unique else ' '],
             ok_files)
-        for (src, dest), nums in zip(ok_files, results):
-            items.append((src, dest, nums))
+        for (src, dest), data in zip(ok_files, results):
+            items.append((src, dest, data))
 
-        for (src, dest, nums) in items:
-            self.write_cache(open(dest, 'rb').read(), hashmap[src], src)
-            stats_cache(src, nums)
+        for (src, dest, data) in items:
+            self.write_cache(open(dest, 'rb').read(), hashmap[keyfunc(src)], keyfunc(src))
+            stats_cache(src, data)
             if handle_stats is not None:
-                handle_stats(src, nums)
+                handle_stats(src, data)
 
     def compile_main_translations(self):
         l = {}
         lc_dataf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py')
         exec(compile(open(lc_dataf, 'rb').read(), lc_dataf, 'exec'), l, l)
         lcdata = {k:{k1:v1 for k1, v1 in v} for k, v in l['data']}
-        self.iso639_errors = []
         self.info('Compiling main UI translation files...')
         fmap = {f:self.mo_file(f) for f in self.po_files()}
         files = [(f, fmap[f][1]) for f in self.po_files()]
@@ -388,33 +358,41 @@
 
         stats = {}
 
-        def handle_stats(f, nums):
-            trans = nums[0]
-            total = trans if len(nums) == 1 else (trans + nums[1])
+        def handle_stats(f, data):
+            trans, untrans = data['translated'], data['untranslated']
+            total = trans + untrans
             locale = fmap[f][0]
             stats[locale] = min(1.0, float(trans)/total)
 
         self.compile_group(files, handle_stats=handle_stats, action_per_file=action_per_file)
-        self.info('Compiling ISO639 files...')
 
+        self.info('Compiling ISO639 files...')
         files = []
         skip_iso = {
-            'en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds',
-            'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku', 'fr_CA', 'him',
-            'jv', 'ka', 'fur', 'ber', 'my', 'fil', 'hy', 'ug'}
-        for f, (locale, dest) in iteritems(fmap):
-            iscpo = {'bn':'bn_IN', 'zh_HK':'zh_CN'}.get(locale, locale)
-            iso639 = self.j(self.TRANSLATIONS, 'iso_639', '%s.po'%iscpo)
-            if os.path.exists(iso639):
-                files.append((iso639, self.j(self.d(dest), 'iso639.mo')))
-            elif locale not in skip_iso:
-                self.warn('No ISO 639 translations for locale:', locale)
-        self.compile_group(files, file_ok=self.check_iso639)
+            'si', 'te', 'km', 'en_GB', 'en_AU', 'en_CA', 'yi', 'ku', 'my', 'uz@Latn', 'fil', 'hy', 'ltg', 'km_KH', 'km',
+            'ur', 'ml', 'fo', 'ug', 'nds', 'jv'
+        }
 
-        if self.iso639_errors:
-            for err in self.iso639_errors:
-                print(err)
-            raise SystemExit(1)
+        def handle_stats(f, data):
+            if False and data['uniqified']:
+                print(f'{data["uniqified"]:3d} non-unique language name translations in {os.path.basename(f)}', file=sys.stderr)
+
+        with tempfile.TemporaryDirectory() as tdir:
+            iso_data.extract_po_files('iso_639-3', tdir)
+            for f, (locale, dest) in iteritems(fmap):
+                iscpo = {'zh_HK':'zh_CN'}.get(locale, locale)
+                iso639 = self.j(tdir, '%s.po'%iscpo)
+                if os.path.exists(iso639):
+                    files.append((iso639, self.j(self.d(dest), 'iso639.mo')))
+                else:
+                    iscpo = iscpo.partition('_')[0]
+                    iso639 = self.j(tdir, '%s.po'%iscpo)
+                    if os.path.exists(iso639):
+                        files.append((iso639, self.j(self.d(dest), 'iso639.mo')))
+                    elif locale not in skip_iso:
+                        self.warn('No ISO 639 translations for locale:', locale)
+            self.compile_group(files, make_translated_strings_unique=True, handle_stats=handle_stats,
+                               keyfunc=lambda x: os.path.join(self.d(self.SRC), 'iso639', os.path.basename(x)))
 
         dest = self.stats
         base = self.d(dest)
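The lookup above first tries the full locale (with zh_HK mapped onto zh_CN's catalog), then falls back to the bare language code, e.g. pt_BR -> pt. A hypothetical helper showing the same lookup order:

    import os

    def find_iso639_po(tdir, locale):
        # Sketch of the fallback used above; tdir holds the extracted .po files.
        iscpo = {'zh_HK': 'zh_CN'}.get(locale, locale)
        for cand in (iscpo, iscpo.partition('_')[0]):
            path = os.path.join(tdir, f'{cand}.po')
            if os.path.exists(path):
                return path
        return None  # caller warns unless the locale is in skip_iso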
@@ -427,11 +405,11 @@
         with open(dest, 'wb') as f:
             f.write(msgpack_dumps(stats))
 
-    def hash_and_data(self, f):
+    def hash_and_data(self, f, keyfunc=lambda x:x):
         with open(f, 'rb') as s:
             data = s.read()
         h = hashlib.sha1(data)
-        h.update(f.encode('utf-8'))
+        h.update(keyfunc(f).encode('utf-8'))
         return data, h
 
     def compile_content_server_translations(self):
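hash_and_data() folds the file's identity into the SHA-1 along with its contents, so two files with identical bytes still get distinct cache entries; hashing keyfunc(f) rather than f keeps that identity stable when the same catalog is extracted to a different temporary path on each run. In spirit:

    import hashlib

    def cache_digest(path, data, keyfunc=lambda x: x):
        h = hashlib.sha1(data)                   # the file contents...
        h.update(keyfunc(path).encode('utf-8'))  # ...plus a stable identity
        return h.digest()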
@@ -467,31 +445,6 @@
             zi.compress_type = ZIP_STORED if is_ci else ZIP_DEFLATED
             zf.writestr(zi, raw)
 
-    def check_iso639(self, raw, path):
-        from calibre.utils.localization import langnames_to_langcodes
-        rmap = {}
-        msgid = None
-        has_errors = False
-        if isinstance(raw, bytes):
-            raw = raw.decode('utf-8')
-        for match in re.finditer(r'^(msgid|msgstr)\s+"(.*?)"', raw, re.M):
-            if match.group(1) == 'msgid':
-                msgid = match.group(2)
-            else:
-                msgstr = match.group(2)
-                if not msgstr:
-                    continue
-                omsgid = rmap.get(msgstr, None)
-                if omsgid is not None:
-                    cm = langnames_to_langcodes([omsgid, msgid])
-                    if cm[msgid] and cm[omsgid] and cm[msgid] != cm[omsgid]:
-                        has_errors = True
-                        self.iso639_errors.append('In file {} the name {} is used as translation for both {} and {}'.format(
-                            os.path.basename(path), msgstr, msgid, rmap[msgstr]))
-                        # raise SystemExit(1)
-                rmap[msgstr] = msgid
-        return not has_errors
-
     def freeze_locales(self):
         zf = self.DEST + '.zip'
         from calibre import CurrentDir
@@ -518,10 +471,10 @@
         stats = {}
         done = []
 
-        def handle_stats(src, nums):
+        def handle_stats(src, data):
             locale = fmap[src]
-            trans = nums[0]
-            total = trans if len(nums) == 1 else (trans + nums[1])
+            trans, untrans = data['translated'], data['untranslated']
+            total = trans + untrans
             stats[locale] = int(round(100 * trans / total))
 
         with TemporaryDirectory() as tdir, ZipFile(self.j(srcbase, 'locales.zip'), 'w', ZIP_STORED) as zf:
@@ -593,12 +546,11 @@
             mofile = self.j(dest, po.rpartition('.')[0] + '.mo')
             files.append((self.j(q, po), mofile))
 
-        def handle_stats(src, nums):
+        def handle_stats(src, data):
             locale = self.b(self.d(src))
             stats = all_stats[locale]
-            stats['translated'] += nums[0]
-            if len(nums) > 1:
-                stats['untranslated'] += nums[1]
+            stats['translated'] += data['translated']
+            stats['untranslated'] += data['untranslated']
 
         self.compile_group(files, handle_stats=handle_stats)
         for locale, stats in iteritems(all_stats):
(The remaining hunks are to msgfmt.py, invoked above as src/calibre/translations/msgfmt.py.)

@@ -37,7 +37,9 @@ from email.parser import HeaderParser
 __version__ = "1.2"
 
 MESSAGES = {}
-STATS = {'translated': 0, 'untranslated': 0}
+STATS = {'translated': 0, 'untranslated': 0, 'uniqified': 0}
+MAKE_UNIQUE = False
+NON_UNIQUE = set()
 
 
 def usage(code, msg=''):
@@ -53,6 +55,12 @@ def add(ctxt, msgid, msgstr, fuzzy):
         if msgid:
             STATS['translated'] += 1
         if ctxt is None:
+            if msgstr in NON_UNIQUE:
+                STATS['uniqified'] += 1
+                if MAKE_UNIQUE:
+                    msgstr += b' (' + msgid + b')'
+            else:
+                NON_UNIQUE.add(msgstr)
             MESSAGES[msgid] = msgstr
         else:
             MESSAGES[b"%b\x04%b" % (ctxt, msgid)] = msgstr
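Background on the uniqifying logic: different language codes can translate to the same display name in a given catalog, which makes the names ambiguous in a language-selection UI. When MAKE_UNIQUE is set, the second and later occurrences of a msgstr get the untranslated msgid appended in parentheses; NON_UNIQUE records which msgstr byte strings have been seen. A simplified model of the effect (the colliding names are invented):

    NON_UNIQUE = set()

    def uniqify(msgid, msgstr):
        # Disambiguate repeated translations the way add() does above.
        if msgstr in NON_UNIQUE:
            return msgstr + b' (' + msgid + b')'
        NON_UNIQUE.add(msgstr)
        return msgstr

    uniqify(b'Achinese', b'aceh')  # -> b'aceh'
    uniqify(b'Acehnese', b'aceh')  # -> b'aceh (Acehnese)'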
@@ -235,9 +243,10 @@ def make(filename, outfile):
 
 def make_with_stats(filename, outfile):
     MESSAGES.clear()
-    STATS['translated'] = STATS['untranslated'] = 0
+    NON_UNIQUE.clear()
+    STATS['translated'] = STATS['untranslated'] = STATS['uniqified'] = 0
     make(filename, outfile)
-    return STATS['translated'], STATS['untranslated']
+    return STATS.copy()
 
 
 def run_batch(pairs):
@@ -246,8 +255,10 @@ def run_batch(pairs):
 
 
 def main():
+    global MAKE_UNIQUE
     args = sys.argv[1:]
-    if args == ['STDIN']:
+    if args[0] == 'STDIN':
+        MAKE_UNIQUE = args[1] == 'uniqify'
         import json
         results = tuple(run_batch(json.loads(sys.stdin.buffer.read())))
         sys.stdout.buffer.write(json.dumps(results).encode('utf-8'))
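This is the receiving end of compile_group() above: the parent sends a JSON list of (po_path, mo_path) pairs on stdin, argv[1] is 'STDIN', and argv[2] is either 'uniqify' or a single-space placeholder so argv has a fixed length either way; the child answers with one STATS dict per pair as JSON on stdout. A hedged sketch of the parent side, using plain subprocess in place of calibre's batched_parallel_jobs (the msgfmt.py path is abbreviated):

    import json
    import subprocess
    import sys

    def compile_pairs(pairs, make_unique=False):
        # pairs: list of [po_path, mo_path] pairs, JSON-encoded over stdin.
        cmd = [sys.executable, 'msgfmt.py', 'STDIN',
               'uniqify' if make_unique else ' ']
        p = subprocess.run(cmd, input=json.dumps(pairs).encode('utf-8'),
                           stdout=subprocess.PIPE, check=True)
        # One result per pair:
        # {'translated': N, 'untranslated': N, 'uniqified': N}
        return json.loads(p.stdout)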
@@ -279,7 +290,7 @@ def main():
         return
 
     for filename in args:
-        translated, untranslated = make_with_stats(filename, outfile)
+        make_with_stats(filename, outfile)
         if output_stats:
             print(STATS['translated'], 'translated messages,', STATS['untranslated'], 'untranslated messages.')
 