mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Refactor the translations builder to use a cache
This commit is contained in:
parent
bda0e2d812
commit
a545fc497d
@ -6,12 +6,12 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, tempfile, shutil, subprocess, glob, re, time, textwrap, cPickle, shlex, json, errno
|
import os, tempfile, shutil, subprocess, glob, re, time, textwrap, cPickle, shlex, json, errno, hashlib
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from locale import normalize as normalize_locale
|
from locale import normalize as normalize_locale
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from setup import Command, __appname__, __version__, require_git_master
|
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir
|
||||||
from setup.parallel_build import parallel_check_output
|
from setup.parallel_build import parallel_check_output
|
||||||
|
|
||||||
def qt_sources():
|
def qt_sources():
|
||||||
@ -207,6 +207,39 @@ class Translations(POT): # {{{
|
|||||||
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
|
DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
|
||||||
'locales')
|
'locales')
|
||||||
|
|
||||||
|
@property
def cache_dir(self):
    """Return the directory holding cached translation build artifacts.

    The directory is ``<build_cache_dir()>/translations``. It is created
    the first time this property is read on an instance; an already
    existing directory is not an error.

    Raises EnvironmentError if the directory cannot be created for any
    reason other than it already existing.
    """
    ans = self.j(build_cache_dir(), 'translations')
    if not getattr(self, 'cache_dir_created', False):
        try:
            os.mkdir(ans)
        except EnvironmentError as err:
            if err.errno != errno.EEXIST:
                raise
        # Record success only after the directory is known to exist, so
        # that a failed attempt is retried on the next access instead of
        # silently handing back a path that was never created.
        self.cache_dir_created = True
    return ans
|
||||||
|
|
||||||
|
def cache_name(self, f):
    """Return the flat cache file name used for the source file *f*.

    The path of *f* relative to the parent directory of ``self.SRC`` is
    flattened by turning every path separator into a dot; leading dots
    (as produced by ``..`` components) are stripped.
    """
    source_root = self.d(self.SRC)
    relative = os.path.relpath(f, source_root)
    dotted = relative.replace(os.sep, '.')
    dotted = dotted.replace('/', '.')
    return dotted.lstrip('.')
|
||||||
|
|
||||||
|
def read_cache(self, f):
    """Load the cache entry for the source file *f*.

    A cache entry is a 20 byte hash followed by the cached payload.
    Returns ``(hash, payload)``, or ``(None, None)`` when no entry exists.
    Any I/O error other than "file not found" is re-raised.
    """
    entry = self.cache_name(f)
    try:
        with open(self.j(self.cache_dir, entry), 'rb') as stream:
            blob = stream.read()
    except EnvironmentError as err:
        if err.errno == errno.ENOENT:
            return None, None
        raise
    # The first 20 bytes are the hash, everything after is the payload
    return blob[:20], blob[20:]
|
||||||
|
|
||||||
|
def write_cache(self, data, h, f):
    """Store *data* in the cache for the source file *f*.

    :param data: The payload to cache, as bytes
    :param h: The 20 byte hash identifying the current state of *f*
    :param f: Path to the source file the payload was built from

    The entry is written as the hash immediately followed by the payload.
    Raises ValueError if *h* is not exactly 20 bytes long.
    """
    # The reader splits entries at a fixed 20 byte offset, so a hash of
    # any other length would corrupt the entry. Use a real check rather
    # than an assert, as asserts are stripped when running with -O.
    if len(h) != 20:
        raise ValueError('Cache hash must be 20 bytes long, not %d' % len(h))
    cname = self.cache_name(f)
    with open(self.j(self.cache_dir, cname), 'wb') as dest:
        dest.write(h)
        dest.write(data)
|
||||||
|
|
||||||
def po_files(self):
|
def po_files(self):
|
||||||
return glob.glob(os.path.join(self.TRANSLATIONS, __appname__, '*.po'))
|
return glob.glob(os.path.join(self.TRANSLATIONS, __appname__, '*.po'))
|
||||||
|
|
||||||
@ -215,75 +248,144 @@ class Translations(POT): # {{{
|
|||||||
return locale, os.path.join(self.DEST, locale, 'messages.mo')
|
return locale, os.path.join(self.DEST, locale, 'messages.mo')
|
||||||
|
|
||||||
def run(self, opts):
|
def run(self, opts):
|
||||||
self.compile_content_server_translations()
|
|
||||||
self.compile_main_translations()
|
self.compile_main_translations()
|
||||||
self.write_stats()
|
self.compile_content_server_translations()
|
||||||
self.freeze_locales()
|
self.freeze_locales()
|
||||||
self.compile_user_manual_translations()
|
self.compile_user_manual_translations()
|
||||||
|
|
||||||
|
def compile_group(self, files, handle_stats=None, file_ok=None, action_per_file=None):
    """Compile a group of .po files with msgfmt, reusing cached output.

    :param files: Iterable of ``(src, dest)`` pairs: the .po file and the
        path the compiled output is written to
    :param handle_stats: Optional callable ``(src, nums)`` that receives
        the numbers parsed from the ``msgfmt --statistics`` output
    :param file_ok: Optional callable ``(data, src)``; files that need
        compiling are only compiled when it returns a true value
    :param action_per_file: Optional callable ``(src)`` invoked once for
        every file, whether it came from the cache or not

    A file whose hash matches its cache entry is copied from the cache
    instead of being recompiled. If statistics are requested but the
    cached statistics are missing or unreadable, the file is recompiled.
    """
    jobs, ok_files = [], []
    hashmap = {}

    def stats_path(src):
        # Statistics live next to the cache entry, so that removing the
        # cache directory removes them as well.
        return self.j(self.cache_dir, self.cache_name(src) + '.stats.json')

    def load_stats(src):
        # Returns the cached statistics or None if there are none usable
        try:
            with open(stats_path(src), 'rb') as f:
                return json.loads(f.read().decode('utf-8'))
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
        except ValueError:
            # Corrupt/truncated statistics, regenerate them
            pass
        return None

    def save_stats(src, nums):
        raw = json.dumps(nums)
        if not isinstance(raw, bytes):
            raw = raw.encode('utf-8')
        with open(stats_path(src), 'wb') as f:
            f.write(raw)

    for src, dest in files:
        base = os.path.dirname(dest)
        if not os.path.exists(base):
            os.makedirs(base)
        data, current_hash = self.hash_and_data(src)
        saved_hash, saved_data = self.read_cache(src)
        cache_hit = current_hash == saved_hash
        cached_stats = None
        if cache_hit and handle_stats is not None:
            cached_stats = load_stats(src)
            # Without statistics the cache entry is not usable here
            cache_hit = cached_stats is not None
        if cache_hit:
            with open(dest, 'wb') as d:
                d.write(saved_data)
            if handle_stats is not None:
                handle_stats(src, cached_stats)
        elif file_ok is None or file_ok(data, src):
            self.info('\t' + os.path.relpath(src, self.j(self.d(self.SRC), 'translations')))
            jobs.append(['msgfmt', '--statistics', '-o', dest, src])
            ok_files.append((src, dest))
            hashmap[src] = current_hash
        if action_per_file is not None:
            action_per_file(src)

    if not ok_files:
        # Everything came from the cache, nothing to compile
        return

    for (src, dest), line in zip(ok_files, parallel_check_output(jobs, self.info)):
        nums = tuple(map(int, re.findall(r'\d+', line)))
        # Save the statistics before the cache entry, so that a cache
        # entry never exists without its statistics.
        save_stats(src, nums)
        with open(dest, 'rb') as compiled:
            self.write_cache(compiled.read(), hashmap[src], src)
        if handle_stats is not None:
            handle_stats(src, nums)
|
||||||
|
|
||||||
def compile_main_translations(self):
|
def compile_main_translations(self):
|
||||||
l = {}
|
l = {}
|
||||||
lc_dataf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py')
|
lc_dataf = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lc_data.py')
|
||||||
exec(compile(open(lc_dataf, 'rb').read(), lc_dataf, 'exec'), l, l)
|
exec(compile(open(lc_dataf, 'rb').read(), lc_dataf, 'exec'), l, l)
|
||||||
lcdata = {k:{k1:v1 for k1, v1 in v} for k, v in l['data']}
|
lcdata = {k:{k1:v1 for k1, v1 in v} for k, v in l['data']}
|
||||||
self.iso639_errors = []
|
self.iso639_errors = []
|
||||||
jobs = []
|
self.info('Compiling main UI translation files...')
|
||||||
for f in self.po_files():
|
fmap = {f:self.mo_file(f) for f in self.po_files()}
|
||||||
locale, dest = self.mo_file(f)
|
files = [(f, fmap[f][1]) for f in self.po_files()]
|
||||||
base = os.path.dirname(dest)
|
|
||||||
if not os.path.exists(base):
|
|
||||||
os.makedirs(base)
|
|
||||||
jobs.append(['msgfmt', '-o', dest, f])
|
|
||||||
iscpo = {'bn':'bn_IN', 'zh_HK':'zh_CN'}.get(locale, locale)
|
|
||||||
iso639 = self.j(self.TRANSLATIONS, 'iso_639', '%s.po'%iscpo)
|
|
||||||
|
|
||||||
if os.path.exists(iso639) and self.check_iso639(iso639):
|
|
||||||
dest = self.j(self.d(dest), 'iso639.mo')
|
|
||||||
if self.newer(dest, iso639):
|
|
||||||
jobs.append(['msgfmt', '-o', dest, iso639])
|
|
||||||
elif locale not in {
|
|
||||||
'en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds',
|
|
||||||
'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku', 'fr_CA', 'him',
|
|
||||||
'jv', 'ka', 'fur', 'ber', 'my', 'fil', 'hy', 'ug'}:
|
|
||||||
self.warn('No ISO 639 translations for locale:', locale)
|
|
||||||
|
|
||||||
|
def action_per_file(f):
|
||||||
|
locale, dest = fmap[f]
|
||||||
ln = normalize_locale(locale).partition('.')[0]
|
ln = normalize_locale(locale).partition('.')[0]
|
||||||
if ln in lcdata:
|
if ln in lcdata:
|
||||||
ld = lcdata[ln]
|
ld = lcdata[ln]
|
||||||
lcdest = self.j(self.d(dest), 'lcdata.pickle')
|
lcdest = self.j(self.d(dest), 'lcdata.pickle')
|
||||||
with open(lcdest, 'wb') as lcf:
|
with open(lcdest, 'wb') as lcf:
|
||||||
lcf.write(cPickle.dumps(ld, -1))
|
lcf.write(cPickle.dumps(ld, -1))
|
||||||
self.info('\nCompiling %d translation files...' % len(jobs))
|
|
||||||
tuple(parallel_check_output(jobs, self.info))
|
stats = {}
|
||||||
|
def handle_stats(f, nums):
|
||||||
|
trans = nums[0]
|
||||||
|
total = trans if len(nums) == 1 else (trans + nums[1])
|
||||||
|
locale = fmap[f][0]
|
||||||
|
stats[locale] = min(1.0, float(trans)/total)
|
||||||
|
|
||||||
|
self.compile_group(files, handle_stats=handle_stats, action_per_file=action_per_file)
|
||||||
|
self.info('Compiling ISO639 files...')
|
||||||
|
|
||||||
|
files = []
|
||||||
|
skip_iso = {
|
||||||
|
'en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds',
|
||||||
|
'te', 'yi', 'fo', 'sq', 'ast', 'ml', 'ku', 'fr_CA', 'him',
|
||||||
|
'jv', 'ka', 'fur', 'ber', 'my', 'fil', 'hy', 'ug'}
|
||||||
|
for f, (locale, dest) in fmap.iteritems():
|
||||||
|
iscpo = {'bn':'bn_IN', 'zh_HK':'zh_CN'}.get(locale, locale)
|
||||||
|
iso639 = self.j(self.TRANSLATIONS, 'iso_639', '%s.po'%iscpo)
|
||||||
|
if os.path.exists(iso639):
|
||||||
|
files.append((iso639, self.j(self.d(dest), 'iso639.mo')))
|
||||||
|
elif locale not in skip_iso:
|
||||||
|
self.warn('No ISO 639 translations for locale:', locale)
|
||||||
|
self.compile_group(files, file_ok=self.check_iso639)
|
||||||
|
|
||||||
if self.iso639_errors:
|
if self.iso639_errors:
|
||||||
for err in self.iso639_errors:
|
for err in self.iso639_errors:
|
||||||
print (err)
|
print (err)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
|
|
||||||
|
dest = self.stats
|
||||||
|
base = self.d(dest)
|
||||||
|
try:
|
||||||
|
os.mkdir(base)
|
||||||
|
except EnvironmentError as err:
|
||||||
|
if err.errno != errno.EEXIST:
|
||||||
|
raise
|
||||||
|
cPickle.dump(stats, open(dest, 'wb'), -1)
|
||||||
|
|
||||||
|
def hash_and_data(self, f):
    """Read the file *f* and compute the hash used as its cache key.

    :param f: Path to the file, as either a text or a byte string
    :return: ``(data, digest)`` where data is the raw contents of the
        file and digest is the 20 byte SHA1 of the contents followed by
        the path.
    """
    with open(f, 'rb') as s:
        data = s.read()
    h = hashlib.sha1(data)
    # The path is part of the hash. A byte string path is used as is,
    # calling encode() on it would fail for non-ASCII paths.
    h.update(f if isinstance(f, bytes) else f.encode('utf-8'))
    return data, h.digest()
|
||||||
|
|
||||||
def compile_content_server_translations(self):
|
def compile_content_server_translations(self):
|
||||||
self.info('\nCompiling content-server translations')
|
self.info('Compiling content-server translations')
|
||||||
from calibre.utils.rapydscript import msgfmt
|
from calibre.utils.rapydscript import msgfmt
|
||||||
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
|
from calibre.utils.zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
|
||||||
with ZipFile(self.j(self.RESOURCES, 'content-server', 'locales.zip'), 'w', ZIP_DEFLATED) as zf:
|
with ZipFile(self.j(self.RESOURCES, 'content-server', 'locales.zip'), 'w', ZIP_DEFLATED) as zf:
|
||||||
for src in glob.glob(os.path.join(self.TRANSLATIONS, 'content-server', '*.po')):
|
for src in glob.glob(os.path.join(self.TRANSLATIONS, 'content-server', '*.po')):
|
||||||
with open(src, 'rb') as f:
|
data, current_hash = self.hash_and_data(src)
|
||||||
po_data = f.read().decode('utf-8')
|
saved_hash, saved_data = self.read_cache(src)
|
||||||
data = json.loads(msgfmt(po_data))
|
if current_hash == saved_hash:
|
||||||
translated_entries = {k:v for k, v in data['entries'].iteritems() if v and sum(map(len, v))}
|
raw = saved_data
|
||||||
data['entries'] = translated_entries
|
else:
|
||||||
if translated_entries:
|
self.info('\tParsing ' + os.path.basename(src))
|
||||||
raw = json.dumps(data, ensure_ascii=False, sort_keys=True)
|
raw = None
|
||||||
if isinstance(raw, type(u'')):
|
po_data = data.decode('utf-8')
|
||||||
raw = raw.encode('utf-8')
|
data = json.loads(msgfmt(po_data))
|
||||||
|
translated_entries = {k:v for k, v in data['entries'].iteritems() if v and sum(map(len, v))}
|
||||||
|
data['entries'] = translated_entries
|
||||||
|
cdata = b'{}'
|
||||||
|
if translated_entries:
|
||||||
|
raw = json.dumps(data, ensure_ascii=False, sort_keys=True)
|
||||||
|
if isinstance(raw, type(u'')):
|
||||||
|
raw = raw.encode('utf-8')
|
||||||
|
cdata = raw
|
||||||
|
self.write_cache(cdata, current_hash, src)
|
||||||
|
if raw:
|
||||||
zi = ZipInfo(os.path.basename(src).rpartition('.')[0])
|
zi = ZipInfo(os.path.basename(src).rpartition('.')[0])
|
||||||
zi.compress_type = ZIP_DEFLATED
|
zi.compress_type = ZIP_DEFLATED
|
||||||
zf.writestr(zi, raw)
|
zf.writestr(zi, raw)
|
||||||
|
|
||||||
def check_iso639(self, path):
|
def check_iso639(self, raw, path):
|
||||||
from calibre.utils.localization import langnames_to_langcodes
|
from calibre.utils.localization import langnames_to_langcodes
|
||||||
with open(path, 'rb') as f:
|
|
||||||
raw = f.read()
|
|
||||||
rmap = {}
|
rmap = {}
|
||||||
msgid = None
|
msgid = None
|
||||||
has_errors = False
|
has_errors = False
|
||||||
@ -318,36 +420,13 @@ class Translations(POT): # {{{
|
|||||||
def stats(self):
|
def stats(self):
|
||||||
return self.j(self.d(self.DEST), 'stats.pickle')
|
return self.j(self.d(self.DEST), 'stats.pickle')
|
||||||
|
|
||||||
def write_stats(self):
|
|
||||||
files = self.po_files()
|
|
||||||
dest = self.stats
|
|
||||||
if not self.newer(dest, files):
|
|
||||||
return
|
|
||||||
self.info('Calculating translation statistics...')
|
|
||||||
stats = {}
|
|
||||||
jobs = (
|
|
||||||
['msgfmt', '--statistics', '-o', os.devnull, x] for x in files
|
|
||||||
)
|
|
||||||
for f, line in zip(files, parallel_check_output(jobs, self.info)):
|
|
||||||
nums = tuple(map(int, re.findall(r'\d+', line)))
|
|
||||||
trans = nums[0]
|
|
||||||
total = trans if len(nums) == 1 else (trans + nums[1])
|
|
||||||
locale = self.mo_file(f)[0]
|
|
||||||
stats[locale] = min(1.0, float(trans)/total)
|
|
||||||
|
|
||||||
base = self.d(dest)
|
|
||||||
try:
|
|
||||||
os.mkdir(base)
|
|
||||||
except EnvironmentError as err:
|
|
||||||
if err.errno != errno.EEXIST:
|
|
||||||
raise
|
|
||||||
cPickle.dump(stats, open(dest, 'wb'), -1)
|
|
||||||
|
|
||||||
def compile_user_manual_translations(self):
|
def compile_user_manual_translations(self):
|
||||||
self.info('Compiling user manual translations...')
|
self.info('Compiling user manual translations...')
|
||||||
srcbase = self.j(self.d(self.SRC), 'translations', 'manual')
|
srcbase = self.j(self.d(self.SRC), 'translations', 'manual')
|
||||||
destbase = self.j(self.d(self.SRC), 'manual', 'locale')
|
destbase = self.j(self.d(self.SRC), 'manual', 'locale')
|
||||||
complete = {}
|
complete = {}
|
||||||
|
all_stats = defaultdict(lambda : {'translated': 0, 'untranslated': 0})
|
||||||
|
files = []
|
||||||
for x in os.listdir(srcbase):
|
for x in os.listdir(srcbase):
|
||||||
q = self.j(srcbase, x)
|
q = self.j(srcbase, x)
|
||||||
if not os.path.isdir(q):
|
if not os.path.isdir(q):
|
||||||
@ -356,27 +435,27 @@ class Translations(POT): # {{{
|
|||||||
if os.path.exists(dest):
|
if os.path.exists(dest):
|
||||||
shutil.rmtree(dest)
|
shutil.rmtree(dest)
|
||||||
os.makedirs(dest)
|
os.makedirs(dest)
|
||||||
jobs = []
|
|
||||||
for po in os.listdir(q):
|
for po in os.listdir(q):
|
||||||
if not po.endswith('.po'):
|
if not po.endswith('.po'):
|
||||||
continue
|
continue
|
||||||
jobs.append([
|
mofile = self.j(dest, po.rpartition('.')[0] + '.mo')
|
||||||
'msgfmt', '--statistics', '-o', self.j(
|
files.append((self.j(q, po), mofile))
|
||||||
dest, po.rpartition('.')[0] + '.mo'), self.j(q, po)])
|
|
||||||
stats = tuple(parallel_check_output(jobs, self.info))
|
def handle_stats(src, nums):
|
||||||
translated = untranslated = 0
|
locale = self.b(self.d(src))
|
||||||
for line in stats:
|
stats = all_stats[locale]
|
||||||
nums = tuple(map(int, re.findall(r'\d+', line)))
|
stats['translated'] += nums[0]
|
||||||
translated += nums[0]
|
if len(nums) > 1:
|
||||||
if len(nums) > 1:
|
stats['untranslated'] += nums[1]
|
||||||
untranslated += nums[1]
|
|
||||||
stats = {'translated':translated, 'untranslated':untranslated}
|
self.compile_group(files, handle_stats=handle_stats)
|
||||||
with open(self.j(self.d(dest), 'stats.json'), 'wb') as f:
|
for locale, stats in all_stats.iteritems():
|
||||||
|
with open(self.j(srcbase, locale, 'stats.json'), 'wb') as f:
|
||||||
json.dump(stats, f)
|
json.dump(stats, f)
|
||||||
total = translated + untranslated
|
total = stats['translated'] + stats['untranslated']
|
||||||
# Raise the 30% threshold in the future
|
# Raise the 30% threshold in the future
|
||||||
if total and (translated / float(total)) > 0.3:
|
if total and (stats['translated'] / float(total)) > 0.3:
|
||||||
complete[x] = stats
|
complete[locale] = stats
|
||||||
with open(self.j(destbase, 'completed.json'), 'wb') as f:
|
with open(self.j(destbase, 'completed.json'), 'wb') as f:
|
||||||
json.dump(complete, f, indent=True, sort_keys=True)
|
json.dump(complete, f, indent=True, sort_keys=True)
|
||||||
|
|
||||||
@ -389,6 +468,7 @@ class Translations(POT): # {{{
|
|||||||
destbase = self.j(self.d(self.SRC), 'manual', 'locale')
|
destbase = self.j(self.d(self.SRC), 'manual', 'locale')
|
||||||
if os.path.exists(destbase):
|
if os.path.exists(destbase):
|
||||||
shutil.rmtree(destbase)
|
shutil.rmtree(destbase)
|
||||||
|
shutil.rmtree(self.cache_dir)
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user