mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Speed up compiling translations
Avoids calling a separate msgfmt binary for every .po file since there are almost 4000 of them. Instead, use msgfmt.py.
This commit is contained in:
parent
2865326de3
commit
6811bb0cf7
@ -5,45 +5,15 @@
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import subprocess, os
|
import subprocess, os, itertools, json, sys
|
||||||
from multiprocessing.dummy import Pool
|
from multiprocessing.dummy import Pool
|
||||||
|
from threading import Thread
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from contextlib import closing
|
from contextlib import closing
|
||||||
|
|
||||||
from setup import iswindows
|
from polyglot.builtins import unicode_type, as_bytes
|
||||||
from polyglot.builtins import unicode_type
|
|
||||||
|
|
||||||
if iswindows:
|
cpu_count = min(16, max(1, os.cpu_count()))
|
||||||
from ctypes import windll, Structure, POINTER, c_size_t
|
|
||||||
from ctypes.wintypes import WORD, DWORD, LPVOID
|
|
||||||
|
|
||||||
class SYSTEM_INFO(Structure):
|
|
||||||
_fields_ = [
|
|
||||||
("wProcessorArchitecture", WORD),
|
|
||||||
("wReserved", WORD),
|
|
||||||
("dwPageSize", DWORD),
|
|
||||||
("lpMinimumApplicationAddress", LPVOID),
|
|
||||||
("lpMaximumApplicationAddress", LPVOID),
|
|
||||||
("dwActiveProcessorMask", c_size_t),
|
|
||||||
("dwNumberOfProcessors", DWORD),
|
|
||||||
("dwProcessorType", DWORD),
|
|
||||||
("dwAllocationGranularity", DWORD),
|
|
||||||
("wProcessorLevel", WORD),
|
|
||||||
("wProcessorRevision", WORD)]
|
|
||||||
gsi = windll.kernel32.GetSystemInfo
|
|
||||||
gsi.argtypes = [POINTER(SYSTEM_INFO)]
|
|
||||||
gsi.restype = None
|
|
||||||
si = SYSTEM_INFO()
|
|
||||||
gsi(si)
|
|
||||||
cpu_count = si.dwNumberOfProcessors
|
|
||||||
else:
|
|
||||||
from multiprocessing import cpu_count
|
|
||||||
try:
|
|
||||||
cpu_count = cpu_count()
|
|
||||||
except NotImplementedError:
|
|
||||||
cpu_count = 1
|
|
||||||
|
|
||||||
cpu_count = min(16, max(1, cpu_count))
|
|
||||||
|
|
||||||
|
|
||||||
def run_worker(job, decorate=True):
|
def run_worker(job, decorate=True):
|
||||||
@ -95,3 +65,44 @@ def parallel_check_output(jobs, log):
|
|||||||
log(stderr)
|
log(stderr)
|
||||||
raise SystemExit(1)
|
raise SystemExit(1)
|
||||||
yield stdout
|
yield stdout
|
||||||
|
|
||||||
|
|
||||||
|
def get_tasks(it, size):
    """Break *it* into successive tuples of at most *size* items.

    Yields non-empty tuples until the underlying iterable is exhausted;
    the final tuple may be shorter than *size*.
    """
    stream = iter(it)
    while True:
        batch = tuple(itertools.islice(stream, size))
        if not batch:
            break
        yield batch
|
||||||
|
|
||||||
|
|
||||||
|
def batched_parallel_jobs(cmd, jobs, cwd=None):
    """Run *cmd* in parallel worker processes, sending each worker a JSON
    encoded batch of *jobs* on stdin, and return the combined JSON decoded
    results from all workers.

    If any worker exits with a non-zero status its stderr is echoed and
    SystemExit is raised after all workers have finished.
    """
    # Divide the jobs evenly over the available CPUs, rounding up so that
    # no job is left out of the final batch.
    chunksize, remainder = divmod(len(jobs), cpu_count)
    if remainder:
        chunksize += 1

    def read_output(proc):
        # Runs in a thread: feed this worker its batch and collect
        # (stdout, stderr) without deadlocking on full pipes.
        proc.output = proc.communicate(as_bytes(json.dumps(proc.jobs_batch)))

    workers = []
    for batch in get_tasks(jobs, chunksize):
        proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
        proc.jobs_batch = batch
        thread = Thread(target=read_output, args=(proc,))
        thread.daemon = True
        proc.output_thread = thread
        thread.start()
        workers.append(proc)

    ans, failed = [], False
    for proc in workers:
        proc.output_thread.join()
        if proc.wait() == 0:
            ans.extend(json.loads(proc.output[0]))
        else:
            sys.stderr.buffer.write(proc.output[1])
            sys.stderr.buffer.flush()
            failed = True
    if failed:
        raise SystemExit('Worker process failed')
    return ans
|
||||||
|
@ -12,7 +12,7 @@ from locale import normalize as normalize_locale
|
|||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file, dump_json
|
from setup import Command, __appname__, __version__, require_git_master, build_cache_dir, edit_file, dump_json
|
||||||
from setup.parallel_build import parallel_check_output
|
from setup.parallel_build import batched_parallel_jobs
|
||||||
from polyglot.builtins import codepoint_to_chr, iteritems, range
|
from polyglot.builtins import codepoint_to_chr, iteritems, range
|
||||||
is_ci = os.environ.get('CI', '').lower() == 'true'
|
is_ci = os.environ.get('CI', '').lower() == 'true'
|
||||||
|
|
||||||
@ -320,8 +320,7 @@ class Translations(POT): # {{{
|
|||||||
self.compile_changelog_translations()
|
self.compile_changelog_translations()
|
||||||
|
|
||||||
def compile_group(self, files, handle_stats=None, file_ok=None, action_per_file=None):
|
def compile_group(self, files, handle_stats=None, file_ok=None, action_per_file=None):
|
||||||
from calibre.constants import islinux
|
ok_files = []
|
||||||
jobs, ok_files = [], []
|
|
||||||
hashmap = {}
|
hashmap = {}
|
||||||
|
|
||||||
def stats_cache(src, data=None):
|
def stats_cache(src, data=None):
|
||||||
@ -349,20 +348,21 @@ class Translations(POT): # {{{
|
|||||||
else:
|
else:
|
||||||
if file_ok is None or file_ok(data, src):
|
if file_ok is None or file_ok(data, src):
|
||||||
# self.info('\t' + os.path.relpath(src, self.j(self.d(self.SRC), 'translations')))
|
# self.info('\t' + os.path.relpath(src, self.j(self.d(self.SRC), 'translations')))
|
||||||
if islinux:
|
|
||||||
msgfmt = ['msgfmt']
|
|
||||||
else:
|
|
||||||
msgfmt = [sys.executable, self.j(self.SRC, 'calibre', 'translations', 'msgfmt.py')]
|
|
||||||
jobs.append(msgfmt + ['--statistics', '-o', dest, src])
|
|
||||||
ok_files.append((src, dest))
|
ok_files.append((src, dest))
|
||||||
hashmap[src] = current_hash
|
hashmap[src] = current_hash
|
||||||
if action_per_file is not None:
|
if action_per_file is not None:
|
||||||
action_per_file(src)
|
action_per_file(src)
|
||||||
|
|
||||||
self.info(f'\tCompiling {len(jobs)} files')
|
self.info(f'\tCompiling {len(ok_files)} files')
|
||||||
for (src, dest), line in zip(ok_files, parallel_check_output(jobs, self.info)):
|
items = []
|
||||||
|
results = batched_parallel_jobs(
|
||||||
|
[sys.executable, self.j(self.SRC, 'calibre', 'translations', 'msgfmt.py'), 'STDIN'],
|
||||||
|
ok_files)
|
||||||
|
for (src, dest), nums in zip(ok_files, results):
|
||||||
|
items.append((src, dest, nums))
|
||||||
|
|
||||||
|
for (src, dest, nums) in items:
|
||||||
self.write_cache(open(dest, 'rb').read(), hashmap[src], src)
|
self.write_cache(open(dest, 'rb').read(), hashmap[src], src)
|
||||||
nums = tuple(map(int, re.findall(r'\d+', line)))
|
|
||||||
stats_cache(src, nums)
|
stats_cache(src, nums)
|
||||||
if handle_stats is not None:
|
if handle_stats is not None:
|
||||||
handle_stats(src, nums)
|
handle_stats(src, nums)
|
||||||
|
@ -50,7 +50,6 @@ def usage(code, msg=''):
|
|||||||
|
|
||||||
def add(ctxt, id, str, fuzzy):
|
def add(ctxt, id, str, fuzzy):
|
||||||
"Add a non-fuzzy translation to the dictionary."
|
"Add a non-fuzzy translation to the dictionary."
|
||||||
global MESSAGES
|
|
||||||
if not fuzzy and str:
|
if not fuzzy and str:
|
||||||
if id:
|
if id:
|
||||||
STATS['translated'] += 1
|
STATS['translated'] += 1
|
||||||
@ -65,7 +64,6 @@ def add(ctxt, id, str, fuzzy):
|
|||||||
|
|
||||||
def generate():
|
def generate():
|
||||||
"Return the generated output."
|
"Return the generated output."
|
||||||
global MESSAGES
|
|
||||||
# the keys are sorted in the .mo file
|
# the keys are sorted in the .mo file
|
||||||
keys = sorted(MESSAGES.keys())
|
keys = sorted(MESSAGES.keys())
|
||||||
offsets = []
|
offsets = []
|
||||||
@ -236,9 +234,28 @@ def make(filename, outfile):
|
|||||||
print(msg, file=sys.stderr)
|
print(msg, file=sys.stderr)
|
||||||
|
|
||||||
|
|
||||||
|
def make_with_stats(filename, outfile):
    """Compile the .po file *filename* into *outfile* and return a
    (translated, untranslated) message-count pair for it.
    """
    # Start from a clean slate: these module-level accumulators persist
    # between calls, so they must be reset before every compilation.
    MESSAGES.clear()
    STATS.update(translated=0, untranslated=0)
    make(filename, outfile)
    return STATS['translated'], STATS['untranslated']
|
||||||
|
|
||||||
|
|
||||||
|
def run_batch(pairs):
    """Compile each (source, destination) pair in *pairs* in turn, yielding
    the (translated, untranslated) stats tuple for each file.
    """
    for src, dest in pairs:
        yield make_with_stats(src, dest)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
args = sys.argv[1:]
|
||||||
|
if args == ['STDIN']:
|
||||||
|
import json
|
||||||
|
results = tuple(run_batch(json.loads(sys.stdin.buffer.read())))
|
||||||
|
sys.stdout.buffer.write(json.dumps(results).encode('utf-8'))
|
||||||
|
sys.stdout.close()
|
||||||
|
return
|
||||||
try:
|
try:
|
||||||
opts, args = getopt.getopt(sys.argv[1:], 'hVso:',
|
opts, args = getopt.getopt(args, 'hVso:',
|
||||||
['help', 'version', 'statistics', 'output-file='])
|
['help', 'version', 'statistics', 'output-file='])
|
||||||
except getopt.error as msg:
|
except getopt.error as msg:
|
||||||
usage(1, msg)
|
usage(1, msg)
|
||||||
@ -263,8 +280,7 @@ def main():
|
|||||||
return
|
return
|
||||||
|
|
||||||
for filename in args:
|
for filename in args:
|
||||||
STATS['translated'] = STATS['untranslated'] = 0
|
translated, untranslated = make_with_stats(filename, outfile)
|
||||||
make(filename, outfile)
|
|
||||||
if output_stats:
|
if output_stats:
|
||||||
print(STATS['translated'], 'translated messages,', STATS['untranslated'], 'untranslated messages.')
|
print(STATS['translated'], 'translated messages,', STATS['untranslated'], 'untranslated messages.')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user