mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Avoid re-parsing the OPF file in worker processes
This commit is contained in:
parent
36d5dee047
commit
9a44ef24ff
@ -298,10 +298,8 @@ class Container(ContainerBase): # {{{
|
|||||||
# some epubs include the opf in the manifest with an incorrect mime type
|
# some epubs include the opf in the manifest with an incorrect mime type
|
||||||
self.mime_map[name] = item.get('media-type')
|
self.mime_map[name] = item.get('media-type')
|
||||||
|
|
||||||
def clone_data(self, dest_dir):
|
def data_for_clone(self, dest_dir=None):
|
||||||
Container.commit(self, keep_parsed=True)
|
dest_dir = dest_dir or self.root
|
||||||
self.cloned = True
|
|
||||||
clone_dir(self.root, dest_dir)
|
|
||||||
return {
|
return {
|
||||||
'root': dest_dir,
|
'root': dest_dir,
|
||||||
'opf_name': self.opf_name,
|
'opf_name': self.opf_name,
|
||||||
@ -314,6 +312,12 @@ class Container(ContainerBase): # {{{
|
|||||||
for name, path in iteritems(self.name_path_map)}
|
for name, path in iteritems(self.name_path_map)}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def clone_data(self, dest_dir):
|
||||||
|
Container.commit(self, keep_parsed=True)
|
||||||
|
self.cloned = True
|
||||||
|
clone_dir(self.root, dest_dir)
|
||||||
|
return self.data_for_clone()
|
||||||
|
|
||||||
def add_name_to_manifest(self, name, process_manifest_item=None):
|
def add_name_to_manifest(self, name, process_manifest_item=None):
|
||||||
' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
|
' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
|
||||||
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
|
all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
|
||||||
|
@ -43,15 +43,15 @@ from calibre.utils.date import EPOCH
|
|||||||
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
||||||
from calibre.utils.iso8601 import parse_iso8601
|
from calibre.utils.iso8601 import parse_iso8601
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
from calibre.utils.serialize import json_loads
|
from calibre.utils.serialize import (
|
||||||
|
json_dumps, json_loads, msgpack_dumps, msgpack_loads
|
||||||
|
)
|
||||||
from calibre.utils.short_uuid import uuid4
|
from calibre.utils.short_uuid import uuid4
|
||||||
from polyglot.binary import (
|
from polyglot.binary import (
|
||||||
as_base64_unicode as encode_component, from_base64_bytes,
|
as_base64_unicode as encode_component, from_base64_bytes,
|
||||||
from_base64_unicode as decode_component
|
from_base64_unicode as decode_component
|
||||||
)
|
)
|
||||||
from polyglot.builtins import (
|
from polyglot.builtins import as_bytes, is_py3, iteritems, map, unicode_type
|
||||||
as_bytes, is_py3, iteritems, itervalues, map, unicode_type
|
|
||||||
)
|
|
||||||
from polyglot.urllib import quote, urlparse
|
from polyglot.urllib import quote, urlparse
|
||||||
|
|
||||||
RENDER_VERSION = 1
|
RENDER_VERSION = 1
|
||||||
@ -460,7 +460,7 @@ class RenderManager(object):
|
|||||||
|
|
||||||
group_sz = int(ceil(len(names) / num_workers))
|
group_sz = int(ceil(len(names) / num_workers))
|
||||||
for group, worker in zip(grouper(group_sz, names), self.workers):
|
for group, worker in zip(grouper(group_sz, names), self.workers):
|
||||||
worker.stdin.write(as_bytes(json.dumps((worker.output_path, group,) + args)))
|
worker.stdin.write(as_bytes(msgpack_dumps((worker.output_path, group,) + args)))
|
||||||
worker.stdin.flush(), worker.stdin.close()
|
worker.stdin.flush(), worker.stdin.close()
|
||||||
worker.job_sent = True
|
worker.job_sent = True
|
||||||
|
|
||||||
@ -479,7 +479,7 @@ class RenderManager(object):
|
|||||||
error = f.read().decode('utf-8', 'replace')
|
error = f.read().decode('utf-8', 'replace')
|
||||||
else:
|
else:
|
||||||
with lopen(worker.output_path, 'rb') as f:
|
with lopen(worker.output_path, 'rb') as f:
|
||||||
results.append(json.loads(f.read()))
|
results.append(msgpack_loads(f.read()))
|
||||||
if error is not None:
|
if error is not None:
|
||||||
raise Exception('Render worker failed with error:\n' + error)
|
raise Exception('Render worker failed with error:\n' + error)
|
||||||
return results
|
return results
|
||||||
@ -490,10 +490,10 @@ def worker_main():
|
|||||||
raw = stdin.read()
|
raw = stdin.read()
|
||||||
if raw == b'_':
|
if raw == b'_':
|
||||||
return
|
return
|
||||||
args = json.loads(raw)
|
args = msgpack_loads(raw)
|
||||||
result = process_book_files(*args[1:])
|
result = process_book_files(*args[1:])
|
||||||
with open(args[0], 'wb') as f:
|
with open(args[0], 'wb') as f:
|
||||||
f.write(as_bytes(json.dumps(result)))
|
f.write(as_bytes(msgpack_dumps(result)))
|
||||||
|
|
||||||
|
|
||||||
def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
|
def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
|
||||||
@ -520,8 +520,9 @@ def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
|
|||||||
return name in changed
|
return name in changed
|
||||||
|
|
||||||
|
|
||||||
def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, container=None):
|
def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, data_for_clone, container=None):
|
||||||
container = container or SimpleContainer(container_dir, opfpath, default_log)
|
if container is None:
|
||||||
|
container = SimpleContainer(container_dir, opfpath, default_log, clone_data=data_for_clone)
|
||||||
link_to_map = {}
|
link_to_map = {}
|
||||||
html_data = {}
|
html_data = {}
|
||||||
virtualized_names = set()
|
virtualized_names = set()
|
||||||
@ -541,10 +542,7 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
|
|||||||
transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
|
transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
|
||||||
elif mt == 'image/svg+xml':
|
elif mt == 'image/svg+xml':
|
||||||
transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
|
transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
|
||||||
for v in itervalues(link_to_map):
|
return link_to_map, html_data, virtualized_names
|
||||||
for k in v:
|
|
||||||
v[k] = tuple(v[k])
|
|
||||||
return link_to_map, html_data, tuple(virtualized_names)
|
|
||||||
|
|
||||||
|
|
||||||
def process_exploded_book(
|
def process_exploded_book(
|
||||||
@ -607,7 +605,11 @@ def process_exploded_book(
|
|||||||
(n for n, mt in iteritems(container.mime_map) if mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'),
|
(n for n, mt in iteritems(container.mime_map) if mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'),
|
||||||
key=work_priority)
|
key=work_priority)
|
||||||
|
|
||||||
results = render_manager(names, (tdir, opfpath, virtualize_resources, book_render_data['link_uid']), container)
|
results = render_manager(
|
||||||
|
names, (
|
||||||
|
tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container.data_for_clone()
|
||||||
|
), container
|
||||||
|
)
|
||||||
ltm = book_render_data['link_to_map']
|
ltm = book_render_data['link_to_map']
|
||||||
html_data = {}
|
html_data = {}
|
||||||
virtualized_names = set()
|
virtualized_names = set()
|
||||||
@ -621,10 +623,8 @@ def process_exploded_book(
|
|||||||
|
|
||||||
for link_to_map, hdata, vnames in results:
|
for link_to_map, hdata, vnames in results:
|
||||||
html_data.update(hdata)
|
html_data.update(hdata)
|
||||||
virtualized_names |= set(vnames)
|
virtualized_names |= vnames
|
||||||
for k, v in iteritems(link_to_map):
|
for k, v in iteritems(link_to_map):
|
||||||
for x in v:
|
|
||||||
v[x] = set(v[x])
|
|
||||||
if k in ltm:
|
if k in ltm:
|
||||||
merge_ltm(ltm[k], v)
|
merge_ltm(ltm[k], v)
|
||||||
else:
|
else:
|
||||||
@ -838,7 +838,6 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
|
|||||||
book_metadata=mi, virtualize_resources=virtualize_resources
|
book_metadata=mi, virtualize_resources=virtualize_resources
|
||||||
)
|
)
|
||||||
if serialize_metadata:
|
if serialize_metadata:
|
||||||
from calibre.utils.serialize import json_dumps
|
|
||||||
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
|
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
|
||||||
d = metadata_as_dict(mi)
|
d = metadata_as_dict(mi)
|
||||||
d.pop('cover_data', None)
|
d.pop('cover_data', None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user