From 9a44ef24ffe5f232dd7c2eaa83a987fc7eb500ce Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Oct 2019 13:35:07 +0530
Subject: [PATCH] Avoid re-parsing the OPF file in worker processes

---
 src/calibre/ebooks/oeb/polish/container.py | 12 ++++---
 src/calibre/srv/render_book.py             | 37 +++++++++++-----------
 2 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py
index 0fdc11309f..79b4279e6f 100644
--- a/src/calibre/ebooks/oeb/polish/container.py
+++ b/src/calibre/ebooks/oeb/polish/container.py
@@ -298,10 +298,8 @@ class Container(ContainerBase):  # {{{
                 # some epubs include the opf in the manifest with an incorrect mime type
                 self.mime_map[name] = item.get('media-type')
 
-    def clone_data(self, dest_dir):
-        Container.commit(self, keep_parsed=True)
-        self.cloned = True
-        clone_dir(self.root, dest_dir)
+    def data_for_clone(self, dest_dir=None):
+        dest_dir = dest_dir or self.root
         return {
             'root': dest_dir,
             'opf_name': self.opf_name,
@@ -314,6 +312,12 @@ class Container(ContainerBase):  # {{{
                 for name, path in iteritems(self.name_path_map)}
         }
 
+    def clone_data(self, dest_dir):
+        Container.commit(self, keep_parsed=True)
+        self.cloned = True
+        clone_dir(self.root, dest_dir)
+        return self.data_for_clone(dest_dir)  # describe the copy in dest_dir, not self.root
+
     def add_name_to_manifest(self, name, process_manifest_item=None):
         ' Add an entry to the manifest for a file with the specified name. Returns the manifest id. '
         all_ids = {x.get('id') for x in self.opf_xpath('//*[@id]')}
diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index 69d6f24f9b..2e8c4bb1af 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -43,15 +43,15 @@ from calibre.utils.date import EPOCH
 from calibre.utils.ipc.simple_worker import start_pipe_worker
 from calibre.utils.iso8601 import parse_iso8601
 from calibre.utils.logging import default_log
-from calibre.utils.serialize import json_loads
+from calibre.utils.serialize import (
+    json_dumps, json_loads, msgpack_dumps, msgpack_loads
+)
 from calibre.utils.short_uuid import uuid4
 from polyglot.binary import (
     as_base64_unicode as encode_component, from_base64_bytes,
     from_base64_unicode as decode_component
 )
-from polyglot.builtins import (
-    as_bytes, is_py3, iteritems, itervalues, map, unicode_type
-)
+from polyglot.builtins import as_bytes, is_py3, iteritems, map, unicode_type
 from polyglot.urllib import quote, urlparse
 
 RENDER_VERSION = 1
@@ -460,7 +460,7 @@ class RenderManager(object):
 
         group_sz = int(ceil(len(names) / num_workers))
         for group, worker in zip(grouper(group_sz, names), self.workers):
-            worker.stdin.write(as_bytes(json.dumps((worker.output_path, group,) + args)))
+            worker.stdin.write(as_bytes(msgpack_dumps((worker.output_path, group,) + args)))
             worker.stdin.flush(), worker.stdin.close()
             worker.job_sent = True
 
@@ -479,7 +479,7 @@ class RenderManager(object):
                     error = f.read().decode('utf-8', 'replace')
             else:
                 with lopen(worker.output_path, 'rb') as f:
-                    results.append(json.loads(f.read()))
+                    results.append(msgpack_loads(f.read()))
         if error is not None:
             raise Exception('Render worker failed with error:\n' + error)
         return results
@@ -490,10 +490,10 @@ def worker_main():
     raw = stdin.read()
     if raw == b'_':
         return
-    args = json.loads(raw)
+    args = msgpack_loads(raw)
     result = process_book_files(*args[1:])
     with open(args[0], 'wb') as f:
-        f.write(as_bytes(json.dumps(result)))
+        f.write(as_bytes(msgpack_dumps(result)))
 
 
 def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
@@ -520,8 +520,9 @@ def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
     return name in changed
 
 
-def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, container=None):
-    container = container or SimpleContainer(container_dir, opfpath, default_log)
+def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, data_for_clone, container=None):
+    if container is None:
+        container = SimpleContainer(container_dir, opfpath, default_log, clone_data=data_for_clone)
     link_to_map = {}
     html_data = {}
     virtualized_names = set()
@@ -541,10 +542,7 @@ def process_book_files(names, container_dir, opfpath, virtualize_resources, link
             transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
         elif mt == 'image/svg+xml':
             transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
-    for v in itervalues(link_to_map):
-        for k in v:
-            v[k] = tuple(v[k])
-    return link_to_map, html_data, tuple(virtualized_names)
+    return link_to_map, html_data, virtualized_names
 
 
 def process_exploded_book(
@@ -607,7 +605,11 @@ def process_exploded_book(
         (n for n, mt in iteritems(container.mime_map) if mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'),
         key=work_priority)
 
-    results = render_manager(names, (tdir, opfpath, virtualize_resources, book_render_data['link_uid']), container)
+    results = render_manager(
+        names, (
+            tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container.data_for_clone()
+        ), container
+    )
     ltm = book_render_data['link_to_map']
     html_data = {}
     virtualized_names = set()
@@ -621,10 +623,8 @@ def process_exploded_book(
 
     for link_to_map, hdata, vnames in results:
         html_data.update(hdata)
-        virtualized_names |= set(vnames)
+        virtualized_names |= vnames
         for k, v in iteritems(link_to_map):
-            for x in v:
-                v[x] = set(v[x])
             if k in ltm:
                 merge_ltm(ltm[k], v)
             else:
@@ -838,7 +838,6 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
             book_metadata=mi, virtualize_resources=virtualize_resources
         )
         if serialize_metadata:
-            from calibre.utils.serialize import json_dumps
             from calibre.ebooks.metadata.book.serialize import metadata_as_dict
             d = metadata_as_dict(mi)
             d.pop('cover_data', None)