Viewer: Run the first read book preparation in parallel

This commit is contained in:
Kovid Goyal 2019-10-21 16:20:00 +05:30
parent 243144f3b9
commit 75dd89722a
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,22 +7,26 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import json import json
import os import os
import re import re
import shutil
import sys import sys
import time
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from functools import partial from functools import partial
from itertools import count from itertools import count
from math import ceil
from css_parser import replaceUrls from css_parser import replaceUrls
from css_parser.css import CSSRule from css_parser.css import CSSRule
from calibre import force_unicode, prepare_string_for_xml from calibre import detect_ncpus, force_unicode, prepare_string_for_xml
from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_input_format from calibre.customize.ui import plugin_for_input_format
from calibre.ebooks import parse_css_length from calibre.ebooks import parse_css_length
from calibre.ebooks.css_transform_rules import StyleDeclaration from calibre.ebooks.css_transform_rules import StyleDeclaration
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, rewrite_links, EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath,
urlunquote rewrite_links, urlunquote
) )
from calibre.ebooks.oeb.iterator.book import extract_book from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.container import Container as ContainerBase
@ -31,9 +35,12 @@ from calibre.ebooks.oeb.polish.cover import (
) )
from calibre.ebooks.oeb.polish.css import transform_inline_styles from calibre.ebooks.oeb.polish.css import transform_inline_styles
from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import extract, guess_type from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.srv.metadata import encode_datetime from calibre.srv.metadata import encode_datetime
from calibre.srv.opts import grouper
from calibre.utils.date import EPOCH from calibre.utils.date import EPOCH
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.iso8601 import parse_iso8601 from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.logging import default_log from calibre.utils.logging import default_log
from calibre.utils.serialize import json_loads from calibre.utils.serialize import json_loads
@ -42,7 +49,9 @@ from polyglot.binary import (
as_base64_unicode as encode_component, from_base64_bytes, as_base64_unicode as encode_component, from_base64_bytes,
from_base64_unicode as decode_component from_base64_unicode as decode_component
) )
from polyglot.builtins import is_py3, iteritems, map, unicode_type from polyglot.builtins import (
as_bytes, is_py3, iteritems, itervalues, map, unicode_type
)
from polyglot.urllib import quote, urlparse from polyglot.urllib import quote, urlparse
RENDER_VERSION = 1 RENDER_VERSION = 1
@ -220,95 +229,16 @@ def toc_anchor_map(toc):
return dict(ans) return dict(ans)
class Container(ContainerBase): def serialize_parsed_html(root):
return as_bytes(json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')))
class SimpleContainer(ContainerBase):
tweak_mode = True tweak_mode = True
def __init__(
self, book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False,
book_metadata=None, allow_no_cover=True, virtualize_resources=True
):
log = log or default_log
self.allow_no_cover = allow_no_cover
ContainerBase.__init__(self, tdir, opfpath, log)
self.book_metadata = book_metadata
input_plugin = plugin_for_input_format(input_fmt)
self.is_comic = bool(getattr(input_plugin, 'is_image_collection', False))
if save_bookmark_data:
bm_file = 'META-INF/calibre_bookmarks.txt'
self.bookmark_data = None
if self.exists(bm_file):
with self.open(bm_file, 'rb') as f:
self.bookmark_data = f.read()
# We do not add zero byte sized files as the IndexedDB API in the
# browser has no good way to distinguish between zero byte files and
# load failures.
excluded_names = {
name for name, mt in iteritems(self.mime_map) if
name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
name == 'mimetype' or not self.has_name_and_is_not_empty(name)}
raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())
toc = get_toc(self).to_dict(count()) def create_cover_page(container, input_fmt, allow_no_cover, book_metadata=None):
if not toc or not toc.get('children'):
toc = from_xpaths(self, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
spine = [name for name, is_linear in self.spine_names]
spineq = frozenset(spine)
landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]
self.book_render_data = data = {
'version': RENDER_VERSION,
'toc':toc,
'book_format': book_fmt,
'spine':spine,
'link_uid': uuid4(),
'book_hash': book_hash,
'is_comic': self.is_comic,
'raster_cover_name': raster_cover_name,
'title_page_name': titlepage_name,
'has_maths': False,
'total_length': 0,
'spine_length': 0,
'toc_anchor_map': toc_anchor_map(toc),
'landmarks': landmarks,
'link_to_map': {},
}
# Mark the spine as dirty since we have to ensure it is normalized
for name in data['spine']:
self.parsed(name), self.dirty(name)
self.virtualized_names = set()
self.transform_all(virtualize_resources)
def manifest_data(name):
mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
ans = {
'size':os.path.getsize(self.name_path_map[name]),
'is_virtualized': name in self.virtualized_names,
'mimetype':mt,
'is_html': mt in OEB_DOCS,
}
if ans['is_html']:
root = self.parsed(name)
ans['length'] = l = get_length(root)
self.book_render_data['total_length'] += l
if name in data['spine']:
self.book_render_data['spine_length'] += l
ans['has_maths'] = hm = check_for_maths(root)
if hm:
self.book_render_data['has_maths'] = True
ans['anchor_map'] = anchor_map(root)
return ans
data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
self.commit()
for name in excluded_names:
os.remove(self.name_path_map[name])
data = json.dumps(self.book_render_data, ensure_ascii=False)
if not isinstance(data, bytes):
data = data.encode('utf-8')
with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
f.write(data)
def create_cover_page(self, input_fmt):
templ = ''' templ = '''
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head><style> <head><style>
@ -326,9 +256,9 @@ class Container(ContainerBase):
''' '''
def generic_cover(): def generic_cover():
if self.book_metadata is not None: if book_metadata is not None:
from calibre.ebooks.covers import create_cover from calibre.ebooks.covers import create_cover
mi = self.book_metadata mi = book_metadata
return create_cover(mi.title, mi.authors, mi.series, mi.series_index) return create_cover(mi.title, mi.authors, mi.series, mi.series_index)
return BLANK_JPEG return BLANK_JPEG
@ -336,144 +266,230 @@ class Container(ContainerBase):
def image_callback(cover_image, wrapped_image): def image_callback(cover_image, wrapped_image):
if cover_image: if cover_image:
image_callback.cover_data = self.raw_data(cover_image, decode=False) image_callback.cover_data = container.raw_data(cover_image, decode=False)
if wrapped_image and not getattr(image_callback, 'cover_data', None): if wrapped_image and not getattr(image_callback, 'cover_data', None):
image_callback.cover_data = self.raw_data(wrapped_image, decode=False) image_callback.cover_data = container.raw_data(wrapped_image, decode=False)
def cover_path(action, data): def cover_path(action, data):
if action == 'write_image': if action == 'write_image':
cdata = getattr(image_callback, 'cover_data', None) or generic_cover() cdata = getattr(image_callback, 'cover_data', None) or generic_cover()
data.write(cdata) data.write(cdata)
if self.allow_no_cover and not has_epub_cover(self): if allow_no_cover and not has_epub_cover(container):
return None, None return None, None
raster_cover_name, titlepage_name = set_epub_cover( raster_cover_name, titlepage_name = set_epub_cover(
self, cover_path, (lambda *a: None), options={'template':templ}, container, cover_path, (lambda *a: None), options={'template':templ},
image_callback=image_callback) image_callback=image_callback)
else: else:
raster_cover_name = find_cover_image(self, strict=True) raster_cover_name = find_cover_image(container, strict=True)
if raster_cover_name is None: if raster_cover_name is None:
if self.allow_no_cover: if allow_no_cover:
return None, None return None, None
item = self.generate_item(name='cover.jpeg', id_prefix='cover') item = container.generate_item(name='cover.jpeg', id_prefix='cover')
raster_cover_name = self.href_to_name(item.get('href'), self.opf_name) raster_cover_name = container.href_to_name(item.get('href'), container.opf_name)
with self.open(raster_cover_name, 'wb') as dest: with container.open(raster_cover_name, 'wb') as dest:
dest.write(generic_cover()) dest.write(generic_cover())
if self.is_comic: if container.is_comic:
return raster_cover_name, None return raster_cover_name, None
item = self.generate_item(name='titlepage.html', id_prefix='titlepage') item = container.generate_item(name='titlepage.html', id_prefix='titlepage')
titlepage_name = self.href_to_name(item.get('href'), self.opf_name) titlepage_name = container.href_to_name(item.get('href'), container.opf_name)
raw = templ % prepare_string_for_xml(self.name_to_href(raster_cover_name, titlepage_name), True) raw = templ % prepare_string_for_xml(container.name_to_href(raster_cover_name, titlepage_name), True)
with self.open(titlepage_name, 'wb') as f: with container.open(titlepage_name, 'wb') as f:
f.write(raw.encode('utf-8')) f.write(raw.encode('utf-8'))
spine = self.opf_xpath('//opf:spine')[0] spine = container.opf_xpath('//opf:spine')[0]
ref = spine.makeelement(OPF('itemref'), idref=item.get('id')) ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
self.insert_into_xml(spine, ref, index=0) container.insert_into_xml(spine, ref, index=0)
self.dirty(self.opf_name) container.dirty(container.opf_name)
return raster_cover_name, titlepage_name return raster_cover_name, titlepage_name
def transform_html(self, name, virtualize_resources):
style_xpath = XPath('//h:style') def transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names):
changed = False
sheet = container.parsed(name)
if virtualize_resources:
changed_names = set()
link_replacer = create_link_replacer(container, link_uid, changed_names)
replaceUrls(sheet, partial(link_replacer, name))
if name in changed_names:
changed = True
virtualized_names.add(name)
if transform_sheet(sheet):
changed = True
if changed:
raw = container.serialize_item(name)
else:
raw = container.raw_data(name, decode=False)
raw = raw.lstrip()
if not raw.startswith(b'@charset'):
raw = b'@charset "UTF-8";\n' + raw
changed = True
if changed:
with container.open(name, 'wb') as f:
f.write(raw)
def transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names):
if not virtualize_resources:
return
link_replacer = create_link_replacer(container, link_uid, set())
xlink = XLINK('href')
altered = False
xlink_xpath = XPath('//*[@xl:href]')
for elem in xlink_xpath(container.parsed(name)):
href = elem.get(xlink)
if not href.startswith('#'):
elem.set(xlink, link_replacer(name, href))
altered = True
if altered:
virtualized_names.add(name)
container.dirty(name)
container.commit_item(name)
def transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names):
link_xpath = XPath('//h:a[@href]') link_xpath = XPath('//h:a[@href]')
img_xpath = XPath('//h:img[@src]') img_xpath = XPath('//h:img[@src]')
res_link_xpath = XPath('//h:link[@href]') res_link_xpath = XPath('//h:link[@href]')
root = self.parsed(name) root = container.parsed(name)
head = ensure_head(root) changed_names = set()
changed = False link_replacer = create_link_replacer(container, link_uid, changed_names)
for style in style_xpath(root):
# Firefox flakes out sometimes when dynamically creating <style> tags,
# so convert them to external stylesheets to ensure they never fail
if style.text and (style.get('type') or 'text/css').lower() == 'text/css':
in_head = has_ancestor(style, head)
if not in_head:
extract(style)
head.append(style)
css = style.text
style.clear()
style.tag = XHTML('link')
style.set('type', 'text/css')
style.set('rel', 'stylesheet')
sname = self.add_file(name + '.css', css.encode('utf-8'), modify_name_if_needed=True)
style.set('href', self.name_to_href(sname, name))
changed = True
# Used for viewing images # Used for viewing images
for img in img_xpath(root): for img in img_xpath(root):
img_name = self.href_to_name(img.get('src'), name) img_name = container.href_to_name(img.get('src'), name)
if img_name: if img_name:
img.set('data-calibre-src', img_name) img.set('data-calibre-src', img_name)
changed = True
# Disable non stylsheet link tags. This link will not be loaded by the # Disable non-stylesheet link tags. This link will not be loaded by the
# browser anyway and will cause the resource load check to hang # browser anyway and will cause the resource load check to hang
for link in res_link_xpath(root): for link in res_link_xpath(root):
ltype = (link.get('type') or 'text/css').lower() ltype = (link.get('type') or 'text/css').lower()
rel = (link.get('rel') or 'stylesheet').lower() rel = (link.get('rel') or 'stylesheet').lower()
if ltype != 'text/css' or rel != 'stylesheet': if ltype != 'text/css' or rel != 'stylesheet':
link.attrib.clear() link.attrib.clear()
def transform_and_virtualize_sheet(sheet):
changed = transform_sheet(sheet)
if virtualize_resources:
replaceUrls(sheet, partial(link_replacer, name))
if name in changed_names:
virtualized_names.add(name)
changed = True changed = True
return changed
# Transform <style> and style="" # Transform <style> and style=""
if transform_inline_styles(self, name, transform_sheet=transform_sheet, transform_style=transform_declaration): transform_inline_styles(container, name, transform_sheet=transform_and_virtualize_sheet, transform_style=transform_declaration)
changed = True
if not virtualize_resources: if virtualize_resources:
link_uid = self.book_render_data['link_uid'] virtualize_html(container, name, link_uid, link_to_map, virtualized_names)
link_replacer = create_link_replacer(self, link_uid, set()) else:
ltm = self.book_render_data['link_to_map']
for a in link_xpath(root): for a in link_xpath(root):
href = link_replacer(name, a.get('href')) href = link_replacer(name, a.get('href'))
if href and href.startswith(link_uid): if href and href.startswith(link_uid):
a.set('href', 'javascript:void(0)') a.set('href', 'javascript:void(0)')
parts = decode_url(href.split('|')[1]) parts = decode_url(href.split('|')[1])
lname, lfrag = parts[0], parts[1] lname, lfrag = parts[0], parts[1]
ltm.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name) link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name)
a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False)) a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False))
changed = True
if changed: shtml = serialize_parsed_html(root)
self.dirty(name) with container.open(name, 'wb') as f:
f.write(shtml)
def transform_css(self, name):
sheet = self.parsed(name)
if transform_sheet(sheet):
self.dirty(name)
def transform_all(self, virtualize_resources): class RenderManager(object):
for name, mt in tuple(iteritems(self.mime_map)):
mt = mt.lower()
if mt in OEB_DOCS:
self.transform_html(name, virtualize_resources)
for name, mt in tuple(iteritems(self.mime_map)):
mt = mt.lower()
if mt in OEB_STYLES:
self.transform_css(name)
if virtualize_resources:
self.virtualize_resources()
ltm = self.book_render_data['link_to_map'] def launch_worker(self):
for name, amap in iteritems(ltm): with lopen(os.path.join(self.tdir, '{}.json'.format(len(self.workers))), 'wb') as output:
for k, v in tuple(iteritems(amap)): error = lopen(os.path.join(self.tdir, '{}.error'.format(len(self.workers))), 'wb')
amap[k] = tuple(v) # needed for JSON serialization p = start_pipe_worker('from calibre.srv.render_book import worker_main; worker_main()', stdout=error, stderr=error)
p.output_path = output.name
p.error_path = error.name
self.workers.append(p)
def virtualize_resources(self): def __enter__(self):
self.workers = []
self.tdir = PersistentTemporaryDirectory()
self.launch_worker(), self.launch_worker()
return self
def __exit__(self, *a):
while self.workers:
p = self.workers.pop()
if p.returncode is None:
p.terminate()
if not iswindows and p.poll() is None:
time.sleep(0.02)
if p.poll() is None:
p.kill()
del self.workers
try:
shutil.rmtree(self.tdir)
except EnvironmentError:
time.sleep(0.1)
try:
shutil.rmtree(self.tdir)
except EnvironmentError:
pass
del self.tdir
def __call__(self, names, args, in_process_container):
num_workers = min(detect_ncpus(), len(names))
if num_workers > 1:
total_sz = sum(os.path.getsize(in_process_container.name_path_map[n]) for n in names)
if total_sz < 128 * 1024:
num_workers = 1
if num_workers == 1:
return [process_book_files(names, *args, container=in_process_container)]
while len(self.workers) < num_workers:
self.launch_worker()
group_sz = int(ceil(len(names) / num_workers))
for group, worker in zip(grouper(group_sz, names), self.workers):
worker.stdin.write(as_bytes(json.dumps((worker.output_path, group,) + args)))
worker.stdin.flush(), worker.stdin.close()
worker.job_sent = True
for worker in self.workers:
if not hasattr(worker, 'job_sent'):
worker.stdin.write(b'_'), worker.stdin.flush(), worker.stdin.close()
error = None
results = []
for worker in self.workers:
if not hasattr(worker, 'job_sent'):
worker.wait()
continue
if worker.wait() != 0:
with lopen(worker.error_path, 'rb') as f:
error = f.read().decode('utf-8', 'replace')
else:
with lopen(worker.output_path, 'rb') as f:
results.append(json.loads(f.read()))
if error is not None:
raise Exception('Render worker failed with error:\n' + error)
return results
def worker_main():
stdin = getattr(sys.stdin, 'buffer', sys.stdin)
raw = stdin.read()
if raw == b'_':
return
args = json.loads(raw)
result = process_book_files(*args[1:])
with open(args[0], 'wb') as f:
f.write(as_bytes(json.dumps(result)))
def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
changed = set() changed = set()
link_uid = self.book_render_data['link_uid']
xlink_xpath = XPath('//*[@xl:href]')
link_xpath = XPath('//h:a[@href]') link_xpath = XPath('//h:a[@href]')
link_replacer = create_link_replacer(self, link_uid, changed) link_replacer = create_link_replacer(container, link_uid, changed)
ltm = self.book_render_data['link_to_map'] virtualized_names.add(name)
root = container.parsed(name)
for name, mt in iteritems(self.mime_map):
mt = mt.lower()
if mt in OEB_STYLES:
replaceUrls(self.parsed(name), partial(link_replacer, name))
self.virtualized_names.add(name)
elif mt in OEB_DOCS:
self.virtualized_names.add(name)
root = self.parsed(name)
rewrite_links(root, partial(link_replacer, name)) rewrite_links(root, partial(link_replacer, name))
for a in link_xpath(root): for a in link_xpath(root):
href = a.get('href') href = a.get('href')
@ -481,36 +497,161 @@ class Container(ContainerBase):
a.set('href', 'javascript:void(0)') a.set('href', 'javascript:void(0)')
parts = decode_url(href.split('|')[1]) parts = decode_url(href.split('|')[1])
lname, lfrag = parts[0], parts[1] lname, lfrag = parts[0], parts[1]
ltm.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name) link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name)
a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False)) a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False))
else: else:
a.set('target', '_blank') a.set('target', '_blank')
a.set('rel', 'noopener noreferrer') a.set('rel', 'noopener noreferrer')
return name in changed
def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, container=None):
container = container or SimpleContainer(container_dir, opfpath, default_log)
link_to_map = {}
html_data = {}
virtualized_names = set()
for name in names:
if name is None:
continue
mt = container.mime_map[name].lower()
if mt in OEB_DOCS:
root = container.parsed(name)
html_data[name] = {
'length': get_length(root),
'has_maths': check_for_maths(root),
'anchor_map': anchor_map(root)
}
transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names)
elif mt in OEB_STYLES:
transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
elif mt == 'image/svg+xml': elif mt == 'image/svg+xml':
self.virtualized_names.add(name) transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)
xlink = XLINK('href') for v in itervalues(link_to_map):
altered = False for k in v:
for elem in xlink_xpath(self.parsed(name)): v[k] = tuple(v[k])
href = elem.get(xlink) return link_to_map, html_data, tuple(virtualized_names)
if not href.startswith('#'):
elem.set(xlink, link_replacer(name, href))
altered = True
if altered:
changed.add(name)
tuple(map(self.dirty, changed))
def serialize_item(self, name): def process_exploded_book(
mt = (self.mime_map[name] or '').lower() book_fmt, opfpath, input_fmt, tdir, render_manager, log=None, book_hash=None, save_bookmark_data=False,
if mt in OEB_STYLES: book_metadata=None, allow_no_cover=True, virtualize_resources=True
ans = ContainerBase.serialize_item(self, name).lstrip() ):
if not ans.startswith(b'@charset'): log = log or default_log
ans = b'@charset "UTF-8";\n' + ans container = SimpleContainer(tdir, opfpath, log)
input_plugin = plugin_for_input_format(input_fmt)
is_comic = bool(getattr(input_plugin, 'is_image_collection', False))
bookmark_data = None
if save_bookmark_data:
bm_file = 'META-INF/calibre_bookmarks.txt'
if container.exists(bm_file):
with container.open(bm_file, 'rb') as f:
bookmark_data = f.read()
# We do not add zero byte sized files as the IndexedDB API in the
# browser has no good way to distinguish between zero byte files and
# load failures.
excluded_names = {
name for name, mt in iteritems(container.mime_map) if
name == container.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
name == 'mimetype' or not container.has_name_and_is_not_empty(name)}
raster_cover_name, titlepage_name = create_cover_page(container, input_fmt.lower(), allow_no_cover, book_metadata)
toc = get_toc(container, verify_destinations=False).to_dict(count())
if not toc or not toc.get('children'):
toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
spine = [name for name, is_linear in container.spine_names]
spineq = frozenset(spine)
landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]
book_render_data = {
'version': RENDER_VERSION,
'toc':toc,
'book_format': book_fmt,
'spine':spine,
'link_uid': uuid4(),
'book_hash': book_hash,
'is_comic': is_comic,
'raster_cover_name': raster_cover_name,
'title_page_name': titlepage_name,
'has_maths': False,
'total_length': 0,
'spine_length': 0,
'toc_anchor_map': toc_anchor_map(toc),
'landmarks': landmarks,
'link_to_map': {},
}
def work_priority(name):
# ensure workers with large files or stylesheets
# have fewer names
size = os.path.getsize(container.name_path_map[name])
is_html = container.mime_map.get(name) in OEB_DOCS
return (0 if is_html else 1), size
names = sorted(
(n for n, mt in iteritems(container.mime_map) if mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'),
key=work_priority)
results = render_manager(names, (tdir, opfpath, virtualize_resources, book_render_data['link_uid']), container)
ltm = book_render_data['link_to_map']
html_data = {}
virtualized_names = set()
def merge_ltm(dest, src):
for k, v in iteritems(src):
if k in dest:
dest[k] |= v
else:
dest[k] = v
for link_to_map, hdata, vnames in results:
html_data.update(hdata)
virtualized_names |= set(vnames)
for k, v in iteritems(link_to_map):
for x in v:
v[x] = set(v[x])
if k in ltm:
merge_ltm(ltm[k], v)
else:
ltm[k] = v
def manifest_data(name):
mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
ans = {
'size':os.path.getsize(container.name_path_map[name]),
'is_virtualized': name in virtualized_names,
'mimetype':mt,
'is_html': mt in OEB_DOCS,
}
if ans['is_html']:
data = html_data[name]
ans['length'] = l = data['length']
book_render_data['total_length'] += l
if name in book_render_data['spine']:
book_render_data['spine_length'] += l
ans['has_maths'] = hm = data['has_maths']
if hm:
book_render_data['has_maths'] = True
ans['anchor_map'] = data['anchor_map']
return ans return ans
if mt not in OEB_DOCS:
return ContainerBase.serialize_item(self, name) book_render_data['files'] = {name:manifest_data(name) for name in set(container.name_path_map) - excluded_names}
root = self.parsed(name) container.commit()
return json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')).encode('utf-8')
for name in excluded_names:
os.remove(container.name_path_map[name])
ltm = book_render_data['link_to_map']
for name, amap in iteritems(ltm):
for k, v in tuple(iteritems(amap)):
amap[k] = tuple(v) # needed for JSON serialization
data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
with lopen(os.path.join(container.root, 'calibre-book-manifest.json'), 'wb') as f:
f.write(data)
return container, bookmark_data
def split_name(name): def split_name(name):
@ -566,22 +707,6 @@ def serialize_elem(elem, nsmap):
return ans return ans
def ensure_head(root):
# Make sure we have only a single <head>
heads = list(root.iterchildren(XHTML('head')))
if len(heads) != 1:
if not heads:
root.insert(0, root.makeelement(XHTML('head')))
return root[0]
head = heads[0]
for eh in heads[1:]:
for child in eh.iterchildren('*'):
head.append(child)
extract(eh)
return head
return heads[0]
def ensure_body(root): def ensure_body(root):
# Make sure we have only a single <body> # Make sure we have only a single <body>
bodies = list(root.iterchildren(XHTML('body'))) bodies = list(root.iterchildren(XHTML('body')))
@ -685,6 +810,7 @@ def get_stored_annotations(container):
def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False, virtualize_resources=True): def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False, virtualize_resources=True):
with RenderManager() as render_manager:
mi = None mi = None
if serialize_metadata: if serialize_metadata:
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
@ -692,9 +818,9 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
with lopen(pathtoebook, 'rb') as f, quick_metadata: with lopen(pathtoebook, 'rb') as f, quick_metadata:
mi = get_metadata(f, os.path.splitext(pathtoebook)[1][1:].lower()) mi = get_metadata(f, os.path.splitext(pathtoebook)[1][1:].lower())
book_fmt, opfpath, input_fmt = extract_book(pathtoebook, output_dir, log=default_log) book_fmt, opfpath, input_fmt = extract_book(pathtoebook, output_dir, log=default_log)
container = Container( container, bookmark_data = process_exploded_book(
book_fmt, opfpath, input_fmt, output_dir, book_hash=book_hash, book_fmt, opfpath, input_fmt, output_dir, render_manager,
save_bookmark_data=extract_annotations, book_hash=book_hash, save_bookmark_data=extract_annotations,
book_metadata=mi, virtualize_resources=virtualize_resources book_metadata=mi, virtualize_resources=virtualize_resources
) )
if serialize_metadata: if serialize_metadata:
@ -707,7 +833,7 @@ def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, ex
f.write(json_dumps(d)) f.write(json_dumps(d))
if extract_annotations: if extract_annotations:
annotations = None annotations = None
if container.bookmark_data: if bookmark_data:
annotations = json_dumps(tuple(get_stored_annotations(container))) annotations = json_dumps(tuple(get_stored_annotations(container)))
if annotations: if annotations:
with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: with lopen(os.path.join(output_dir, 'calibre-book-annotations.json'), 'wb') as f: