diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index d44d85b9ff..97395f0ad5 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -7,22 +7,26 @@ from __future__ import absolute_import, division, print_function, unicode_litera
import json
import os
import re
+import shutil
import sys
+import time
from collections import defaultdict
from datetime import datetime
from functools import partial
from itertools import count
+from math import ceil
from css_parser import replaceUrls
from css_parser.css import CSSRule
-from calibre import force_unicode, prepare_string_for_xml
+from calibre import detect_ncpus, force_unicode, prepare_string_for_xml
+from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_input_format
from calibre.ebooks import parse_css_length
from calibre.ebooks.css_transform_rules import StyleDeclaration
from calibre.ebooks.oeb.base import (
- EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, rewrite_links,
- urlunquote
+ EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath,
+ rewrite_links, urlunquote
)
from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase
@@ -31,9 +35,12 @@ from calibre.ebooks.oeb.polish.cover import (
)
from calibre.ebooks.oeb.polish.css import transform_inline_styles
from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
-from calibre.ebooks.oeb.polish.utils import extract, guess_type
+from calibre.ebooks.oeb.polish.utils import guess_type
+from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.srv.metadata import encode_datetime
+from calibre.srv.opts import grouper
from calibre.utils.date import EPOCH
+from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.logging import default_log
from calibre.utils.serialize import json_loads
@@ -42,7 +49,9 @@ from polyglot.binary import (
as_base64_unicode as encode_component, from_base64_bytes,
from_base64_unicode as decode_component
)
-from polyglot.builtins import is_py3, iteritems, map, unicode_type
+from polyglot.builtins import (
+ as_bytes, is_py3, iteritems, itervalues, map, unicode_type
+)
from polyglot.urllib import quote, urlparse
RENDER_VERSION = 1
@@ -220,297 +229,429 @@ def toc_anchor_map(toc):
return dict(ans)
-class Container(ContainerBase):
+def serialize_parsed_html(root):
+ return as_bytes(json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')))
+
+
+class SimpleContainer(ContainerBase):
tweak_mode = True
- def __init__(
- self, book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False,
- book_metadata=None, allow_no_cover=True, virtualize_resources=True
- ):
- log = log or default_log
- self.allow_no_cover = allow_no_cover
- ContainerBase.__init__(self, tdir, opfpath, log)
- self.book_metadata = book_metadata
- input_plugin = plugin_for_input_format(input_fmt)
- self.is_comic = bool(getattr(input_plugin, 'is_image_collection', False))
- if save_bookmark_data:
- bm_file = 'META-INF/calibre_bookmarks.txt'
- self.bookmark_data = None
- if self.exists(bm_file):
- with self.open(bm_file, 'rb') as f:
- self.bookmark_data = f.read()
- # We do not add zero byte sized files as the IndexedDB API in the
- # browser has no good way to distinguish between zero byte files and
- # load failures.
- excluded_names = {
- name for name, mt in iteritems(self.mime_map) if
- name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
- name == 'mimetype' or not self.has_name_and_is_not_empty(name)}
- raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower())
- toc = get_toc(self).to_dict(count())
- if not toc or not toc.get('children'):
- toc = from_xpaths(self, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
- spine = [name for name, is_linear in self.spine_names]
- spineq = frozenset(spine)
- landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq]
+def create_cover_page(container, input_fmt, allow_no_cover, book_metadata=None):
+ templ = '''
+
+
+ '''
- self.book_render_data = data = {
- 'version': RENDER_VERSION,
- 'toc':toc,
- 'book_format': book_fmt,
- 'spine':spine,
- 'link_uid': uuid4(),
- 'book_hash': book_hash,
- 'is_comic': self.is_comic,
- 'raster_cover_name': raster_cover_name,
- 'title_page_name': titlepage_name,
- 'has_maths': False,
- 'total_length': 0,
- 'spine_length': 0,
- 'toc_anchor_map': toc_anchor_map(toc),
- 'landmarks': landmarks,
- 'link_to_map': {},
- }
- # Mark the spine as dirty since we have to ensure it is normalized
- for name in data['spine']:
- self.parsed(name), self.dirty(name)
- self.virtualized_names = set()
- self.transform_all(virtualize_resources)
+ def generic_cover():
+ if book_metadata is not None:
+ from calibre.ebooks.covers import create_cover
+ mi = book_metadata
+ return create_cover(mi.title, mi.authors, mi.series, mi.series_index)
+ return BLANK_JPEG
- def manifest_data(name):
- mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
- ans = {
- 'size':os.path.getsize(self.name_path_map[name]),
- 'is_virtualized': name in self.virtualized_names,
- 'mimetype':mt,
- 'is_html': mt in OEB_DOCS,
- }
- if ans['is_html']:
- root = self.parsed(name)
- ans['length'] = l = get_length(root)
- self.book_render_data['total_length'] += l
- if name in data['spine']:
- self.book_render_data['spine_length'] += l
- ans['has_maths'] = hm = check_for_maths(root)
- if hm:
- self.book_render_data['has_maths'] = True
- ans['anchor_map'] = anchor_map(root)
- return ans
- data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
- self.commit()
- for name in excluded_names:
- os.remove(self.name_path_map[name])
- data = json.dumps(self.book_render_data, ensure_ascii=False)
- if not isinstance(data, bytes):
- data = data.encode('utf-8')
- with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
- f.write(data)
+ if input_fmt == 'epub':
- def create_cover_page(self, input_fmt):
- templ = '''
-
-
- '''
+ def image_callback(cover_image, wrapped_image):
+ if cover_image:
+ image_callback.cover_data = container.raw_data(cover_image, decode=False)
+ if wrapped_image and not getattr(image_callback, 'cover_data', None):
+ image_callback.cover_data = container.raw_data(wrapped_image, decode=False)
- def generic_cover():
- if self.book_metadata is not None:
- from calibre.ebooks.covers import create_cover
- mi = self.book_metadata
- return create_cover(mi.title, mi.authors, mi.series, mi.series_index)
- return BLANK_JPEG
+ def cover_path(action, data):
+ if action == 'write_image':
+ cdata = getattr(image_callback, 'cover_data', None) or generic_cover()
+ data.write(cdata)
- if input_fmt == 'epub':
-
- def image_callback(cover_image, wrapped_image):
- if cover_image:
- image_callback.cover_data = self.raw_data(cover_image, decode=False)
- if wrapped_image and not getattr(image_callback, 'cover_data', None):
- image_callback.cover_data = self.raw_data(wrapped_image, decode=False)
-
- def cover_path(action, data):
- if action == 'write_image':
- cdata = getattr(image_callback, 'cover_data', None) or generic_cover()
- data.write(cdata)
-
- if self.allow_no_cover and not has_epub_cover(self):
+ if allow_no_cover and not has_epub_cover(container):
+ return None, None
+ raster_cover_name, titlepage_name = set_epub_cover(
+ container, cover_path, (lambda *a: None), options={'template':templ},
+ image_callback=image_callback)
+ else:
+ raster_cover_name = find_cover_image(container, strict=True)
+ if raster_cover_name is None:
+ if allow_no_cover:
return None, None
- raster_cover_name, titlepage_name = set_epub_cover(
- self, cover_path, (lambda *a: None), options={'template':templ},
- image_callback=image_callback)
- else:
- raster_cover_name = find_cover_image(self, strict=True)
- if raster_cover_name is None:
- if self.allow_no_cover:
- return None, None
- item = self.generate_item(name='cover.jpeg', id_prefix='cover')
- raster_cover_name = self.href_to_name(item.get('href'), self.opf_name)
- with self.open(raster_cover_name, 'wb') as dest:
- dest.write(generic_cover())
- if self.is_comic:
- return raster_cover_name, None
- item = self.generate_item(name='titlepage.html', id_prefix='titlepage')
- titlepage_name = self.href_to_name(item.get('href'), self.opf_name)
- raw = templ % prepare_string_for_xml(self.name_to_href(raster_cover_name, titlepage_name), True)
- with self.open(titlepage_name, 'wb') as f:
- f.write(raw.encode('utf-8'))
- spine = self.opf_xpath('//opf:spine')[0]
- ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
- self.insert_into_xml(spine, ref, index=0)
- self.dirty(self.opf_name)
- return raster_cover_name, titlepage_name
+ item = container.generate_item(name='cover.jpeg', id_prefix='cover')
+ raster_cover_name = container.href_to_name(item.get('href'), container.opf_name)
+ with container.open(raster_cover_name, 'wb') as dest:
+ dest.write(generic_cover())
+ if container.is_comic:
+ return raster_cover_name, None
+ item = container.generate_item(name='titlepage.html', id_prefix='titlepage')
+ titlepage_name = container.href_to_name(item.get('href'), container.opf_name)
+ raw = templ % prepare_string_for_xml(container.name_to_href(raster_cover_name, titlepage_name), True)
+ with container.open(titlepage_name, 'wb') as f:
+ f.write(raw.encode('utf-8'))
+ spine = container.opf_xpath('//opf:spine')[0]
+ ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
+ container.insert_into_xml(spine, ref, index=0)
+ container.dirty(container.opf_name)
+ return raster_cover_name, titlepage_name
- def transform_html(self, name, virtualize_resources):
- style_xpath = XPath('//h:style')
- link_xpath = XPath('//h:a[@href]')
- img_xpath = XPath('//h:img[@src]')
- res_link_xpath = XPath('//h:link[@href]')
- root = self.parsed(name)
- head = ensure_head(root)
- changed = False
- for style in style_xpath(root):
- # Firefox flakes out sometimes when dynamically creating