diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index d44d85b9ff..97395f0ad5 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -7,22 +7,26 @@ from __future__ import absolute_import, division, print_function, unicode_litera import json import os import re +import shutil import sys +import time from collections import defaultdict from datetime import datetime from functools import partial from itertools import count +from math import ceil from css_parser import replaceUrls from css_parser.css import CSSRule -from calibre import force_unicode, prepare_string_for_xml +from calibre import detect_ncpus, force_unicode, prepare_string_for_xml +from calibre.constants import iswindows from calibre.customize.ui import plugin_for_input_format from calibre.ebooks import parse_css_length from calibre.ebooks.css_transform_rules import StyleDeclaration from calibre.ebooks.oeb.base import ( - EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, rewrite_links, - urlunquote + EPUB_NS, OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, + rewrite_links, urlunquote ) from calibre.ebooks.oeb.iterator.book import extract_book from calibre.ebooks.oeb.polish.container import Container as ContainerBase @@ -31,9 +35,12 @@ from calibre.ebooks.oeb.polish.cover import ( ) from calibre.ebooks.oeb.polish.css import transform_inline_styles from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc -from calibre.ebooks.oeb.polish.utils import extract, guess_type +from calibre.ebooks.oeb.polish.utils import guess_type +from calibre.ptempfile import PersistentTemporaryDirectory from calibre.srv.metadata import encode_datetime +from calibre.srv.opts import grouper from calibre.utils.date import EPOCH +from calibre.utils.ipc.simple_worker import start_pipe_worker from calibre.utils.iso8601 import parse_iso8601 from calibre.utils.logging import default_log from calibre.utils.serialize import json_loads @@ -42,7 +49,9 @@ from polyglot.binary import ( as_base64_unicode as encode_component, from_base64_bytes, from_base64_unicode as decode_component ) -from polyglot.builtins import is_py3, iteritems, map, unicode_type +from polyglot.builtins import ( + as_bytes, is_py3, iteritems, itervalues, map, unicode_type +) from polyglot.urllib import quote, urlparse RENDER_VERSION = 1 @@ -220,297 +229,429 @@ def toc_anchor_map(toc): return dict(ans) -class Container(ContainerBase): +def serialize_parsed_html(root): + return as_bytes(json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':'))) + + +class SimpleContainer(ContainerBase): tweak_mode = True - def __init__( - self, book_fmt, opfpath, input_fmt, tdir, log=None, book_hash=None, save_bookmark_data=False, - book_metadata=None, allow_no_cover=True, virtualize_resources=True - ): - log = log or default_log - self.allow_no_cover = allow_no_cover - ContainerBase.__init__(self, tdir, opfpath, log) - self.book_metadata = book_metadata - input_plugin = plugin_for_input_format(input_fmt) - self.is_comic = bool(getattr(input_plugin, 'is_image_collection', False)) - if save_bookmark_data: - bm_file = 'META-INF/calibre_bookmarks.txt' - self.bookmark_data = None - if self.exists(bm_file): - with self.open(bm_file, 'rb') as f: - self.bookmark_data = f.read() - # We do not add zero byte sized files as the IndexedDB API in the - # browser has no good way to distinguish between zero byte files and - # load failures. - excluded_names = { - name for name, mt in iteritems(self.mime_map) if - name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or - name == 'mimetype' or not self.has_name_and_is_not_empty(name)} - raster_cover_name, titlepage_name = self.create_cover_page(input_fmt.lower()) - toc = get_toc(self).to_dict(count()) - if not toc or not toc.get('children'): - toc = from_xpaths(self, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count()) - spine = [name for name, is_linear in self.spine_names] - spineq = frozenset(spine) - landmarks = [l for l in get_landmarks(self) if l['dest'] in spineq] +def create_cover_page(container, input_fmt, allow_no_cover, book_metadata=None): + templ = ''' + + + ''' - self.book_render_data = data = { - 'version': RENDER_VERSION, - 'toc':toc, - 'book_format': book_fmt, - 'spine':spine, - 'link_uid': uuid4(), - 'book_hash': book_hash, - 'is_comic': self.is_comic, - 'raster_cover_name': raster_cover_name, - 'title_page_name': titlepage_name, - 'has_maths': False, - 'total_length': 0, - 'spine_length': 0, - 'toc_anchor_map': toc_anchor_map(toc), - 'landmarks': landmarks, - 'link_to_map': {}, - } - # Mark the spine as dirty since we have to ensure it is normalized - for name in data['spine']: - self.parsed(name), self.dirty(name) - self.virtualized_names = set() - self.transform_all(virtualize_resources) + def generic_cover(): + if book_metadata is not None: + from calibre.ebooks.covers import create_cover + mi = book_metadata + return create_cover(mi.title, mi.authors, mi.series, mi.series_index) + return BLANK_JPEG - def manifest_data(name): - mt = (self.mime_map.get(name) or 'application/octet-stream').lower() - ans = { - 'size':os.path.getsize(self.name_path_map[name]), - 'is_virtualized': name in self.virtualized_names, - 'mimetype':mt, - 'is_html': mt in OEB_DOCS, - } - if ans['is_html']: - root = self.parsed(name) - ans['length'] = l = get_length(root) - self.book_render_data['total_length'] += l - if name in data['spine']: - self.book_render_data['spine_length'] += l - ans['has_maths'] = hm = check_for_maths(root) - if hm: - self.book_render_data['has_maths'] = True - ans['anchor_map'] = anchor_map(root) - return ans - data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names} - self.commit() - for name in excluded_names: - os.remove(self.name_path_map[name]) - data = json.dumps(self.book_render_data, ensure_ascii=False) - if not isinstance(data, bytes): - data = data.encode('utf-8') - with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f: - f.write(data) + if input_fmt == 'epub': - def create_cover_page(self, input_fmt): - templ = ''' - - - ''' + def image_callback(cover_image, wrapped_image): + if cover_image: + image_callback.cover_data = container.raw_data(cover_image, decode=False) + if wrapped_image and not getattr(image_callback, 'cover_data', None): + image_callback.cover_data = container.raw_data(wrapped_image, decode=False) - def generic_cover(): - if self.book_metadata is not None: - from calibre.ebooks.covers import create_cover - mi = self.book_metadata - return create_cover(mi.title, mi.authors, mi.series, mi.series_index) - return BLANK_JPEG + def cover_path(action, data): + if action == 'write_image': + cdata = getattr(image_callback, 'cover_data', None) or generic_cover() + data.write(cdata) - if input_fmt == 'epub': - - def image_callback(cover_image, wrapped_image): - if cover_image: - image_callback.cover_data = self.raw_data(cover_image, decode=False) - if wrapped_image and not getattr(image_callback, 'cover_data', None): - image_callback.cover_data = self.raw_data(wrapped_image, decode=False) - - def cover_path(action, data): - if action == 'write_image': - cdata = getattr(image_callback, 'cover_data', None) or generic_cover() - data.write(cdata) - - if self.allow_no_cover and not has_epub_cover(self): + if allow_no_cover and not has_epub_cover(container): + return None, None + raster_cover_name, titlepage_name = set_epub_cover( + container, cover_path, (lambda *a: None), options={'template':templ}, + image_callback=image_callback) + else: + raster_cover_name = find_cover_image(container, strict=True) + if raster_cover_name is None: + if allow_no_cover: return None, None - raster_cover_name, titlepage_name = set_epub_cover( - self, cover_path, (lambda *a: None), options={'template':templ}, - image_callback=image_callback) - else: - raster_cover_name = find_cover_image(self, strict=True) - if raster_cover_name is None: - if self.allow_no_cover: - return None, None - item = self.generate_item(name='cover.jpeg', id_prefix='cover') - raster_cover_name = self.href_to_name(item.get('href'), self.opf_name) - with self.open(raster_cover_name, 'wb') as dest: - dest.write(generic_cover()) - if self.is_comic: - return raster_cover_name, None - item = self.generate_item(name='titlepage.html', id_prefix='titlepage') - titlepage_name = self.href_to_name(item.get('href'), self.opf_name) - raw = templ % prepare_string_for_xml(self.name_to_href(raster_cover_name, titlepage_name), True) - with self.open(titlepage_name, 'wb') as f: - f.write(raw.encode('utf-8')) - spine = self.opf_xpath('//opf:spine')[0] - ref = spine.makeelement(OPF('itemref'), idref=item.get('id')) - self.insert_into_xml(spine, ref, index=0) - self.dirty(self.opf_name) - return raster_cover_name, titlepage_name + item = container.generate_item(name='cover.jpeg', id_prefix='cover') + raster_cover_name = container.href_to_name(item.get('href'), container.opf_name) + with container.open(raster_cover_name, 'wb') as dest: + dest.write(generic_cover()) + if container.is_comic: + return raster_cover_name, None + item = container.generate_item(name='titlepage.html', id_prefix='titlepage') + titlepage_name = container.href_to_name(item.get('href'), container.opf_name) + raw = templ % prepare_string_for_xml(container.name_to_href(raster_cover_name, titlepage_name), True) + with container.open(titlepage_name, 'wb') as f: + f.write(raw.encode('utf-8')) + spine = container.opf_xpath('//opf:spine')[0] + ref = spine.makeelement(OPF('itemref'), idref=item.get('id')) + container.insert_into_xml(spine, ref, index=0) + container.dirty(container.opf_name) + return raster_cover_name, titlepage_name - def transform_html(self, name, virtualize_resources): - style_xpath = XPath('//h:style') - link_xpath = XPath('//h:a[@href]') - img_xpath = XPath('//h:img[@src]') - res_link_xpath = XPath('//h:link[@href]') - root = self.parsed(name) - head = ensure_head(root) - changed = False - for style in style_xpath(root): - # Firefox flakes out sometimes when dynamically creating