diff --git a/src/calibre/ebooks/conversion/plugins/mobi_output.py b/src/calibre/ebooks/conversion/plugins/mobi_output.py index 72314b4237..86c9b6b7fd 100644 --- a/src/calibre/ebooks/conversion/plugins/mobi_output.py +++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py @@ -6,8 +6,6 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -from cStringIO import StringIO - from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OptionRecommendation @@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin): def check_for_masthead(self): found = 'masthead' in self.oeb.guide if not found: + from calibre.ebooks import generate_masthead self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') - try: - from PIL import Image as PILImage - PILImage - except ImportError: - import Image as PILImage - - raw = open(P('content_server/calibre_banner.png'), 'rb') - im = PILImage.open(raw) - of = StringIO() - im.save(of, 'GIF') - raw = of.getvalue() + raw = generate_masthead(unicode(self.oeb.metadata['title'][0])) id, href = self.oeb.manifest.generate('masthead', 'masthead') self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.guide.add('masthead', 'Masthead Image', href) @@ -151,17 +140,45 @@ class MOBIOutput(OutputFormatPlugin): # Fix up the periodical href to point to first section href toc.nodes[0].href = toc.nodes[0].nodes[0].href + def remove_html_cover(self): + from calibre.ebooks.oeb.base import OEB_DOCS + + oeb = self.oeb + if not oeb.metadata.cover \ + or 'cover' not in oeb.guide: + return + href = oeb.guide['cover'].href + del oeb.guide['cover'] + item = oeb.manifest.hrefs[href] + if item.spine_position is not None: + self.log.warn('Found an HTML cover: ', item.href, 'removing it.', + 'If you find some content missing from the output MOBI, it ' + 'is because you misidentified the HTML cover in the input ' + 'document') + oeb.spine.remove(item) + if item.media_type in OEB_DOCS: + self.oeb.manifest.remove(item) + def convert(self, oeb, output_path, input_plugin, opts, log): + from calibre.utils.config import tweaks + from calibre.ebooks.mobi.writer2.resources import Resources self.log, self.opts, self.oeb = log, opts, oeb - kf8 = self.create_kf8() - self.write_mobi(input_plugin, output_path, kf8) + create_kf8 = tweaks.get('create_kf8', False) + + self.remove_html_cover() + resources = Resources(oeb, opts, self.is_periodical, + add_fonts=create_kf8) + + kf8 = self.create_kf8() if create_kf8 else None + + self.write_mobi(input_plugin, output_path, kf8, resources) def create_kf8(self): from calibre.ebooks.mobi.writer8.main import KF8Writer return KF8Writer(self.oeb, self.opts) - def write_mobi(self, input_plugin, output_path, kf8): + def write_mobi(self, input_plugin, output_path, kf8, resources): from calibre.ebooks.mobi.mobiml import MobiMLizer from calibre.ebooks.oeb.transforms.manglecase import CaseMangler from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable @@ -180,12 +197,20 @@ class MOBIOutput(OutputFormatPlugin): rasterizer(oeb, opts) except Unavailable: self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + else: + # Add rasterized SVG images + # Note that this means for SVG images that are simple wrappers + # around raster images, there will now be two copies of the image + # in the MOBI file. This could probably be fixed for common cases + # by detecting it and replacing the SVG with the raster image, but + # it isn't worth the effort to me. + resources.add_extra_images() mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) mobimlizer(oeb, opts) self.check_for_periodical() write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') from calibre.ebooks.mobi.writer2.main import MobiWriter - writer = MobiWriter(opts, + writer = MobiWriter(opts, resources, kf8, write_page_breaks_after_item=write_page_breaks_after_item) writer(oeb, output_path) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 4af7fdbf2c..d276689224 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -10,7 +10,7 @@ import copy import re from lxml import etree from calibre.ebooks.oeb.base import namespace, barename -from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize +from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.transforms.flatcss import KeyMapper from calibre.utils.magick.draw import identify_data @@ -109,26 +109,8 @@ class MobiMLizer(object): self.profile = profile = context.dest self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys()) - self.remove_html_cover() self.mobimlize_spine() - def remove_html_cover(self): - oeb = self.oeb - if not oeb.metadata.cover \ - or 'cover' not in oeb.guide: - return - href = oeb.guide['cover'].href - del oeb.guide['cover'] - item = oeb.manifest.hrefs[href] - if item.spine_position is not None: - self.log.warn('Found an HTML cover,', item.href, 'removing it.', - 'If you find some content missing from the output MOBI, it ' - 'is because you misidentified the HTML cover in the input ' - 'document') - oeb.spine.remove(item) - if item.media_type in OEB_DOCS: - self.oeb.manifest.remove(item) - def mobimlize_spine(self): 'Iterate over the spine and convert it to MOBIML' for item in self.oeb.spine: diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 99321fab12..d21482f8e2 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -11,17 +11,15 @@ import re, random, time from cStringIO import StringIO from struct import pack -from calibre.ebooks import normalize, generate_masthead -from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES +from calibre.ebooks import normalize from calibre.ebooks.mobi.writer2.serializer import Serializer from calibre.ebooks.compression.palmdoc import compress_doc from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) -from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image, - encode_trailing_data, align_block, detect_periodical) +from calibre.ebooks.mobi.utils import (encint, encode_trailing_data, + align_block, detect_periodical) from calibre.ebooks.mobi.writer2.indexer import Indexer -from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE EXTH_CODES = { 'creator': 100, @@ -50,8 +48,10 @@ WRITE_UNCROSSABLE_BREAKS = False class MobiWriter(object): COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') - def __init__(self, opts, write_page_breaks_after_item=True): + def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True): self.opts = opts + self.resources = resources + self.kf8 = kf8 self.write_page_breaks_after_item = write_page_breaks_after_item self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC self.prefer_author_sort = opts.prefer_author_sort @@ -151,64 +151,12 @@ class MobiWriter(object): # Images {{{ def generate_images(self): - oeb = self.oeb - oeb.logger.info('Serializing images...') - self.image_records = [] - self.image_map = {} - self.masthead_offset = 0 - index = 1 - - mh_href = None - if 'masthead' in oeb.guide and oeb.guide['masthead'].href: - mh_href = oeb.guide['masthead'].href - self.image_records.append(None) - index += 1 - elif self.is_periodical: - # Generate a default masthead - data = generate_masthead(unicode(self.oeb.metadata['title'][0])) - self.image_records.append(data) - index += 1 - - cover_href = self.cover_offset = self.thumbnail_offset = None - if (oeb.metadata.cover and - unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): - cover_id = unicode(oeb.metadata.cover[0]) - item = oeb.manifest.ids[cover_id] - cover_href = item.href - - for item in self.oeb.manifest.values(): - if item.media_type not in OEB_RASTER_IMAGES: continue - try: - data = item.data - if self.opts.mobi_keep_original_images: - data = mobify_image(data) - else: - data = rescale_image(data) - except: - oeb.logger.warn('Bad image file %r' % item.href) - continue - else: - if mh_href and item.href == mh_href: - self.image_records[0] = data - continue - - self.image_records.append(data) - self.image_map[item.href] = index - index += 1 - - if cover_href and item.href == cover_href: - self.cover_offset = self.image_map[item.href] - 1 - try: - data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, - maxsizeb=MAX_THUMB_SIZE) - except: - oeb.logger.warn('Failed to generate thumbnail') - else: - self.image_records.append(data) - self.thumbnail_offset = index - 1 - index += 1 - finally: - item.unload_data_from_memory() + resources = self.resources + self.image_records = resources.records + self.image_map = resources.item_map + self.masthead_offset = resources.masthead_offset + self.cover_offset = resources.cover_offset + self.thumbnail_offset = resources.thumbnail_offset if self.image_records and self.image_records[0] is None: raise ValueError('Failed to find masthead image in manifest') diff --git a/src/calibre/ebooks/mobi/writer2/resources.py b/src/calibre/ebooks/mobi/writer2/resources.py new file mode 100644 index 0000000000..44d8a6d0e5 --- /dev/null +++ b/src/calibre/ebooks/mobi/writer2/resources.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2012, Kovid Goyal ' +__docformat__ = 'restructuredtext en' + +from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE +from calibre.ebooks.mobi.utils import (rescale_image, mobify_image) +from calibre.ebooks import generate_masthead +from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES + +class Resources(object): + + def __init__(self, oeb, opts, is_periodical, add_fonts=False): + self.oeb, self.log, self.opts = oeb, oeb.log, opts + self.is_periodical = is_periodical + + self.item_map = {} + self.records = [] + self.masthead_offset = 0 + self.cover_offset = self.thumbnail_offset = None + + self.add_resources(add_fonts) + + def process_image(self, data): + return (mobify_image(data) if self.opts.mobi_keep_original_images else + rescale_image(data)) + + def add_resources(self, add_fonts): + oeb = self.oeb + oeb.logger.info('Serializing resources...') + index = 1 + + mh_href = None + if 'masthead' in oeb.guide and oeb.guide['masthead'].href: + mh_href = oeb.guide['masthead'].href + self.records.append(None) + index += 1 + elif self.is_periodical: + # Generate a default masthead + data = generate_masthead(unicode(self.oeb.metadata['title'][0])) + self.records.append(data) + index += 1 + + cover_href = self.cover_offset = self.thumbnail_offset = None + if (oeb.metadata.cover and + unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): + cover_id = unicode(oeb.metadata.cover[0]) + item = oeb.manifest.ids[cover_id] + cover_href = item.href + + for item in self.oeb.manifest.values(): + if item.media_type not in OEB_RASTER_IMAGES: continue + try: + data = self.process_image(item.data) + except: + self.log.warn('Bad image file %r' % item.href) + continue + else: + if mh_href and item.href == mh_href: + self.records[0] = data + continue + + self.records.append(data) + self.item_map[item.href] = index + index += 1 + + if cover_href and item.href == cover_href: + self.cover_offset = self.item_map[item.href] - 1 + try: + data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, + maxsizeb=MAX_THUMB_SIZE) + except: + self.log.warn('Failed to generate thumbnail') + else: + self.records.append(data) + self.thumbnail_offset = index - 1 + index += 1 + finally: + item.unload_data_from_memory() + + def add_extra_images(self): + ''' + Add any images that were created after the call to add_resources() + ''' + for item in self.oeb.manifest.values(): + if (item.media_type not in OEB_RASTER_IMAGES or item.href in + self.item_map): continue + try: + data = self.process_image(item.data) + except: + self.log.warn('Bad image file %r' % item.href) + else: + self.records.append(data) + self.item_map[item.href] = len(self.records) + finally: + item.unload_data_from_memory() + + + diff --git a/src/calibre/ebooks/mobi/writer8/main.py b/src/calibre/ebooks/mobi/writer8/main.py index fc4234eb10..3a24490faa 100644 --- a/src/calibre/ebooks/mobi/writer8/main.py +++ b/src/calibre/ebooks/mobi/writer8/main.py @@ -7,9 +7,43 @@ __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import copy + +import cssutils + +from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME) + +XML_DOCS = OEB_DOCS | {SVG_MIME} class KF8Writer(object): def __init__(self, oeb, opts): self.oeb, self.opts, self.log = oeb, opts, oeb.log + self.dup_data() + + self.create_pieces() + + def dup_data(self): + ''' Duplicate data so that any changes we make to markup/CSS only + affect KF8 output and not MOBI 6 output ''' + self._data_cache = {} + for item in self.oeb.manifest: + if item.media_type in XML_DOCS: + self._data_cache[item.href] = copy.deepcopy(item.data) + elif item.media_type in OEB_STYLES: + # I can't figure out how to make an efficient copy of the + # in-memory CSSStylesheet, as deepcopy doesn't work (raises an + # exception) + self._data_cache[item.href] = cssutils.parseString( + item.data.cssText) + + def data(self, item): + return self._data_cache.get(item.href, item.data) + + def create_pieces(self): + self.flows = [None] # First flow item is reserved for the text + + for item in self.oeb.spine: + root = self.data(item) + root