Move the resource packaging code out of the Mobi 6 writer so it can be shared by both Mobi 8 and Mobi 6

This commit is contained in:
Kovid Goyal 2012-04-16 00:58:03 +05:30
parent d24d70ab23
commit 49115aa77e
5 changed files with 192 additions and 100 deletions

View File

@ -6,8 +6,6 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from cStringIO import StringIO
from calibre.customize.conversion import OutputFormatPlugin
from calibre.customize.conversion import OptionRecommendation
@ -79,18 +77,9 @@ class MOBIOutput(OutputFormatPlugin):
def check_for_masthead(self):
found = 'masthead' in self.oeb.guide
if not found:
from calibre.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
try:
from PIL import Image as PILImage
PILImage
except ImportError:
import Image as PILImage
raw = open(P('content_server/calibre_banner.png'), 'rb')
im = PILImage.open(raw)
of = StringIO()
im.save(of, 'GIF')
raw = of.getvalue()
raw = generate_masthead(unicode(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href)
@ -151,17 +140,45 @@ class MOBIOutput(OutputFormatPlugin):
# Fix up the periodical href to point to first section href
toc.nodes[0].href = toc.nodes[0].nodes[0].href
def remove_html_cover(self):
    '''
    Strip an HTML cover page from the book being converted.

    Nothing happens unless the metadata declares a cover and the guide has
    a 'cover' entry. Otherwise the guide entry is deleted and the manifest
    item it referenced is looked up by href. If that item has a spine
    position, a warning is logged and the item is removed from the spine;
    when its media type is one of OEB_DOCS it is removed from the manifest
    as well.
    '''
    from calibre.ebooks.oeb.base import OEB_DOCS

    book = self.oeb
    if not (book.metadata.cover and 'cover' in book.guide):
        return

    cover_href = book.guide['cover'].href
    del book.guide['cover']
    cover_item = book.manifest.hrefs[cover_href]
    if cover_item.spine_position is None:
        # No spine position: the item is left untouched
        return

    self.log.warn('Found an HTML cover: ', cover_item.href, 'removing it.',
            'If you find some content missing from the output MOBI, it '
            'is because you misidentified the HTML cover in the input '
            'document')
    book.spine.remove(cover_item)
    if cover_item.media_type in OEB_DOCS:
        book.manifest.remove(cover_item)
def convert(self, oeb, output_path, input_plugin, opts, log):
from calibre.utils.config import tweaks
from calibre.ebooks.mobi.writer2.resources import Resources
self.log, self.opts, self.oeb = log, opts, oeb
kf8 = self.create_kf8()
self.write_mobi(input_plugin, output_path, kf8)
create_kf8 = tweaks.get('create_kf8', False)
self.remove_html_cover()
resources = Resources(oeb, opts, self.is_periodical,
add_fonts=create_kf8)
kf8 = self.create_kf8() if create_kf8 else None
self.write_mobi(input_plugin, output_path, kf8, resources)
def create_kf8(self):
    '''
    Build a KF8Writer for the current book (self.oeb) and conversion
    options (self.opts) and return it.
    '''
    # Imported here rather than at module level, as in the original code
    from calibre.ebooks.mobi.writer8.main import KF8Writer
    writer = KF8Writer(self.oeb, self.opts)
    return writer
def write_mobi(self, input_plugin, output_path, kf8):
def write_mobi(self, input_plugin, output_path, kf8, resources):
from calibre.ebooks.mobi.mobiml import MobiMLizer
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
@ -180,12 +197,20 @@ class MOBIOutput(OutputFormatPlugin):
rasterizer(oeb, opts)
except Unavailable:
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
else:
# Add rasterized SVG images
# Note that this means for SVG images that are simple wrappers
# around raster images, there will now be two copies of the image
# in the MOBI file. This could probably be fixed for common cases
# by detecting it and replacing the SVG with the raster image, but
# it isn't worth the effort to me.
resources.add_extra_images()
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
mobimlizer(oeb, opts)
self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.ebooks.mobi.writer2.main import MobiWriter
writer = MobiWriter(opts,
writer = MobiWriter(opts, resources, kf8,
write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path)

View File

@ -10,7 +10,7 @@ import copy
import re
from lxml import etree
from calibre.ebooks.oeb.base import namespace, barename
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS, urlnormalize
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, urlnormalize
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import KeyMapper
from calibre.utils.magick.draw import identify_data
@ -109,26 +109,8 @@ class MobiMLizer(object):
self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
self.remove_html_cover()
self.mobimlize_spine()
def remove_html_cover(self):
    '''
    Remove an HTML cover page, if the book has one.

    Acts only when the metadata declares a cover and the guide contains a
    'cover' entry: the guide entry is deleted, and if the referenced
    manifest item has a spine position it is removed from the spine (with
    a warning) and, when it is an OEB document, from the manifest too.
    '''
    book = self.oeb
    if book.metadata.cover and 'cover' in book.guide:
        target = book.guide['cover'].href
        del book.guide['cover']
        entry = book.manifest.hrefs[target]
        in_spine = entry.spine_position is not None
        if in_spine:
            self.log.warn('Found an HTML cover,', entry.href, 'removing it.',
                    'If you find some content missing from the output MOBI, it '
                    'is because you misidentified the HTML cover in the input '
                    'document')
            book.spine.remove(entry)
            if entry.media_type in OEB_DOCS:
                book.manifest.remove(entry)
def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine:

View File

@ -11,17 +11,15 @@ import re, random, time
from cStringIO import StringIO
from struct import pack
from calibre.ebooks import normalize, generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
from calibre.ebooks import normalize
from calibre.ebooks.mobi.writer2.serializer import Serializer
from calibre.ebooks.compression.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.filenames import ascii_filename
from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint, mobify_image,
encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
EXTH_CODES = {
'creator': 100,
@ -50,8 +48,10 @@ WRITE_UNCROSSABLE_BREAKS = False
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, opts, write_page_breaks_after_item=True):
def __init__(self, opts, resources, kf8, write_page_breaks_after_item=True):
self.opts = opts
self.resources = resources
self.kf8 = kf8
self.write_page_breaks_after_item = write_page_breaks_after_item
self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
self.prefer_author_sort = opts.prefer_author_sort
@ -151,64 +151,12 @@ class MobiWriter(object):
# Images {{{
def generate_images(self):
oeb = self.oeb
oeb.logger.info('Serializing images...')
self.image_records = []
self.image_map = {}
self.masthead_offset = 0
index = 1
mh_href = None
if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
mh_href = oeb.guide['masthead'].href
self.image_records.append(None)
index += 1
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
self.image_records.append(data)
index += 1
cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href
for item in self.oeb.manifest.values():
if item.media_type not in OEB_RASTER_IMAGES: continue
try:
data = item.data
if self.opts.mobi_keep_original_images:
data = mobify_image(data)
else:
data = rescale_image(data)
except:
oeb.logger.warn('Bad image file %r' % item.href)
continue
else:
if mh_href and item.href == mh_href:
self.image_records[0] = data
continue
self.image_records.append(data)
self.image_map[item.href] = index
index += 1
if cover_href and item.href == cover_href:
self.cover_offset = self.image_map[item.href] - 1
try:
data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
maxsizeb=MAX_THUMB_SIZE)
except:
oeb.logger.warn('Failed to generate thumbnail')
else:
self.image_records.append(data)
self.thumbnail_offset = index - 1
index += 1
finally:
item.unload_data_from_memory()
resources = self.resources
self.image_records = resources.records
self.image_map = resources.item_map
self.masthead_offset = resources.masthead_offset
self.cover_offset = resources.cover_offset
self.thumbnail_offset = resources.thumbnail_offset
if self.image_records and self.image_records[0] is None:
raise ValueError('Failed to find masthead image in manifest')

View File

@ -0,0 +1,103 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
from calibre.ebooks.mobi.utils import (rescale_image, mobify_image)
from calibre.ebooks import generate_masthead
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
class Resources(object):

    '''
    Collect the image records of a book so that they can be shared by the
    MOBI writers.

    After construction:

        records          -- list of processed image data, in record order
        item_map         -- maps a manifest item's href to the 1-based
                            position of its data in ``records``
        masthead_offset  -- always 0 in this class
        cover_offset     -- 0-based position of the cover image in
                            ``records``, or None
        thumbnail_offset -- 0-based position of the generated cover
                            thumbnail in ``records``, or None
    '''

    def __init__(self, oeb, opts, is_periodical, add_fonts=False):
        '''
        :param oeb: The book; its guide, metadata and manifest are read.
        :param opts: Conversion options; ``mobi_keep_original_images`` is
                     consulted when processing images.
        :param is_periodical: If True and the guide has no masthead, a
                     default masthead is generated.
        :param add_fonts: Passed through to add_resources(), which does not
                     currently use it.
        '''
        self.oeb, self.log, self.opts = oeb, oeb.log, opts
        self.is_periodical = is_periodical

        self.item_map = {}
        self.records = []
        self.masthead_offset = 0
        self.cover_offset = self.thumbnail_offset = None

        self.add_resources(add_fonts)

    def process_image(self, data):
        '''
        Return ``data`` run through mobify_image() when the
        ``mobi_keep_original_images`` option is set, otherwise through
        rescale_image().
        '''
        return (mobify_image(data) if self.opts.mobi_keep_original_images else
                rescale_image(data))

    def add_resources(self, add_fonts):
        '''
        Turn every raster image in the manifest into a record.

        The first record is reserved for the masthead when the guide names
        one (or a default one is generated for periodicals). The cover
        image, when found, is followed by a thumbnail record. Images that
        cannot be processed are logged and skipped.

        :param add_fonts: Not used by this method at present.
        '''
        oeb = self.oeb
        oeb.logger.info('Serializing resources...')
        index = 1

        mh_href = None
        if 'masthead' in oeb.guide and oeb.guide['masthead'].href:
            mh_href = oeb.guide['masthead'].href
            # Placeholder, replaced when the masthead item is met in the
            # manifest loop below
            self.records.append(None)
            index += 1
        elif self.is_periodical:
            # Generate a default masthead
            data = generate_masthead(unicode(self.oeb.metadata['title'][0]))
            self.records.append(data)
            index += 1

        cover_href = self.cover_offset = self.thumbnail_offset = None
        if (oeb.metadata.cover and
                unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
            cover_id = unicode(oeb.metadata.cover[0])
            item = oeb.manifest.ids[cover_id]
            cover_href = item.href

        for item in self.oeb.manifest.values():
            if item.media_type not in OEB_RASTER_IMAGES:
                continue
            try:
                data = self.process_image(item.data)
            except Exception:
                # Best effort: a broken image must not abort the conversion,
                # but KeyboardInterrupt/SystemExit are allowed to propagate
                self.log.warn('Bad image file %r' % item.href)
                continue
            else:
                if mh_href and item.href == mh_href:
                    # The masthead fills the reserved first record and gets
                    # no item_map entry
                    self.records[0] = data
                    continue

                self.records.append(data)
                self.item_map[item.href] = index
                index += 1

                if cover_href and item.href == cover_href:
                    self.cover_offset = self.item_map[item.href] - 1
                    try:
                        data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
                            maxsizeb=MAX_THUMB_SIZE)
                    except Exception:
                        self.log.warn('Failed to generate thumbnail')
                    else:
                        self.records.append(data)
                        self.thumbnail_offset = index - 1
                        index += 1
            finally:
                # Runs for every raster image, including on the continue
                # paths above
                item.unload_data_from_memory()

    def add_extra_images(self):
        '''
        Add any images that were created after the call to add_resources()

        Every raster image in the manifest whose href is not yet in
        ``item_map`` is processed and appended to ``records``.
        '''
        # NOTE(review): the masthead image is stored in records[0] without an
        # item_map entry, so it looks like it is appended again here --
        # confirm whether that is intended.
        for item in self.oeb.manifest.values():
            if (item.media_type not in OEB_RASTER_IMAGES or item.href in
                    self.item_map):
                continue
            try:
                data = self.process_image(item.data)
            except Exception:
                self.log.warn('Bad image file %r' % item.href)
            else:
                self.records.append(data)
                # len() after the append is the 1-based record position
                self.item_map[item.href] = len(self.records)
            finally:
                item.unload_data_from_memory()

View File

@ -7,9 +7,43 @@ __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import copy
import cssutils
from calibre.ebooks.oeb.base import (OEB_DOCS, OEB_STYLES, SVG_MIME)
XML_DOCS = OEB_DOCS | {SVG_MIME}
class KF8Writer(object):

    '''
    Writer for the KF8 flavour of the output.

    On construction it takes private copies of the book's markup and CSS
    (see dup_data()) and then calls create_pieces().
    '''

    def __init__(self, oeb, opts):
        '''
        :param oeb: The book; its manifest and spine are read.
        :param opts: Conversion options, stored as ``self.opts``.
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log

        self.dup_data()
        self.create_pieces()

    def dup_data(self):
        ''' Duplicate data so that any changes we make to markup/CSS only
        affect KF8 output and not MOBI 6 output '''
        self._data_cache = {}
        for item in self.oeb.manifest:
            if item.media_type in XML_DOCS:
                self._data_cache[item.href] = copy.deepcopy(item.data)
            elif item.media_type in OEB_STYLES:
                # I can't figure out how to make an efficient copy of the
                # in-memory CSSStylesheet, as deepcopy doesn't work (raises an
                # exception)
                self._data_cache[item.href] = cssutils.parseString(
                        item.data.cssText)

    def data(self, item):
        '''
        Return the private copy of ``item``'s data made by dup_data(), or
        ``item.data`` itself when no copy was made for its href.
        '''
        # Look in the cache first so that item.data is only touched on a
        # miss; passing it as the default to dict.get() would evaluate it on
        # every call (presumably item.data can be expensive to load --
        # verify against the manifest item implementation)
        try:
            return self._data_cache[item.href]
        except KeyError:
            return item.data

    def create_pieces(self):
        '''
        Initialise ``self.flows`` and fetch the data of every spine item.
        '''
        self.flows = [None] # First flow item is reserved for the text

        for item in self.oeb.spine:
            root = self.data(item)
            # No-op reference to the otherwise unused local; nothing more is
            # done with the data in the code visible here
            root