Mobipocket support:

- Add an actual command-line oeb2mobi
  - Fixes for a few minor bugs
  - Ensure LIT support still works
This commit is contained in:
Marshall T. Vandegrift 2009-01-11 21:45:28 -05:00
parent 2397ee84bf
commit 8cd38455f2
10 changed files with 167 additions and 93 deletions

View File

@ -27,11 +27,16 @@ from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES, OEB_CSS_MIME, \
CSS_MIME, OPF_MIME, XML_NS, XML
from calibre.ebooks.oeb.base import namespace, barename, prefixname, \
urlnormalize, xpath
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.stylizer import Stylizer
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.lit.lzx import Compressor
import calibre
from calibre import LoggingInterface
from calibre import plugins
msdes, msdeserror = plugins['msdes']
import calibre.ebooks.lit.mssha1 as mssha1
@ -138,9 +143,9 @@ def warn(x):
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, path, oeb, map=HTML_MAP, logger=FauxLogger()):
def __init__(self, root, path, oeb, map=HTML_MAP):
self.path = path
self.logger = logger
self.logger = oeb.logger
self.dir = os.path.dirname(path)
self.manifest = oeb.manifest
self.tags, self.tattrs = map
@ -294,10 +299,9 @@ def preserve(function):
return wrapper
class LitWriter(object):
def __init__(self):
    """Create a writer; there are no options to configure."""
def _litize_oeb(self):
oeb = self._oeb
@ -306,32 +310,27 @@ class LitWriter(object):
if oeb.metadata.cover:
id = str(oeb.metadata.cover[0])
cover = oeb.manifest[id]
elif MS_COVER_TYPE in oeb.guide:
href = oeb.guide[MS_COVER_TYPE].href
cover = oeb.manifest.hrefs[href]
elif 'cover' in oeb.guide:
href = oeb.guide['cover'].href
cover = oeb.manifest.hrefs[href]
else:
html = oeb.spine[0].data
imgs = xpath(html, '//img[position()=1]')
href = imgs[0].get('src') if imgs else None
cover = oeb.manifest.hrefs[href] if href else None
if cover:
if not oeb.metadata.cover:
oeb.metadata.add('cover', cover.id)
for type, title in ALL_MS_COVER_TYPES:
if type not in oeb.guide:
oeb.guide.add(type, title, cover.href)
else:
self._logger.log_warn('No suitable cover image found.')
self._logger.warn('No suitable cover image found.')
def dump(self, oeb, path):
    """Write `oeb` to `path`.

    `path` may be an object with a `write` attribute, which is used
    directly as the output stream, or a filesystem path, which is opened
    in 'w+b' mode for the duration of the write.  Returns whatever
    `_dump_stream` returns.
    """
    if not hasattr(path, 'write'):
        with open(path, 'w+b') as stream:
            return self._dump_stream(oeb, stream)
    # Already file-like: hand it straight to the stream writer.
    return self._dump_stream(oeb, path)
def _dump_stream(self, oeb, stream):
    """Bind `oeb` and `stream` to this writer, then emit the book.

    Resets the per-dump state (four in-memory section buffers, the
    directory list and the cached metadata), takes the logger from
    `oeb.logger`, and then runs `_litize_oeb` followed by
    `_write_content`.
    """
    self._oeb = oeb
    self._logger = oeb.logger
    self._stream = stream
    # Fresh state for every dump so the writer can be reused.
    section_buffers = []
    for _unused in xrange(4):
        section_buffers.append(StringIO())
    self._sections = section_buffers
    self._directory = []
    self._meta = None
    self._litize_oeb()
    self._write_content()
def _write(self, *data):
for datum in data:
@ -345,7 +344,7 @@ class LitWriter(object):
def _tell(self):
    """Return the current position reported by the output stream."""
    stream = self._stream
    return stream.tell()
def _dump(self):
def _write_content(self):
# Build content sections
self._build_sections()
@ -474,8 +473,7 @@ class LitWriter(object):
secnum = 0
if not isinstance(data, basestring):
self._add_folder(name)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP,
logger=self._logger)
rebin = ReBinary(data, item.href, self._oeb, map=HTML_MAP)
self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks
@ -554,8 +552,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP,
logger=self._logger)
rebin = ReBinary(meta, 'content.opf', self._oeb, map=OPF_MAP)
meta = rebin.content
self._meta = meta
self._add_file('/meta', meta)
@ -719,19 +716,33 @@ def option_parser():
help=_('Useful for debugging.'))
return parser
def oeb2lit(opts, inpath):
    """Convert the OEB book at `inpath` to a LIT file.

    Reads `opts.verbose` and `opts.output`.  When `opts.output` is None
    the output name is the input's base name with its extension replaced
    by '.lit'.  The book is run through the TOC adder, case mangler, CSS
    flattener, SVG rasterizer and manifest trimmer (in that order) before
    being handed to `LitWriter`.
    """
    logger = Logger(logging.getLogger('oeb2lit'))
    logger.setup_cli_handler(opts.verbose)

    outpath = opts.output
    if outpath is None:
        stem = os.path.splitext(os.path.basename(inpath))[0]
        outpath = stem + '.lit'
    outpath = os.path.abspath(outpath)

    context = Context('Firefox', 'MSReader')
    oeb = OEBBook(inpath, logger=logger)

    # Each transform is built immediately before it runs; the order of
    # the steps is significant and is kept as-is.
    HTMLTOCAdder().transform(oeb, context)
    CaseMangler().transform(oeb, context)
    dest = context.dest
    CSSFlattener(
        fbase=dest.fbase, fkey=dest.fnames.values(),
        unfloat=True, untable=True).transform(oeb, context)
    SVGRasterizer().transform(oeb, context)
    ManifestTrimmer().transform(oeb, context)

    LitWriter().dump(oeb, outpath)
    run_plugins_on_postprocess(outpath, 'lit')
    logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv):
@ -740,8 +751,8 @@ def main(argv=sys.argv):
if len(args) != 1:
parser.print_help()
return 1
opfpath = args[0]
oeb2lit(opts, opfpath)
inpath = args[0]
oeb2lit(opts, inpath)
return 0
if __name__ == '__main__':

View File

@ -114,10 +114,10 @@ class MobiMLizer(object):
def mobimlize_measure(self, ptsize):
    """Format a measure for MobiML output.

    String values are returned unchanged.  Numeric values smaller than
    the profile's base font size (`self.profile.fbase`) are rounded and
    formatted with a 'pt' suffix; all other values are divided by the
    base size, rounded, and formatted with an 'em' suffix.
    """
    if isinstance(ptsize, basestring):
        return ptsize
    embase = self.profile.fbase
    if ptsize < embase:
        return "%dpt" % int(round(ptsize))
    # Force true division: under Python 2, int / int truncates before
    # round() ever sees the value (e.g. 28 / 16 -> 1 instead of 1.75).
    return "%dem" % int(round(ptsize / float(embase)))
def preize_text(self, text):
text = unicode(text).replace(u' ', u'\xa0')

View File

@ -17,12 +17,13 @@ import re
from itertools import izip, count
from collections import defaultdict
from urlparse import urldefrag
import logging
from lxml import etree
from PIL import Image
from calibre.ebooks.oeb.base import XML_NS, XHTML, XHTML_NS, OEB_DOCS, \
OEB_RASTER_IMAGES
from calibre.ebooks.oeb.base import xpath, barename, namespace, prefixname
from calibre.ebooks.oeb.base import FauxLogger, OEBBook
from calibre.ebooks.oeb.base import Logger, OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
@ -32,12 +33,12 @@ from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
from calibre.customize.ui import run_plugins_on_postprocess
# TODO:
# - Allow override CSS (?)
# - Generate index records
# - Generate in-content ToC
# - Command line options, etc.
# - Optionally rasterize tables
EXTH_CODES = {
'creator': 100,
@ -60,7 +61,8 @@ UNCOMPRESSED = 1
PALMDOC = 2
HUFFDIC = 17480
MAX_IMAGE_SIZE = 63 * 1024
PALM_MAX_IMAGE_SIZE = 63 * 1024
OTHER_MAX_IMAGE_SIZE = 10 * 1024 * 1024
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
@ -115,7 +117,7 @@ class Serializer(object):
buffer.write('<guide>')
for ref in self.oeb.guide.values():
path, frag = urldefrag(ref.href)
if hrefs[path].media_type not in OEB_DOCS or
if hrefs[path].media_type not in OEB_DOCS or \
not ref.title:
continue
buffer.write('<reference title="%s" type="%s" '
@ -229,9 +231,9 @@ class Serializer(object):
class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
def __init__(self, compression=None, imagemax=None):
    """Store the writer settings.

    A falsy `compression` selects UNCOMPRESSED; a falsy `imagemax`
    selects OTHER_MAX_IMAGE_SIZE.
    """
    self._compression = compression if compression else UNCOMPRESSED
    self._imagemax = imagemax if imagemax else OTHER_MAX_IMAGE_SIZE
def dump(self, oeb, path):
if hasattr(path, 'write'):
@ -307,6 +309,8 @@ class MobiWriter(object):
text = StringIO(text)
nrecords = 0
offset = 0
if self._compression != UNCOMPRESSED:
self._oeb.logger.info('Compressing markup content...')
data, overlap = self._read_text_record(text)
while len(data) > 0:
if self._compression == PALMDOC:
@ -382,7 +386,7 @@ class MobiWriter(object):
coverid = metadata.cover[0] if metadata.cover else None
for _, href in images:
item = self._oeb.manifest.hrefs[href]
data = self._rescale_image(item.data, MAX_IMAGE_SIZE)
data = self._rescale_image(item.data, self._imagemax)
self._records.append(data)
def _generate_record0(self):
@ -476,30 +480,62 @@ class MobiWriter(object):
self._write(record)
def main(argv=sys.argv):
from calibre.ebooks.oeb.base import DirWriter
inpath, outpath = argv[1:]
def option_parser():
    """Build and return the command-line option parser for oeb2mobi."""
    from calibre.utils.config import OptionParser
    parser = OptionParser(usage=_('%prog [options] OPFFILE'))

    def add_flag(short, long_, text):
        # Boolean switch that is off unless given on the command line.
        parser.add_option(short, long_, default=False,
                          action='store_true', help=text)

    parser.add_option(
        '-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    add_flag('-c', '--compress',
             _('Compress file text using PalmDOC compression.'))
    add_flag('-r', '--rescale-images',
             _('Modify images to meet Palm device size limitations.'))
    add_flag('-v', '--verbose',
             _('Useful for debugging.'))
    return parser
def oeb2mobi(opts, inpath):
    """Convert the OEB book at `inpath` to a Mobipocket file.

    Attributes read from `opts`: `verbose`, `output`, `compress` and
    `rescale_images`.  When `opts.output` is None the output name is the
    input's base name with its extension replaced by '.mobi'.  The book
    is run through the TOC adder, case mangler, CSS flattener, SVG
    rasterizer, manifest trimmer and MobiML converter (in that order)
    before being written by `MobiWriter`.
    """
    logger = Logger(logging.getLogger('oeb2mobi'))
    logger.setup_cli_handler(opts.verbose)
    outpath = opts.output
    if outpath is None:
        outpath = os.path.basename(inpath)
        outpath = os.path.splitext(outpath)[0] + '.mobi'
    compression = PALMDOC if opts.compress else UNCOMPRESSED
    # The module constant is PALM_MAX_IMAGE_SIZE; the old name
    # MAX_IMAGE_SIZE no longer exists, so referencing it made
    # --rescale-images fail with a NameError.  Passing None lets
    # MobiWriter fall back to its own default limit.
    imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
    context = Context('Firefox', 'EZReader')
    oeb = OEBBook(inpath, logger=logger)
    tocadder = HTMLTOCAdder()
    tocadder.transform(oeb, context)
    mangler = CaseMangler()
    mangler.transform(oeb, context)
    fbase = context.dest.fbase
    fkey = context.dest.fnums.values()
    flattener = CSSFlattener(
        fbase=fbase, fkey=fkey, unfloat=True, untable=True)
    flattener.transform(oeb, context)
    rasterizer = SVGRasterizer()
    rasterizer.transform(oeb, context)
    trimmer = ManifestTrimmer()
    trimmer.transform(oeb, context)
    mobimlizer = MobiMLizer()
    mobimlizer.transform(oeb, context)
    writer = MobiWriter(compression=compression, imagemax=imagemax)
    writer.dump(oeb, outpath)
    run_plugins_on_postprocess(outpath, 'mobi')
    logger.info(_('Output written to ') + outpath)
def main(argv=sys.argv):
    """Command-line entry point for oeb2mobi.

    Expects exactly one positional argument (the input path).  Returns 0
    after a conversion, or prints the help text and returns 1 when the
    argument count is wrong.
    """
    parser = option_parser()
    opts, positional = parser.parse_args(argv[1:])
    if len(positional) == 1:
        oeb2mobi(opts, positional[0])
        return 0
    parser.print_help()
    return 1
if __name__ == '__main__':

View File

@ -372,6 +372,9 @@ class Manifest(object):
def __eq__(self, other):
    """Two objects are equal only when they are the very same object."""
    return self is other
def __ne__(self, other):
    """Return the logical negation of `self.__eq__(other)`."""
    is_equal = self.__eq__(other)
    return not is_equal
def __cmp__(self, other):
result = cmp(self.spine_position, other.spine_position)
if result != 0:

View File

@ -36,23 +36,26 @@ PROFILES = {
fsizes=[7.5, 9, 10, 12, 15.5, 20, 22, 24]),
'MSReader':
Profile(width=480, height=652, dpi=100.0, fbase=13,
Profile(width=480, height=652, dpi=96, fbase=13,
fsizes=[10, 11, 13, 16, 18, 20, 22, 26]),
# Not really, but let's pretend
'MobiDesktop':
'Mobipocket':
Profile(width=600, height=800, dpi=96, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
# No clue on usable screen size; DPI should be good
'EZReader':
Profile(width=584, height=754, dpi=168.451, fbase=18,
fsizes=[14, 14, 16, 18, 20, 22, 24, 26]),
Profile(width=584, height=754, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 21, 24, 28]),
# No clue on usable screen size; DPI should be good
'CybookG3':
Profile(width=584, height=754, dpi=168.451, fbase=12,
fsizes=[9, 10, 11, 12, 14, 17, 20, 24]),
Profile(width=584, height=754, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 21, 24, 28]),
'Kindle':
Profile(width=525, height=640, dpi=168.451, fbase=16,
fsizes=[12, 12, 14, 16, 18, 21, 24, 28]),
'Firefox':
Profile(width=800, height=600, dpi=100.0, fbase=12,

View File

@ -23,7 +23,7 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
from lxml import etree
from lxml.cssselect import css_to_xpath, ExpressionError
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
from calibre.ebooks.oeb.base import barename, urlnormalize
from calibre.ebooks.oeb.base import XPNSMAP, xpath, barename, urlnormalize
from calibre.ebooks.oeb.profile import PROFILES
from calibre.resources import html_css
@ -87,10 +87,6 @@ FONT_SIZE_NAMES = set(['xx-small', 'x-small', 'small', 'medium', 'large',
'x-large', 'xx-large'])
XPNSMAP = {'h': XHTML_NS,}
def xpath(elem, expr):
return elem.xpath(expr, namespaces=XPNSMAP)
class CSSSelector(etree.XPath):
MIN_SPACE_RE = re.compile(r' *([>~+]) *')
LOCAL_NAME_RE = re.compile(r"(?<!local-)name[(][)] *= *'[^:]+:")
@ -269,6 +265,7 @@ class Style(object):
self._fontSize = None
self._width = None
self._height = None
self._lineHeight = None
stylizer._styles[element] = self
def _update_cssdict(self, cssdict):
@ -427,6 +424,27 @@ class Style(object):
self._height = result
return self._height
@property
def lineHeight(self):
    """Line height for this style, computed on first access and cached.

    A 'line-height' entry that parses as a float is treated as a
    multiplier of `self.fontSize`; any other value is passed to
    `_unit_convert` with the font size as base.  Without an entry the
    parent's line height is used, and with no parent the result is
    1.2 times the font size.
    """
    if self._lineHeight is not None:
        return self._lineHeight
    parent = self._getparent()
    style = self._style
    if 'line-height' in style:
        raw = style['line-height']
        try:
            factor = float(raw)
        except ValueError:
            computed = self._unit_convert(raw, base=self.fontSize)
        else:
            computed = factor * self.fontSize
    elif parent is not None:
        # TODO: proper inheritance
        computed = parent.lineHeight
    else:
        computed = 1.2 * self.fontSize
    self._lineHeight = computed
    return self._lineHeight
def __str__(self):
items = self._style.items()
items.sort()

View File

@ -186,7 +186,6 @@ class CSSFlattener(object):
cssdict['margin-left'] = "%d%%" % (percent * 100)
left -= style['text-indent']
if self.unfloat and 'float' in cssdict \
and tag not in ('img', 'object') \
and cssdict.get('display', 'none') != 'none':
del cssdict['display']
if self.untable and 'display' in cssdict \

View File

@ -66,7 +66,8 @@ class HTMLTOCAdder(object):
element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
href=css_href)
body = element(contents, XHTML('body'),
attrib={'class': 'calibre_toc'})
attrib={'id': 'calibre_toc',
'class': 'calibre_toc'})
h1 = element(body, XHTML('h1'),
attrib={'class': 'calibre_toc_header'})
h1.text = 'Table of Contents'
@ -74,13 +75,13 @@ class HTMLTOCAdder(object):
id, href = oeb.manifest.generate('contents', 'contents.xhtml')
item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
oeb.spine.add(item, linear=False)
oeb.guide.add('toc', 'Table of Contents', href)
oeb.guide.add('toc', 'Table of Contents', href + '#calibre_toc')
def add_toc_level(self, elem, toc):
for node in toc:
block = element(elem, XHTML('div'),
attrib={'class': 'calibre_toc_block'})
line = element(elem, XHTML('a'),
line = element(block, XHTML('a'),
attrib={'href': node.href,
'class': 'calibre_toc_line'})
line.text = node.title

View File

@ -21,7 +21,7 @@ from PyQt4.QtGui import QPainter
from PyQt4.QtSvg import QSvgRenderer
from PyQt4.QtGui import QApplication
from calibre.ebooks.oeb.base import XHTML_NS, XHTML, SVG_NS, SVG, XLINK
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME
from calibre.ebooks.oeb.base import SVG_MIME, PNG_MIME, JPEG_MIME
from calibre.ebooks.oeb.base import xml2str, xpath, namespace, barename
from calibre.ebooks.oeb.stylizer import Stylizer
@ -41,7 +41,7 @@ class SVGRasterizer(object):
self.rasterize_spine()
self.rasterize_cover()
def rasterize_svg(self, elem, width=0, height=0):
def rasterize_svg(self, elem, width=0, height=0, format='PNG'):
data = QByteArray(xml2str(elem))
svg = QSvgRenderer(data)
size = svg.defaultSize()
@ -63,7 +63,7 @@ class SVGRasterizer(object):
array = QByteArray()
buffer = QBuffer(array)
buffer.open(QIODevice.WriteOnly)
image.save(buffer, 'PNG')
image.save(buffer, format)
return str(array)
def dataize_manifest(self):
@ -171,6 +171,8 @@ class SVGRasterizer(object):
manifest.add(id, href, PNG_MIME, data=data)
self.images[key] = href
elem.tag = XHTML('img')
for attr in elem.attrib:
del elem.attrib[attr]
elem.attrib['src'] = item.relhref(href)
elem.text = None
for child in elem:

View File

@ -54,6 +54,7 @@ entry_points = {
'isbndb = calibre.ebooks.metadata.isbndb:main',
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'oeb2mobi = calibre.ebooks.mobi.writer:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'lit2oeb = calibre.ebooks.lit.reader:main',
'oeb2lit = calibre.ebooks.lit.writer:main',