Improve handling of justification. Now calibre will explicitly change the justification of all left-aligned paragraphs to justified or vice versa depending on the justification setting. This should make it possible to robustly convert all content to either justified or not. calibre will not touch centered or right-aligned content.

This commit is contained in:
Kovid Goyal 2010-01-21 14:50:39 -07:00
parent 3c084bb83e
commit 24a6d43b91
13 changed files with 49 additions and 127 deletions

View File

@ -132,7 +132,8 @@ class FB2MLizer(object):
href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output
@ -152,7 +153,7 @@ class FB2MLizer(object):
text = []
for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(text)

View File

@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
mangler(oeb, opts)
rasterizer = SVGRasterizer()
rasterizer(oeb, opts)
lit = LitWriter()
lit = LitWriter(self.opts)
lit(oeb, output_path)

View File

@ -134,7 +134,7 @@ def warn(x):
class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, item, oeb, map=HTML_MAP):
def __init__(self, root, item, oeb, opts, map=HTML_MAP):
self.item = item
self.logger = oeb.logger
self.manifest = oeb.manifest
@ -143,7 +143,7 @@ class ReBinary(object):
self.anchors = []
self.page_breaks = []
self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
self.tree_to_binary(root)
self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None
@ -295,9 +295,8 @@ def preserve(function):
return wrapper
class LitWriter(object):
def __init__(self):
# Wow, no options
pass
def __init__(self, opts):
self.opts = opts
def _litize_oeb(self):
oeb = self._oeb
@ -469,7 +468,7 @@ class LitWriter(object):
secnum = 0
if isinstance(data, etree._Element):
self._add_folder(name)
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks
@ -562,7 +561,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
meta = rebin.content
self._meta = meta
self._add_file('/meta', meta)

View File

@ -92,6 +92,7 @@ class MobiMLizer(object):
def __call__(self, oeb, context):
oeb.logger.info('Converting XHTML to Mobipocket markup...')
self.oeb = oeb
self.opts = context
self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
@ -114,7 +115,7 @@ class MobiMLizer(object):
def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine:
stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
body = item.data.find(XHTML('body'))
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
nbody = etree.SubElement(nroot, XHTML('body'))

View File

@ -1,99 +0,0 @@
'''
Registry associating file extensions with Reader classes.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from itertools import chain
import calibre
from calibre.ebooks.oeb.base import OEBError
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.lit.reader import LitReader
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.utils.config import Config
# Names exported by a star-import of this module.  The previous value,
# ['get_reader'], named something this module never defines, which makes
# ``from <module> import *`` raise AttributeError; export the factory
# functions that actually exist instead.
__all__ = ['ReaderFactory', 'WriterFactory']

# Maps a lower-cased file extension to a (Reader class, Writer class) pair.
# A None slot means nothing is registered for that direction: '.opf' has a
# reader but no writer.
REGISTRY = {
    '.opf': (OEBReader, None),
    '.lit': (LitReader, LitWriter),
    '.mobi': (MobiReader, MobiWriter),
}
def ReaderFactory(path):
    """Return the Reader class to use for *path*.

    A directory always maps to ``OEBReader``.  Otherwise the lower-cased
    file extension of *path* is looked up in ``REGISTRY``.

    Raises ``OEBError`` when no reader is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBReader
    ext = os.path.splitext(path)[1].lower()
    # Unknown extensions fall back to an all-None pair so that the
    # "nothing registered" case is handled in one place below.
    reader_cls, _writer_cls = REGISTRY.get(ext, (None, None))
    if reader_cls is not None:
        return reader_cls
    raise OEBError('Unknown e-book file extension %r' % ext)
def WriterFactory(path):
    """Return the Writer class to use for *path*.

    ``OEBWriter`` is returned when *path* is an existing directory, or when
    it does not exist yet and has no file extension.  Otherwise the
    lower-cased extension is looked up in ``REGISTRY``.

    Raises ``OEBError`` when no writer is registered for the extension.
    """
    if os.path.isdir(path):
        return OEBWriter
    ext = os.path.splitext(path)[1].lower()
    # A not-yet-existing, extension-less path is written with OEBWriter too.
    if not (os.path.exists(path) or ext):
        return OEBWriter
    _reader_cls, writer_cls = REGISTRY.get(ext, (None, None))
    if writer_cls is not None:
        return writer_cls
    raise OEBError('Unknown e-book file extension %r' % ext)
def option_parser(Reader, Writer):
    """Build the command-line option parser for one Reader/Writer pair.

    The Reader, every class in ``Reader.TRANSFORMS`` and
    ``Writer.TRANSFORMS``, and finally the Writer are each handed the shared
    ``Config`` object through their ``config()`` hook.  The generic
    ``--encoding``, ``--output``, ``--pretty-print`` and ``--verbose``
    options are then added to the parser obtained from that config.

    Returns the resulting parser.
    """
    config = Config('ebook-convert', _('Options to control e-book conversion.'))
    # Order is Reader, then transforms, then Writer.
    Reader.config(config)
    for transform_cls in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        transform_cls.config(config)
    Writer.config(config)

    parser = config.option_parser()
    add = parser.add_option
    add('--encoding', default=None,
        help=_('Character encoding for input. Default is to auto detect.'))
    add('-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    add('-p', '--pretty-print', action='store_true',
        default=False, help=_('Produce more human-readable XML output.'))
    add('-v', '--verbose', default=0, action='count',
        help=_('Useful for debugging.'))
    return parser
def main(argv=sys.argv):
    """Command-line entry point: convert ``argv[1]`` into ``argv[2]``.

    Picks the Reader and Writer classes from the two paths, parses the
    remaining arguments as options, then runs the reader, every transform
    and the writer over a single ``OEBBook``.

    Returns 0 on success, or 1 when fewer than two paths are given or when
    stray positional arguments remain after option parsing.
    """
    if len(argv) < 3:
        print(_("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]"))
        return 1
    inpath, outpath = argv[1:3]
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, args = parser.parse_args(argv[3:])
    if args:
        # Anything left over is an unexpected positional argument.
        parser.print_help()
        return 1

    logger = logging.getLogger('ebook-convert')
    calibre.setup_cli_handlers(logger, logging.DEBUG)
    oeb = OEBBook(encoding=opts.encoding, pretty_print=opts.pretty_print,
                  logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)

    # Instantiate every stage before running any of them.
    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    transforms = [transform_cls.generate(opts) for transform_cls
                  in chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]

    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
extra_css='', user_css=''):
self.oeb = oeb
self.oeb, self.opts = oeb, opts
self.profile = profile
self.logger = oeb.logger
item = oeb.manifest.hrefs[path]
@ -249,6 +249,8 @@ class Stylizer(object):
style.update(self._normalize_font(prop.cssValue))
elif name == 'list-style':
style.update(self._normalize_list_style(prop.cssValue))
elif name == 'text-align':
style.update(self._normalize_text_align(prop.cssValue))
else:
style[name] = prop.value
if 'font-size' in style:
@ -306,6 +308,19 @@ class Stylizer(object):
return style
def _normalize_text_align(self, cssvalue):
    """Return a ``{'text-align': value}`` dict for a text-align declaration.

    ``left`` and ``justify`` are both replaced by the alignment selected
    through ``self.opts.dont_justify`` (``'left'`` when it is true,
    ``'justify'`` otherwise).  Every other value, including ``inherit``,
    is passed through unchanged.

    :param cssvalue: object whose ``cssText`` attribute holds the declared
        value as a string.
    """
    text = cssvalue.cssText
    # CSS keywords are ASCII case-insensitive, so compare on a normalised
    # copy; otherwise values such as 'LEFT' or ' Justify ' would escape the
    # forced justification.
    if text.strip().lower() in ('left', 'justify'):
        text = 'left' if self.opts.dont_justify else 'justify'
    return {'text-align': text}
def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family')

View File

@ -141,7 +141,7 @@ class CSSFlattener(object):
bs.append('text-align: '+ \
('left' if self.context.dont_justify else 'justify'))
body.set('style', '; '.join(bs))
stylizer = Stylizer(html, item.href, self.oeb, profile,
stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
user_css=self.context.extra_css,
extra_css=css)
self.stylizers[item] = stylizer

View File

@ -33,6 +33,7 @@ class CaseMangler(object):
def __call__(self, oeb, context):
oeb.logger.info('Applying case-transforming CSS...')
self.oeb = oeb
self.opts = context
self.profile = context.source
self.mangle_spine()
@ -44,7 +45,7 @@ class CaseMangler(object):
relhref = item.relhref(href)
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
rel='stylesheet', href=relhref, type=CSS_MIME)
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.mangle_elem(html.find(XHTML('body')), stylizer)
def text_transform(self, transform, text):

View File

@ -44,6 +44,7 @@ class SVGRasterizer(object):
def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...')
self.oeb = oeb
self.opts = context
self.profile = context.dest
self.images = {}
self.dataize_manifest()
@ -102,7 +103,7 @@ class SVGRasterizer(object):
def rasterize_spine(self):
for item in self.oeb.spine:
html = item.data
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.rasterize_item(item, stylizer)
def rasterize_item(self, item, stylizer):

View File

@ -113,7 +113,8 @@ class PMLMLizer(object):
href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output

View File

@ -90,7 +90,8 @@ class RBMLizer(object):
href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output
@ -111,7 +112,7 @@ class RBMLizer(object):
output = [u'']
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output.append(self.add_page_anchor(item))
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(output)

View File

@ -111,12 +111,13 @@ class RTFMLizer(object):
href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += '{\\page } '
for item in self.oeb_book.spine:
self.log.debug('Converting %s to RTF markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += self.footer()
output = self.insert_images(output)

View File

@ -54,7 +54,7 @@ class TXTMLizer(object):
output.append(self.get_toc())
for item in self.oeb_book.spine:
self.log.debug('Converting %s to TXT...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
content = self.remove_newlines(content)
output += self.dump_text(etree.fromstring(content), stylizer)