Improve handling of justification. calibre will now explicitly change the justification of all left-aligned paragraphs to justified, or vice versa, depending on the justification setting. This should make it possible to robustly convert all content to either justified or not justified. Centered and right-aligned content is not touched.

This commit is contained in:
Kovid Goyal 2010-01-21 14:50:39 -07:00
parent 3c084bb83e
commit 24a6d43b91
13 changed files with 49 additions and 127 deletions

View File

@ -132,7 +132,8 @@ class FB2MLizer(object):
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output return output
@ -152,7 +153,7 @@ class FB2MLizer(object):
text = [] text = []
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href) self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
text.append(self.add_page_anchor(item)) text.append(self.add_page_anchor(item))
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(text) return ''.join(text)

View File

@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
mangler(oeb, opts) mangler(oeb, opts)
rasterizer = SVGRasterizer() rasterizer = SVGRasterizer()
rasterizer(oeb, opts) rasterizer(oeb, opts)
lit = LitWriter() lit = LitWriter(self.opts)
lit(oeb, output_path) lit(oeb, output_path)

View File

@ -134,7 +134,7 @@ def warn(x):
class ReBinary(object): class ReBinary(object):
NSRMAP = {'': None, XML_NS: 'xml'} NSRMAP = {'': None, XML_NS: 'xml'}
def __init__(self, root, item, oeb, map=HTML_MAP): def __init__(self, root, item, oeb, opts, map=HTML_MAP):
self.item = item self.item = item
self.logger = oeb.logger self.logger = oeb.logger
self.manifest = oeb.manifest self.manifest = oeb.manifest
@ -143,7 +143,7 @@ class ReBinary(object):
self.anchors = [] self.anchors = []
self.page_breaks = [] self.page_breaks = []
self.is_html = is_html = map is HTML_MAP self.is_html = is_html = map is HTML_MAP
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
self.tree_to_binary(root) self.tree_to_binary(root)
self.content = self.buf.getvalue() self.content = self.buf.getvalue()
self.ahc = self.build_ahc() if is_html else None self.ahc = self.build_ahc() if is_html else None
@ -295,9 +295,8 @@ def preserve(function):
return wrapper return wrapper
class LitWriter(object): class LitWriter(object):
def __init__(self): def __init__(self, opts):
# Wow, no options self.opts = opts
pass
def _litize_oeb(self): def _litize_oeb(self):
oeb = self._oeb oeb = self._oeb
@ -469,7 +468,7 @@ class LitWriter(object):
secnum = 0 secnum = 0
if isinstance(data, etree._Element): if isinstance(data, etree._Element):
self._add_folder(name) self._add_folder(name)
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP) rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/ahc', rebin.ahc, 0)
self._add_file(name + '/aht', rebin.aht, 0) self._add_file(name + '/aht', rebin.aht, 0)
item.page_breaks = rebin.page_breaks item.page_breaks = rebin.page_breaks
@ -562,7 +561,7 @@ class LitWriter(object):
meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--minimum_level'] = '0'
meta.attrib['ms--attr5'] = '1' meta.attrib['ms--attr5'] = '1'
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP) rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
meta = rebin.content meta = rebin.content
self._meta = meta self._meta = meta
self._add_file('/meta', meta) self._add_file('/meta', meta)

View File

@ -92,6 +92,7 @@ class MobiMLizer(object):
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Converting XHTML to Mobipocket markup...') oeb.logger.info('Converting XHTML to Mobipocket markup...')
self.oeb = oeb self.oeb = oeb
self.opts = context
self.profile = profile = context.dest self.profile = profile = context.dest
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys()) self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
@ -114,7 +115,7 @@ class MobiMLizer(object):
def mobimlize_spine(self): def mobimlize_spine(self):
'Iterate over the spine and convert it to MOBIML' 'Iterate over the spine and convert it to MOBIML'
for item in self.oeb.spine: for item in self.oeb.spine:
stylizer = Stylizer(item.data, item.href, self.oeb, self.profile) stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
body = item.data.find(XHTML('body')) body = item.data.find(XHTML('body'))
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
nbody = etree.SubElement(nroot, XHTML('body')) nbody = etree.SubElement(nroot, XHTML('body'))

View File

@ -1,99 +0,0 @@
'''
Registry associating file extensions with Reader classes.
'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys, os, logging
from itertools import chain
import calibre
from calibre.ebooks.oeb.base import OEBError
from calibre.ebooks.oeb.reader import OEBReader
from calibre.ebooks.oeb.writer import OEBWriter
from calibre.ebooks.lit.reader import LitReader
from calibre.ebooks.lit.writer import LitWriter
from calibre.ebooks.mobi.reader import MobiReader
from calibre.ebooks.mobi.writer import MobiWriter
from calibre.ebooks.oeb.base import OEBBook
from calibre.ebooks.oeb.profile import Context
from calibre.utils.config import Config
# Names exported by a star-import of this module.
# NOTE(review): no `get_reader` is defined in the visible part of this module
# (only ReaderFactory / WriterFactory / option_parser / main) -- confirm.
__all__ = ['get_reader']
# Maps a lower-cased file extension (with leading dot) to a
# (Reader class, Writer class) pair. A None entry means that direction is
# unsupported; the factory functions raise OEBError when they hit it.
REGISTRY = {
'.opf': (OEBReader, None),
'.lit': (LitReader, LitWriter),
'.mobi': (MobiReader, MobiWriter),
}
def ReaderFactory(path):
    '''
    Return the Reader class able to open ``path``.

    A directory is always read with OEBReader. For anything else the
    lower-cased file extension is looked up in REGISTRY.

    Raises OEBError when no reader is registered for the extension.
    '''
    if not os.path.isdir(path):
        extension = os.path.splitext(path)[1].lower()
        entry = REGISTRY.get(extension, (None, None))
        reader_cls = entry[0]
        if reader_cls is not None:
            return reader_cls
        raise OEBError('Unknown e-book file extension %r' % extension)
    return OEBReader
def WriterFactory(path):
    '''
    Return the Writer class to use for the output location ``path``.

    OEBWriter is chosen for an existing directory, and also for a path that
    does not exist yet and has no file extension. Otherwise the lower-cased
    extension is looked up in REGISTRY.

    Raises OEBError when no writer is registered for the extension.
    '''
    if os.path.isdir(path):
        return OEBWriter
    extension = os.path.splitext(path)[1].lower()
    # A not-yet-existing path without an extension is treated like a directory.
    if not (os.path.exists(path) or extension):
        return OEBWriter
    writer_cls = REGISTRY.get(extension, (None, None))[1]
    if writer_cls is not None:
        return writer_cls
    raise OEBError('Unknown e-book file extension %r' % extension)
def option_parser(Reader, Writer):
    '''
    Build the command line option parser for one Reader/Writer pairing.

    The Reader, every transform listed in ``Reader.TRANSFORMS`` and
    ``Writer.TRANSFORMS``, and finally the Writer each get to register their
    settings on a shared Config; the generic conversion options are then
    added to the parser created from it.
    '''
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
    Reader.config(cfg)
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        Transform.config(cfg)
    Writer.config(cfg)

    # Options shared by every conversion, as (flags, keyword arguments).
    generic_options = (
        (('--encoding',),
            dict(default=None,
                 help=_('Character encoding for input. Default is to auto detect.'))),
        (('-o', '--output'),
            dict(default=None,
                 help=_('Output file. Default is derived from input filename.'))),
        (('-p', '--pretty-print'),
            dict(action='store_true', default=False,
                 help=_('Produce more human-readable XML output.'))),
        (('-v', '--verbose'),
            dict(default=0, action='count',
                 help=_('Useful for debugging.'))),
    )
    parser = cfg.option_parser()
    for flags, settings in generic_options:
        parser.add_option(*flags, **settings)
    return parser
def main(argv=sys.argv):
    '''
    Command line entry point: ``ebook-convert INFILE OUTFILE [OPTIONS..]``.

    Picks the Reader and Writer from the two paths, parses the remaining
    arguments, then runs reader -> transforms -> writer over a fresh OEBBook.

    Returns 0 on success, or 1 after printing usage/help when the arguments
    are unusable.
    '''
    if len(argv) < 3:
        # Parenthesised single-argument form; prints the same text as the
        # bare print statement.
        print(_("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]"))
        return 1
    inpath, outpath = argv[1:3]
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, leftover = parser.parse_args(argv[3:])
    if leftover:
        # Stray positional arguments are not accepted.
        parser.print_help()
        return 1

    logger = logging.getLogger('ebook-convert')
    calibre.setup_cli_handlers(logger, logging.DEBUG)
    oeb = OEBBook(encoding=opts.encoding, pretty_print=opts.pretty_print,
                  logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)

    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    transforms = [Transform.generate(opts)
                  for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]

    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
class Stylizer(object): class Stylizer(object):
STYLESHEETS = WeakKeyDictionary() STYLESHEETS = WeakKeyDictionary()
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
extra_css='', user_css=''): extra_css='', user_css=''):
self.oeb = oeb self.oeb, self.opts = oeb, opts
self.profile = profile self.profile = profile
self.logger = oeb.logger self.logger = oeb.logger
item = oeb.manifest.hrefs[path] item = oeb.manifest.hrefs[path]
@ -249,6 +249,8 @@ class Stylizer(object):
style.update(self._normalize_font(prop.cssValue)) style.update(self._normalize_font(prop.cssValue))
elif name == 'list-style': elif name == 'list-style':
style.update(self._normalize_list_style(prop.cssValue)) style.update(self._normalize_list_style(prop.cssValue))
elif name == 'text-align':
style.update(self._normalize_text_align(prop.cssValue))
else: else:
style[name] = prop.value style[name] = prop.value
if 'font-size' in style: if 'font-size' in style:
@ -306,6 +308,19 @@ class Stylizer(object):
return style return style
def _normalize_text_align(self, cssvalue):
    '''
    Map a ``text-align`` value onto the configured justification setting.

    ``left`` and ``justify`` are both rewritten: to ``left`` when
    ``self.opts.dont_justify`` is true, to ``justify`` otherwise. Every
    other value (``center``, ``right``, ``inherit``, ...) is passed through
    unchanged.

    CSS keywords are case-insensitive, so the match ignores letter case and
    surrounding whitespace; otherwise ``text-align: LEFT`` would escape the
    override.

    :param cssvalue: object whose ``cssText`` attribute holds the value text.
    :return: dict with the single key ``'text-align'``.
    '''
    text = cssvalue.cssText
    if text.strip().lower() in ('left', 'justify'):
        text = 'left' if self.opts.dont_justify else 'justify'
    return {'text-align': text}
def _normalize_font(self, cssvalue): def _normalize_font(self, cssvalue):
composition = ('font-style', 'font-variant', 'font-weight', composition = ('font-style', 'font-variant', 'font-weight',
'font-size', 'line-height', 'font-family') 'font-size', 'line-height', 'font-family')

View File

@ -141,7 +141,7 @@ class CSSFlattener(object):
bs.append('text-align: '+ \ bs.append('text-align: '+ \
('left' if self.context.dont_justify else 'justify')) ('left' if self.context.dont_justify else 'justify'))
body.set('style', '; '.join(bs)) body.set('style', '; '.join(bs))
stylizer = Stylizer(html, item.href, self.oeb, profile, stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
user_css=self.context.extra_css, user_css=self.context.extra_css,
extra_css=css) extra_css=css)
self.stylizers[item] = stylizer self.stylizers[item] = stylizer

View File

@ -29,13 +29,14 @@ class CaseMangler(object):
@classmethod @classmethod
def generate(cls, opts): def generate(cls, opts):
return cls() return cls()
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Applying case-transforming CSS...') oeb.logger.info('Applying case-transforming CSS...')
self.oeb = oeb self.oeb = oeb
self.opts = context
self.profile = context.source self.profile = context.source
self.mangle_spine() self.mangle_spine()
def mangle_spine(self): def mangle_spine(self):
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css') id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS) self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
@ -44,9 +45,9 @@ class CaseMangler(object):
relhref = item.relhref(href) relhref = item.relhref(href)
etree.SubElement(html.find(XHTML('head')), XHTML('link'), etree.SubElement(html.find(XHTML('head')), XHTML('link'),
rel='stylesheet', href=relhref, type=CSS_MIME) rel='stylesheet', href=relhref, type=CSS_MIME)
stylizer = Stylizer(html, item.href, self.oeb, self.profile) stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.mangle_elem(html.find(XHTML('body')), stylizer) self.mangle_elem(html.find(XHTML('body')), stylizer)
def text_transform(self, transform, text): def text_transform(self, transform, text):
if transform == 'capitalize': if transform == 'capitalize':
return text.title() return text.title()
@ -55,7 +56,7 @@ class CaseMangler(object):
elif transform == 'lowercase': elif transform == 'lowercase':
return text.lower() return text.lower()
return text return text
def split_text(self, text): def split_text(self, text):
results = [''] results = ['']
isupper = text[0].isupper() isupper = text[0].isupper()
@ -66,7 +67,7 @@ class CaseMangler(object):
isupper = not isupper isupper = not isupper
results.append(char) results.append(char)
return results return results
def smallcaps_elem(self, elem, attr): def smallcaps_elem(self, elem, attr):
texts = self.split_text(getattr(elem, attr)) texts = self.split_text(getattr(elem, attr))
setattr(elem, attr, None) setattr(elem, attr, None)
@ -90,7 +91,7 @@ class CaseMangler(object):
last.tail = tail last.tail = tail
child.tail = None child.tail = None
last = child last = child
def mangle_elem(self, elem, stylizer): def mangle_elem(self, elem, stylizer):
if not isinstance(elem.tag, basestring) or \ if not isinstance(elem.tag, basestring) or \
namespace(elem.tag) != XHTML_NS: namespace(elem.tag) != XHTML_NS:

View File

@ -44,6 +44,7 @@ class SVGRasterizer(object):
def __call__(self, oeb, context): def __call__(self, oeb, context):
oeb.logger.info('Rasterizing SVG images...') oeb.logger.info('Rasterizing SVG images...')
self.oeb = oeb self.oeb = oeb
self.opts = context
self.profile = context.dest self.profile = context.dest
self.images = {} self.images = {}
self.dataize_manifest() self.dataize_manifest()
@ -102,7 +103,7 @@ class SVGRasterizer(object):
def rasterize_spine(self): def rasterize_spine(self):
for item in self.oeb.spine: for item in self.oeb.spine:
html = item.data html = item.data
stylizer = Stylizer(html, item.href, self.oeb, self.profile) stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
self.rasterize_item(item, stylizer) self.rasterize_item(item, stylizer)
def rasterize_item(self, item, stylizer): def rasterize_item(self, item, stylizer):

View File

@ -113,7 +113,8 @@ class PMLMLizer(object):
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output return output

View File

@ -90,7 +90,8 @@ class RBMLizer(object):
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
return output return output
@ -111,7 +112,7 @@ class RBMLizer(object):
output = [u''] output = [u'']
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to RocketBook HTML...' % item.href) self.log.debug('Converting %s to RocketBook HTML...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output.append(self.add_page_anchor(item)) output.append(self.add_page_anchor(item))
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
return ''.join(output) return ''.join(output)

View File

@ -111,12 +111,13 @@ class RTFMLizer(object):
href = self.oeb_book.guide['titlepage'].href href = self.oeb_book.guide['titlepage'].href
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book,
self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += '{\\page } ' output += '{\\page } '
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to RTF markup...' % item.href) self.log.debug('Converting %s to RTF markup...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer)
output += self.footer() output += self.footer()
output = self.insert_images(output) output = self.insert_images(output)

View File

@ -54,7 +54,7 @@ class TXTMLizer(object):
output.append(self.get_toc()) output.append(self.get_toc())
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to TXT...' % item.href) self.log.debug('Converting %s to TXT...' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
content = self.remove_newlines(content) content = self.remove_newlines(content)
output += self.dump_text(etree.fromstring(content), stylizer) output += self.dump_text(etree.fromstring(content), stylizer)