From 24a6d43b9115a4f6d4bb451c31158b0bf4618186 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 21 Jan 2010 14:50:39 -0700 Subject: [PATCH] Improve handling of justification. Now calibre will explicitly change the justification of all left aligned paragraphs to justified or vice versa depending on the justification setting. This should make it possible to robustly convert all content to either justified or not. calibre will not touch centered or right aligned content. --- src/calibre/ebooks/fb2/fb2ml.py | 5 +- src/calibre/ebooks/lit/output.py | 2 +- src/calibre/ebooks/lit/writer.py | 13 ++- src/calibre/ebooks/mobi/mobiml.py | 3 +- src/calibre/ebooks/oeb/factory.py | 99 ------------------- src/calibre/ebooks/oeb/stylizer.py | 19 +++- src/calibre/ebooks/oeb/transforms/flatcss.py | 2 +- .../ebooks/oeb/transforms/manglecase.py | 15 +-- .../ebooks/oeb/transforms/rasterize.py | 3 +- src/calibre/ebooks/pml/pmlml.py | 3 +- src/calibre/ebooks/rb/rbml.py | 5 +- src/calibre/ebooks/rtf/rtfml.py | 5 +- src/calibre/ebooks/txt/txtml.py | 2 +- 13 files changed, 49 insertions(+), 127 deletions(-) delete mode 100644 src/calibre/ebooks/oeb/factory.py diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 42feeb2330..c8428cf136 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -132,7 +132,8 @@ class FB2MLizer(object): href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, + self.opts, self.opts.output_profile) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) return output @@ -152,7 +153,7 @@ class FB2MLizer(object): text = [] for item in self.oeb_book.spine: self.log.debug('Converting %s to FictionBook2 XML' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) text.append(self.add_page_anchor(item)) text += self.dump_text(item.data.find(XHTML('body')), stylizer, item) return ''.join(text) diff --git a/src/calibre/ebooks/lit/output.py b/src/calibre/ebooks/lit/output.py index 2a08ff51a8..423fb9ce7c 100644 --- a/src/calibre/ebooks/lit/output.py +++ b/src/calibre/ebooks/lit/output.py @@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin): mangler(oeb, opts) rasterizer = SVGRasterizer() rasterizer(oeb, opts) - lit = LitWriter() + lit = LitWriter(self.opts) lit(oeb, output_path) diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 6dd5068032..cf9ea6aa77 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -134,7 +134,7 @@ def warn(x): class ReBinary(object): NSRMAP = {'': None, XML_NS: 'xml'} - def __init__(self, root, item, oeb, map=HTML_MAP): + def __init__(self, root, item, oeb, opts, map=HTML_MAP): self.item = item self.logger = oeb.logger self.manifest = oeb.manifest @@ -143,7 +143,7 @@ class ReBinary(object): self.anchors = [] self.page_breaks = [] self.is_html = is_html = map is HTML_MAP - self.stylizer = Stylizer(root, item.href, oeb) if is_html else None + self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None self.tree_to_binary(root) self.content = self.buf.getvalue() self.ahc = self.build_ahc() if is_html else None @@ -295,9 +295,8 @@ def preserve(function): return wrapper class LitWriter(object): - def __init__(self): - # Wow, no options - pass + def __init__(self, opts): + self.opts = opts def _litize_oeb(self): oeb = self._oeb @@ -469,7 +468,7 @@ class LitWriter(object): secnum = 0 if isinstance(data, etree._Element): self._add_folder(name) - rebin = ReBinary(data, item, self._oeb, map=HTML_MAP) + rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP) self._add_file(name + '/ahc', rebin.ahc, 0) self._add_file(name + '/aht', rebin.aht, 0) item.page_breaks = rebin.page_breaks @@ -562,7 +561,7 @@ class LitWriter(object): meta.attrib['ms--minimum_level'] = '0' meta.attrib['ms--attr5'] = '1' meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper() - rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP) + rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP) meta = rebin.content self._meta = meta self._add_file('/meta', meta) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index aa69ba446b..f958b63a12 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -92,6 +92,7 @@ class MobiMLizer(object): def __call__(self, oeb, context): oeb.logger.info('Converting XHTML to Mobipocket markup...') self.oeb = oeb + self.opts = context self.profile = profile = context.dest self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items()) self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys()) @@ -114,7 +115,7 @@ class MobiMLizer(object): def mobimlize_spine(self): 'Iterate over the spine and convert it to MOBIML' for item in self.oeb.spine: - stylizer = Stylizer(item.data, item.href, self.oeb, self.profile) + stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile) body = item.data.find(XHTML('body')) nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP) nbody = etree.SubElement(nroot, XHTML('body')) diff --git a/src/calibre/ebooks/oeb/factory.py b/src/calibre/ebooks/oeb/factory.py deleted file mode 100644 index 8add71d20d..0000000000 --- a/src/calibre/ebooks/oeb/factory.py +++ /dev/null @@ -1,99 +0,0 @@ -''' -Registry associating file extensions with Reader classes. -''' -from __future__ import with_statement - -__license__ = 'GPL v3' -__copyright__ = '2008, Marshall T. Vandegrift ' - -import sys, os, logging -from itertools import chain -import calibre -from calibre.ebooks.oeb.base import OEBError -from calibre.ebooks.oeb.reader import OEBReader -from calibre.ebooks.oeb.writer import OEBWriter -from calibre.ebooks.lit.reader import LitReader -from calibre.ebooks.lit.writer import LitWriter -from calibre.ebooks.mobi.reader import MobiReader -from calibre.ebooks.mobi.writer import MobiWriter -from calibre.ebooks.oeb.base import OEBBook -from calibre.ebooks.oeb.profile import Context -from calibre.utils.config import Config - -__all__ = ['get_reader'] - -REGISTRY = { - '.opf': (OEBReader, None), - '.lit': (LitReader, LitWriter), - '.mobi': (MobiReader, MobiWriter), - } - -def ReaderFactory(path): - if os.path.isdir(path): - return OEBReader - ext = os.path.splitext(path)[1].lower() - Reader = REGISTRY.get(ext, (None, None))[0] - if Reader is None: - raise OEBError('Unknown e-book file extension %r' % ext) - return Reader - -def WriterFactory(path): - if os.path.isdir(path): - return OEBWriter - ext = os.path.splitext(path)[1].lower() - if not os.path.exists(path) and not ext: - return OEBWriter - Writer = REGISTRY.get(ext, (None, None))[1] - if Writer is None: - raise OEBError('Unknown e-book file extension %r' % ext) - return Writer - - -def option_parser(Reader, Writer): - cfg = Config('ebook-convert', _('Options to control e-book conversion.')) - Reader.config(cfg) - for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS): - Transform.config(cfg) - Writer.config(cfg) - parser = cfg.option_parser() - parser.add_option('--encoding', default=None, - help=_('Character encoding for input. Default is to auto detect.')) - parser.add_option('-o', '--output', default=None, - help=_('Output file. Default is derived from input filename.')) - parser.add_option('-p', '--pretty-print', action='store_true', - default=False, help=_('Produce more human-readable XML output.')) - parser.add_option('-v', '--verbose', default=0, action='count', - help=_('Useful for debugging.')) - return parser - -def main(argv=sys.argv): - if len(argv) < 3: - print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]") - return 1 - inpath, outpath = argv[1], argv[2] - Reader = ReaderFactory(inpath) - Writer = WriterFactory(outpath) - parser = option_parser(Reader, Writer) - opts, args = parser.parse_args(argv[3:]) - if len(args) != 0: - parser.print_help() - return 1 - logger = logging.getLogger('ebook-convert') - calibre.setup_cli_handlers(logger, logging.DEBUG) - encoding = opts.encoding - pretty_print = opts.pretty_print - oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger) - context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE) - reader = Reader.generate(opts) - writer = Writer.generate(opts) - transforms = [] - for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS): - transforms.append(Transform.generate(opts)) - reader(oeb, inpath) - for transform in transforms: - transform(oeb, context) - writer(oeb, outpath) - return 0 - -if __name__ == '__main__': - sys.exit(main()) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index d0e394b9e5..26fb4ca980 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -110,9 +110,9 @@ class CSSSelector(etree.XPath): class Stylizer(object): STYLESHEETS = WeakKeyDictionary() - def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'], + def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'], extra_css='', user_css=''): - self.oeb = oeb + self.oeb, self.opts = oeb, opts self.profile = profile self.logger = oeb.logger item = oeb.manifest.hrefs[path] @@ -249,6 +249,8 @@ class Stylizer(object): style.update(self._normalize_font(prop.cssValue)) elif name == 'list-style': style.update(self._normalize_list_style(prop.cssValue)) + elif name == 'text-align': + style.update(self._normalize_text_align(prop.cssValue)) else: style[name] = prop.value if 'font-size' in style: @@ -306,6 +308,19 @@ class Stylizer(object): return style + def _normalize_text_align(self, cssvalue): + style = {} + text = cssvalue.cssText + if text == 'inherit': + style['text-align'] = 'inherit' + else: + if text in ('left', 'justify'): + val = 'left' if self.opts.dont_justify else 'justify' + style['text-align'] = val + else: + style['text-align'] = text + return style + def _normalize_font(self, cssvalue): composition = ('font-style', 'font-variant', 'font-weight', 'font-size', 'line-height', 'font-family') diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 61226ca4f4..1eb6afc1b5 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -141,7 +141,7 @@ class CSSFlattener(object): bs.append('text-align: '+ \ ('left' if self.context.dont_justify else 'justify')) body.set('style', '; '.join(bs)) - stylizer = Stylizer(html, item.href, self.oeb, profile, + stylizer = Stylizer(html, item.href, self.oeb, self.context, profile, user_css=self.context.extra_css, extra_css=css) self.stylizers[item] = stylizer diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py index 4b852db6c4..04bf63ac1d 100644 --- a/src/calibre/ebooks/oeb/transforms/manglecase.py +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -29,13 +29,14 @@ class CaseMangler(object): @classmethod def generate(cls, opts): return cls() - + def __call__(self, oeb, context): oeb.logger.info('Applying case-transforming CSS...') self.oeb = oeb + self.opts = context self.profile = context.source self.mangle_spine() - + def mangle_spine(self): id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css') self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS) @@ -44,9 +45,9 @@ class CaseMangler(object): relhref = item.relhref(href) etree.SubElement(html.find(XHTML('head')), XHTML('link'), rel='stylesheet', href=relhref, type=CSS_MIME) - stylizer = Stylizer(html, item.href, self.oeb, self.profile) + stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile) self.mangle_elem(html.find(XHTML('body')), stylizer) - + def text_transform(self, transform, text): if transform == 'capitalize': return text.title() @@ -55,7 +56,7 @@ class CaseMangler(object): elif transform == 'lowercase': return text.lower() return text - + def split_text(self, text): results = [''] isupper = text[0].isupper() @@ -66,7 +67,7 @@ class CaseMangler(object): isupper = not isupper results.append(char) return results - + def smallcaps_elem(self, elem, attr): texts = self.split_text(getattr(elem, attr)) setattr(elem, attr, None) @@ -90,7 +91,7 @@ class CaseMangler(object): last.tail = tail child.tail = None last = child - + def mangle_elem(self, elem, stylizer): if not isinstance(elem.tag, basestring) or \ namespace(elem.tag) != XHTML_NS: diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 30357b10d2..ac28e51b15 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -44,6 +44,7 @@ class SVGRasterizer(object): def __call__(self, oeb, context): oeb.logger.info('Rasterizing SVG images...') self.oeb = oeb + self.opts = context self.profile = context.dest self.images = {} self.dataize_manifest() @@ -102,7 +103,7 @@ class SVGRasterizer(object): def rasterize_spine(self): for item in self.oeb.spine: html = item.data - stylizer = Stylizer(html, item.href, self.oeb, self.profile) + stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile) self.rasterize_item(item, stylizer) def rasterize_item(self, item, stylizer): diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index 4f3d5f23df..e3609fcddb 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -113,7 +113,8 @@ class PMLMLizer(object): href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, + self.opts, self.opts.output_profile) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) return output diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py index 5574aa94b6..50153d7d4d 100644 --- a/src/calibre/ebooks/rb/rbml.py +++ b/src/calibre/ebooks/rb/rbml.py @@ -90,7 +90,8 @@ class RBMLizer(object): href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, + self.opts, self.opts.output_profile) output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item)) return output @@ -111,7 +112,7 @@ class RBMLizer(object): output = [u''] for item in self.oeb_book.spine: self.log.debug('Converting %s to RocketBook HTML...' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) output.append(self.add_page_anchor(item)) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item) return ''.join(output) diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py index 6aa48ad61b..1217482823 100644 --- a/src/calibre/ebooks/rtf/rtfml.py +++ b/src/calibre/ebooks/rtf/rtfml.py @@ -111,12 +111,13 @@ class RTFMLizer(object): href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, + self.opts, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += '{\\page } ' for item in self.oeb_book.spine: self.log.debug('Converting %s to RTF markup...' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.footer() output = self.insert_images(output) diff --git a/src/calibre/ebooks/txt/txtml.py b/src/calibre/ebooks/txt/txtml.py index 7642e051fe..bb730c0720 100644 --- a/src/calibre/ebooks/txt/txtml.py +++ b/src/calibre/ebooks/txt/txtml.py @@ -54,7 +54,7 @@ class TXTMLizer(object): output.append(self.get_toc()) for item in self.oeb_book.spine: self.log.debug('Converting %s to TXT...' % item.href) - stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) + stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode)) content = self.remove_newlines(content) output += self.dump_text(etree.fromstring(content), stylizer)