mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Improve handling of justification. calibre now explicitly changes the justification of all left-aligned paragraphs to justified, or vice versa, depending on the justification setting. This should make it possible to robustly convert all content to either justified or unjustified text. calibre will not touch centered or right-aligned content.
This commit is contained in:
parent
3c084bb83e
commit
24a6d43b91
@ -132,7 +132,8 @@ class FB2MLizer(object):
|
||||
href = self.oeb_book.guide['titlepage'].href
|
||||
item = self.oeb_book.manifest.hrefs[href]
|
||||
if item.spine_position is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||
self.opts, self.opts.output_profile)
|
||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||
return output
|
||||
|
||||
@ -152,7 +153,7 @@ class FB2MLizer(object):
|
||||
text = []
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
text.append(self.add_page_anchor(item))
|
||||
text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||
return ''.join(text)
|
||||
|
@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
|
||||
mangler(oeb, opts)
|
||||
rasterizer = SVGRasterizer()
|
||||
rasterizer(oeb, opts)
|
||||
lit = LitWriter()
|
||||
lit = LitWriter(self.opts)
|
||||
lit(oeb, output_path)
|
||||
|
||||
|
||||
|
@ -134,7 +134,7 @@ def warn(x):
|
||||
class ReBinary(object):
|
||||
NSRMAP = {'': None, XML_NS: 'xml'}
|
||||
|
||||
def __init__(self, root, item, oeb, map=HTML_MAP):
|
||||
def __init__(self, root, item, oeb, opts, map=HTML_MAP):
|
||||
self.item = item
|
||||
self.logger = oeb.logger
|
||||
self.manifest = oeb.manifest
|
||||
@ -143,7 +143,7 @@ class ReBinary(object):
|
||||
self.anchors = []
|
||||
self.page_breaks = []
|
||||
self.is_html = is_html = map is HTML_MAP
|
||||
self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
|
||||
self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
|
||||
self.tree_to_binary(root)
|
||||
self.content = self.buf.getvalue()
|
||||
self.ahc = self.build_ahc() if is_html else None
|
||||
@ -295,9 +295,8 @@ def preserve(function):
|
||||
return wrapper
|
||||
|
||||
class LitWriter(object):
|
||||
def __init__(self):
|
||||
# Wow, no options
|
||||
pass
|
||||
def __init__(self, opts):
|
||||
self.opts = opts
|
||||
|
||||
def _litize_oeb(self):
|
||||
oeb = self._oeb
|
||||
@ -469,7 +468,7 @@ class LitWriter(object):
|
||||
secnum = 0
|
||||
if isinstance(data, etree._Element):
|
||||
self._add_folder(name)
|
||||
rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
|
||||
rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
|
||||
self._add_file(name + '/ahc', rebin.ahc, 0)
|
||||
self._add_file(name + '/aht', rebin.aht, 0)
|
||||
item.page_breaks = rebin.page_breaks
|
||||
@ -562,7 +561,7 @@ class LitWriter(object):
|
||||
meta.attrib['ms--minimum_level'] = '0'
|
||||
meta.attrib['ms--attr5'] = '1'
|
||||
meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
|
||||
rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
|
||||
rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
|
||||
meta = rebin.content
|
||||
self._meta = meta
|
||||
self._add_file('/meta', meta)
|
||||
|
@ -92,6 +92,7 @@ class MobiMLizer(object):
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
||||
self.oeb = oeb
|
||||
self.opts = context
|
||||
self.profile = profile = context.dest
|
||||
self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
|
||||
self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
|
||||
@ -114,7 +115,7 @@ class MobiMLizer(object):
|
||||
def mobimlize_spine(self):
|
||||
'Iterate over the spine and convert it to MOBIML'
|
||||
for item in self.oeb.spine:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
|
||||
body = item.data.find(XHTML('body'))
|
||||
nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
|
||||
nbody = etree.SubElement(nroot, XHTML('body'))
|
||||
|
@ -1,99 +0,0 @@
|
||||
'''
|
||||
Registry associating file extensions with Reader classes.
|
||||
'''
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import sys, os, logging
|
||||
from itertools import chain
|
||||
import calibre
|
||||
from calibre.ebooks.oeb.base import OEBError
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
from calibre.ebooks.lit.reader import LitReader
|
||||
from calibre.ebooks.lit.writer import LitWriter
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
from calibre.ebooks.mobi.writer import MobiWriter
|
||||
from calibre.ebooks.oeb.base import OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.utils.config import Config
|
||||
|
||||
# Public API of this module. The previous value named 'get_reader', which is
# not defined anywhere in this file, so a star-import raised AttributeError.
__all__ = ['ReaderFactory', 'WriterFactory', 'option_parser', 'main']

# Maps a lower-cased file extension to a (Reader, Writer) pair. None in either
# slot means no class is registered for that direction.
REGISTRY = {
    '.opf': (OEBReader, None),
    '.lit': (LitReader, LitWriter),
    '.mobi': (MobiReader, MobiWriter),
}
|
||||
|
||||
def ReaderFactory(path):
    '''
    Return the Reader class to use for ``path``.

    A directory always maps to OEBReader. Otherwise the lower-cased file
    extension is looked up in REGISTRY.

    Raises OEBError when no reader is registered for the extension.
    '''
    if os.path.isdir(path):
        return OEBReader
    suffix = os.path.splitext(path)[1]
    ext = suffix.lower()
    entry = REGISTRY.get(ext, (None, None))
    reader_cls = entry[0]
    if reader_cls is not None:
        return reader_cls
    raise OEBError('Unknown e-book file extension %r' % ext)
|
||||
|
||||
def WriterFactory(path):
    '''
    Return the Writer class to use for ``path``.

    OEBWriter is chosen when ``path`` is an existing directory, or when it
    does not exist and has no file extension. Otherwise the lower-cased
    extension is looked up in REGISTRY.

    Raises OEBError when no writer is registered for the extension.
    '''
    if os.path.isdir(path):
        return OEBWriter
    ext = os.path.splitext(path)[1].lower()
    # A not-yet-existing, extension-less target is treated like a directory.
    if not (os.path.exists(path) or ext):
        return OEBWriter
    entry = REGISTRY.get(ext, (None, None))
    writer_cls = entry[1]
    if writer_cls is not None:
        return writer_cls
    raise OEBError('Unknown e-book file extension %r' % ext)
|
||||
|
||||
|
||||
def option_parser(Reader, Writer):
    '''
    Build the command-line option parser for a Reader/Writer pair.

    The Reader, every transform listed by either class, and the Writer each
    register their settings on a shared Config (in that order). The generic
    conversion options are then added to the parser created from it.
    '''
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
    Reader.config(cfg)
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        Transform.config(cfg)
    Writer.config(cfg)
    parser = cfg.option_parser()
    # (flags, keyword settings) for the options common to every conversion
    common_options = (
        (('--encoding',), dict(
            default=None,
            help=_('Character encoding for input. Default is to auto detect.'))),
        (('-o', '--output'), dict(
            default=None,
            help=_('Output file. Default is derived from input filename.'))),
        (('-p', '--pretty-print'), dict(
            action='store_true', default=False,
            help=_('Produce more human-readable XML output.'))),
        (('-v', '--verbose'), dict(
            default=0, action='count',
            help=_('Useful for debugging.'))),
    )
    for flags, settings in common_options:
        parser.add_option(*flags, **settings)
    return parser
|
||||
|
||||
def main(argv=None):
    '''
    Command-line entry point: ``ebook-convert INFILE OUTFILE [OPTIONS..]``.

    :param argv: Full argument vector including the program name. Defaults
                 to ``sys.argv``, looked up at call time.
    :return: 0 on success; 1 when the usage is wrong (fewer than two paths,
             or stray positional arguments after the options).

    Reader/Writer selection may raise OEBError for unknown file extensions.
    '''
    if argv is None:
        # Resolve at call time so a rebound sys.argv is honoured.
        argv = sys.argv
    if len(argv) < 3:
        # Parenthesised form is valid on both Python 2 and Python 3.
        print(_("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]"))
        return 1
    inpath, outpath = argv[1], argv[2]
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, args = parser.parse_args(argv[3:])
    if len(args) != 0:
        parser.print_help()
        return 1
    logger = logging.getLogger('ebook-convert')
    # NOTE(review): opts.verbose is parsed but never read here; the level is
    # always DEBUG. Confirm whether -v was meant to control it.
    calibre.setup_cli_handlers(logger, logging.DEBUG)
    encoding = opts.encoding
    pretty_print = opts.pretty_print
    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    # Instantiate every transform before reading the input.
    transforms = [Transform.generate(opts)
                  for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]
    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0
|
||||
|
||||
# Script entry point: the process exit status is main()'s return value.
if __name__ == '__main__':
    raise SystemExit(main())
|
@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
|
||||
class Stylizer(object):
|
||||
STYLESHEETS = WeakKeyDictionary()
|
||||
|
||||
def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
|
||||
def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
|
||||
extra_css='', user_css=''):
|
||||
self.oeb = oeb
|
||||
self.oeb, self.opts = oeb, opts
|
||||
self.profile = profile
|
||||
self.logger = oeb.logger
|
||||
item = oeb.manifest.hrefs[path]
|
||||
@ -249,6 +249,8 @@ class Stylizer(object):
|
||||
style.update(self._normalize_font(prop.cssValue))
|
||||
elif name == 'list-style':
|
||||
style.update(self._normalize_list_style(prop.cssValue))
|
||||
elif name == 'text-align':
|
||||
style.update(self._normalize_text_align(prop.cssValue))
|
||||
else:
|
||||
style[name] = prop.value
|
||||
if 'font-size' in style:
|
||||
@ -306,6 +308,19 @@ class Stylizer(object):
|
||||
|
||||
return style
|
||||
|
||||
def _normalize_text_align(self, cssvalue):
|
||||
style = {}
|
||||
text = cssvalue.cssText
|
||||
if text == 'inherit':
|
||||
style['text-align'] = 'inherit'
|
||||
else:
|
||||
if text in ('left', 'justify'):
|
||||
val = 'left' if self.opts.dont_justify else 'justify'
|
||||
style['text-align'] = val
|
||||
else:
|
||||
style['text-align'] = text
|
||||
return style
|
||||
|
||||
def _normalize_font(self, cssvalue):
|
||||
composition = ('font-style', 'font-variant', 'font-weight',
|
||||
'font-size', 'line-height', 'font-family')
|
||||
|
@ -141,7 +141,7 @@ class CSSFlattener(object):
|
||||
bs.append('text-align: '+ \
|
||||
('left' if self.context.dont_justify else 'justify'))
|
||||
body.set('style', '; '.join(bs))
|
||||
stylizer = Stylizer(html, item.href, self.oeb, profile,
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
|
||||
user_css=self.context.extra_css,
|
||||
extra_css=css)
|
||||
self.stylizers[item] = stylizer
|
||||
|
@ -29,13 +29,14 @@ class CaseMangler(object):
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Applying case-transforming CSS...')
|
||||
self.oeb = oeb
|
||||
self.opts = context
|
||||
self.profile = context.source
|
||||
self.mangle_spine()
|
||||
|
||||
|
||||
def mangle_spine(self):
|
||||
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
|
||||
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
|
||||
@ -44,9 +45,9 @@ class CaseMangler(object):
|
||||
relhref = item.relhref(href)
|
||||
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
|
||||
rel='stylesheet', href=relhref, type=CSS_MIME)
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
|
||||
self.mangle_elem(html.find(XHTML('body')), stylizer)
|
||||
|
||||
|
||||
def text_transform(self, transform, text):
|
||||
if transform == 'capitalize':
|
||||
return text.title()
|
||||
@ -55,7 +56,7 @@ class CaseMangler(object):
|
||||
elif transform == 'lowercase':
|
||||
return text.lower()
|
||||
return text
|
||||
|
||||
|
||||
def split_text(self, text):
|
||||
results = ['']
|
||||
isupper = text[0].isupper()
|
||||
@ -66,7 +67,7 @@ class CaseMangler(object):
|
||||
isupper = not isupper
|
||||
results.append(char)
|
||||
return results
|
||||
|
||||
|
||||
def smallcaps_elem(self, elem, attr):
|
||||
texts = self.split_text(getattr(elem, attr))
|
||||
setattr(elem, attr, None)
|
||||
@ -90,7 +91,7 @@ class CaseMangler(object):
|
||||
last.tail = tail
|
||||
child.tail = None
|
||||
last = child
|
||||
|
||||
|
||||
def mangle_elem(self, elem, stylizer):
|
||||
if not isinstance(elem.tag, basestring) or \
|
||||
namespace(elem.tag) != XHTML_NS:
|
||||
|
@ -44,6 +44,7 @@ class SVGRasterizer(object):
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Rasterizing SVG images...')
|
||||
self.oeb = oeb
|
||||
self.opts = context
|
||||
self.profile = context.dest
|
||||
self.images = {}
|
||||
self.dataize_manifest()
|
||||
@ -102,7 +103,7 @@ class SVGRasterizer(object):
|
||||
def rasterize_spine(self):
|
||||
for item in self.oeb.spine:
|
||||
html = item.data
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.profile)
|
||||
stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
|
||||
self.rasterize_item(item, stylizer)
|
||||
|
||||
def rasterize_item(self, item, stylizer):
|
||||
|
@ -113,7 +113,8 @@ class PMLMLizer(object):
|
||||
href = self.oeb_book.guide['titlepage'].href
|
||||
item = self.oeb_book.manifest.hrefs[href]
|
||||
if item.spine_position is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||
self.opts, self.opts.output_profile)
|
||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||
return output
|
||||
|
||||
|
@ -90,7 +90,8 @@ class RBMLizer(object):
|
||||
href = self.oeb_book.guide['titlepage'].href
|
||||
item = self.oeb_book.manifest.hrefs[href]
|
||||
if item.spine_position is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||
self.opts, self.opts.output_profile)
|
||||
output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
|
||||
return output
|
||||
|
||||
@ -111,7 +112,7 @@ class RBMLizer(object):
|
||||
output = [u'']
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to RocketBook HTML...' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
output.append(self.add_page_anchor(item))
|
||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
|
||||
return ''.join(output)
|
||||
|
@ -111,12 +111,13 @@ class RTFMLizer(object):
|
||||
href = self.oeb_book.guide['titlepage'].href
|
||||
item = self.oeb_book.manifest.hrefs[href]
|
||||
if item.spine_position is None:
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book,
|
||||
self.opts, self.opts.output_profile)
|
||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||
output += '{\\page } '
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to RTF markup...' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
output += self.dump_text(item.data.find(XHTML('body')), stylizer)
|
||||
output += self.footer()
|
||||
output = self.insert_images(output)
|
||||
|
@ -54,7 +54,7 @@ class TXTMLizer(object):
|
||||
output.append(self.get_toc())
|
||||
for item in self.oeb_book.spine:
|
||||
self.log.debug('Converting %s to TXT...' % item.href)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
|
||||
content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
|
||||
content = self.remove_newlines(content)
|
||||
output += self.dump_text(etree.fromstring(content), stylizer)
|
||||
|
Loading…
x
Reference in New Issue
Block a user