From 5dca63111427af5a8caddbff0d96a63b1bc9f5fe Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 11 Feb 2009 10:00:54 -0500 Subject: [PATCH] Demonstrable modularization of e-book conversion. --- src/calibre/ebooks/lit/reader.py | 1 + src/calibre/ebooks/mobi/mobiml.py | 10 ++- src/calibre/ebooks/mobi/writer.py | 40 +++++++-- src/calibre/ebooks/oeb/base.py | 11 +-- src/calibre/ebooks/oeb/factory.py | 87 +++++++++++++++++-- src/calibre/ebooks/oeb/reader.py | 24 +++++ src/calibre/ebooks/oeb/transforms/flatcss.py | 10 ++- src/calibre/ebooks/oeb/transforms/htmltoc.py | 13 ++- .../ebooks/oeb/transforms/manglecase.py | 10 ++- .../ebooks/oeb/transforms/rasterize.py | 10 ++- .../ebooks/oeb/transforms/trimmanifest.py | 10 ++- src/calibre/ebooks/oeb/writer.py | 57 +++--------- 12 files changed, 210 insertions(+), 73 deletions(-) diff --git a/src/calibre/ebooks/lit/reader.py b/src/calibre/ebooks/lit/reader.py index dd42434101..8cbb9514a8 100644 --- a/src/calibre/ebooks/lit/reader.py +++ b/src/calibre/ebooks/lit/reader.py @@ -802,6 +802,7 @@ class LitFile(object): class LitContainer(object): + """Simple Container-interface, read-only accessor for LIT files.""" def __init__(self, filename_or_stream): self._litfile = LitFile(filename_or_stream) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 7ecd127452..b7418a5d19 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -82,7 +82,15 @@ class MobiMLizer(object): def __init__(self, ignore_tables=False): self.ignore_tables = ignore_tables - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + return cfg + + @classmethod + def generate(cls, opts): + return cls() + + def __call__(self, oeb, context): oeb.logger.info('Converting XHTML to Mobipocket markup...') self.oeb = oeb self.profile = profile = context.dest diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 380bdbf518..1b5d3ae652 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -295,6 +295,11 @@ class Serializer(object): class MobiWriter(object): COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') + + DEFAULT_PROFILE = 'CybookG3' + + TRANSFORMS = [HTMLTOCAdder, CaseMangler, CSSFlattener, SVGRasterizer, + ManifestTrimmer, MobiMLizer] def __init__(self, compression=None, imagemax=None, prefer_author_sort=False): @@ -302,7 +307,32 @@ class MobiWriter(object): self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE self._prefer_author_sort = prefer_author_sort - def dump(self, oeb, path): + @classmethod + def config(cls, cfg): + """Add any book-writing options to the :class:`Config` object + :param:`cfg`. + """ + mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.')) + mobi('compress', ['--compress'], default=False, + help=_('Compress file text using PalmDOC compression. ' + 'Results in smaller files, but takes a long time to run.')) + mobi('rescale_images', ['--rescale-images'], default=False, + help=_('Modify images to meet Palm device size limitations.')) + mobi('prefer_author_sort', ['--prefer-author-sort'], default=False, + help=_('When present, use the author sorting information for ' + 'generating the Mobipocket author metadata.')) + return cfg + + @classmethod + def generate(cls, opts): + """Generate a Writer instance from command-line options.""" + compression = PALMDOC if opts.compress else UNCOMPRESSED + imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None + prefer_author_sort = opts.prefer_author_sort + return cls(compression=compression, imagemax=imagemax, + prefer_author_sort=prefer_author_sort) + + def __call__(self, oeb, path): if hasattr(path, 'write'): return self._dump_stream(oeb, path) with open(path, 'w+b') as stream: @@ -533,20 +563,12 @@ def config(defaults=None): c = StringConfig(defaults, desc) mobi = c.add_group('mobipocket', _('Mobipocket-specific options.')) - mobi('compress', ['--compress'], default=False, - help=_('Compress file text using PalmDOC compression. ' - 'Results in smaller files, but takes a long time to run.')) - mobi('rescale_images', ['--rescale-images'], default=False, - help=_('Modify images to meet Palm device size limitations.')) mobi('toc_title', ['--toc-title'], default=None, help=_('Title for any generated in-line table of contents.')) mobi('ignore_tables', ['--ignore-tables'], default=False, help=_('Render HTML tables as blocks of text instead of actual ' 'tables. This is neccessary if the HTML contains very large ' 'or complex tables.')) - mobi('prefer_author_sort', ['--prefer-author-sort'], default=False, - help=_('When present, use the author sorting information for ' - 'generating the Mobipocket author metadata.')) profiles = c.add_group('profiles', _('Device renderer profiles. ' 'Affects conversion of font sizes, image rescaling and rasterization ' 'of tables. Valid profiles are: %s.') % ', '.join(_profiles)) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index ce16fa76e5..c9d01b03fe 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -820,8 +820,10 @@ class Manifest(object): def __iter__(self): for item in self.items: yield item - values = __iter__ + def values(self): + return list(self.items) + def __contains__(self, item): return item in self.items @@ -1134,7 +1136,7 @@ class TOC(object): node.to_opf1(tour) return tour - def to_ncx(self, parent, depth=1): + def to_ncx(self, parent): for node in self.nodes: id = node.id or unicode(uuid.uuid4()) attrib = {'id': id, 'playOrder': '0'} @@ -1143,9 +1145,8 @@ class TOC(object): point = element(parent, NCX('navPoint'), attrib=attrib) label = etree.SubElement(point, NCX('navLabel')) element(label, NCX('text')).text = node.title - href = node.href if depth > 1 else urldefrag(node.href)[0] - element(point, NCX('content'), src=href) - node.to_ncx(point, depth+1) + element(point, NCX('content'), src=node.href) + node.to_ncx(point) return parent diff --git a/src/calibre/ebooks/oeb/factory.py b/src/calibre/ebooks/oeb/factory.py index 1ce33a4f00..684451044b 100644 --- a/src/calibre/ebooks/oeb/factory.py +++ b/src/calibre/ebooks/oeb/factory.py @@ -6,20 +6,93 @@ from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift ' -import os +import sys, os, logging +from itertools import chain from calibre.ebooks.oeb.base import OEBError from calibre.ebooks.oeb.reader import OEBReader +from calibre.ebooks.oeb.writer import OEBWriter from calibre.ebooks.lit.reader import LitReader +from calibre.ebooks.lit.writer import LitWriter +from calibre.ebooks.mobi.reader import MobiReader +from calibre.ebooks.mobi.writer import MobiWriter +from calibre.ebooks.oeb.base import Logger, OEBBook +from calibre.ebooks.oeb.profile import Context +from calibre.utils.config import Config __all__ = ['get_reader'] -READER_REGISTRY = { - '.opf': OEBReader, - '.lit': LitReader, +REGISTRY = { + '.opf': (OEBReader, None), + '.lit': (LitReader, LitWriter), + '.mobi': (MobiReader, MobiWriter), } def ReaderFactory(path): - ext = os.path.splitext(path)[1].lower() - if not ext: + if os.path.isdir(path): return OEBReader - return READER_REGISTRY[ext]() + ext = os.path.splitext(path)[1].lower() + Reader = REGISTRY.get(ext, (None, None))[0] + if Reader is None: + raise OEBError('Unknown e-book file extension %r' % ext) + return Reader + +def WriterFactory(path): + if os.path.isdir(path): + return OEBWriter + ext = os.path.splitext(path)[1].lower() + if not os.path.exists(path) and not ext: + return OEBWriter + Writer = REGISTRY.get(ext, (None, None))[1] + if Writer is None: + raise OEBError('Unknown e-book file extension %r' % ext) + return Writer + + +def option_parser(Reader, Writer): + cfg = Config('ebook-convert', _('Options to control e-book conversion.')) + Reader.config(cfg) + for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS): + Transform.config(cfg) + Writer.config(cfg) + parser = cfg.option_parser() + parser.add_option('--encoding', default=None, + help=_('Character encoding for input. Default is to auto detect.')) + parser.add_option('-o', '--output', default=None, + help=_('Output file. Default is derived from input filename.')) + parser.add_option('-p', '--pretty-print', action='store_true', + default=False, help=_('Produce more human-readable XML output.')) + parser.add_option('-v', '--verbose', default=0, action='count', + help=_('Useful for debugging.')) + return parser + +def main(argv=sys.argv): + if len(argv) < 3: + print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]") + return 1 + inpath, outpath = argv[1], argv[2] + Reader = ReaderFactory(inpath) + Writer = WriterFactory(outpath) + parser = option_parser(Reader, Writer) + opts, args = parser.parse_args(argv[3:]) + if len(args) != 0: + parser.print_help() + return 1 + logger = Logger(logging.getLogger('ebook-convert')) + logger.setup_cli_handler(opts.verbose) + encoding = opts.encoding + pretty_print = opts.pretty_print + oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger) + context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE) + reader = Reader.generate(opts) + writer = Writer.generate(opts) + transforms = [] + for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS): + transforms.append(Transform.generate(opts)) + reader(oeb, inpath) + for transform in transforms: + transform(oeb, context) + writer(oeb, outpath) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index aa23ce1e96..0fce1c2b0d 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -31,15 +31,39 @@ from calibre.ptempfile import TemporaryDirectory __all__ = ['OEBReader'] class OEBReader(object): + """Read an OEBPS 1.x or OPF/OPS 2.0 file collection.""" COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]') COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]') Container = DirContainer + """Container type used to access book files. Override in sub-classes.""" DEFAULT_PROFILE = 'PRS505' + """Default renderer profile for content read with this Reader.""" + + TRANSFORMS = [] + """List of transforms to apply to content read with this Reader.""" + + def __init__(self): + return + @classmethod + def config(cls, cfg): + """Add any book-reading options to the :class:`Config` object + :param:`cfg`. + """ + return + + @classmethod + def generate(cls, opts): + """Generate a Reader instance from command-line options.""" + return cls() + def __call__(self, oeb, path): + """Read the book at :param:`path` into the :class:`OEBBook` object + :param:`oeb`. + """ self.oeb = oeb self.logger = oeb.logger oeb.container = self.Container(path) diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 01afcb08e2..ac9684a624 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -94,7 +94,15 @@ class CSSFlattener(object): self.unfloat = unfloat self.untable = untable - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + return cfg + + @classmethod + def generate(cls, opts): + return cls() + + def __call__(self, oeb, context): oeb.logger.info('Flattening CSS and remapping font sizes...') self.oeb = oeb self.context = context diff --git a/src/calibre/ebooks/oeb/transforms/htmltoc.py b/src/calibre/ebooks/oeb/transforms/htmltoc.py index 5508b58ec3..0040f39c14 100644 --- a/src/calibre/ebooks/oeb/transforms/htmltoc.py +++ b/src/calibre/ebooks/oeb/transforms/htmltoc.py @@ -52,7 +52,18 @@ class HTMLTOCAdder(object): self.title = title self.style = style - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + group = cfg.add_group('htmltoc', _('HTML TOC generation options.')) + group('toc_title', ['--toc-title'], default=None, + help=_('Title for any generated in-line table of contents.')) + return cfg + + @classmethod + def generate(cls, opts): + return cls(title=opts.toc_title) + + def __call__(self, oeb, context): if 'toc' in oeb.guide: return oeb.logger.info('Generating in-line TOC...') diff --git a/src/calibre/ebooks/oeb/transforms/manglecase.py b/src/calibre/ebooks/oeb/transforms/manglecase.py index 3a3d91364f..c819475a4d 100644 --- a/src/calibre/ebooks/oeb/transforms/manglecase.py +++ b/src/calibre/ebooks/oeb/transforms/manglecase.py @@ -29,7 +29,15 @@ CASE_MANGLER_CSS = """ TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase']) class CaseMangler(object): - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + return cfg + + @classmethod + def generate(cls, opts): + return cls() + + def __call__(self, oeb, context): oeb.logger.info('Applying case-transforming CSS...') self.oeb = oeb self.profile = context.source diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 12a2812898..aef5c2c98b 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -34,7 +34,15 @@ class SVGRasterizer(object): if QApplication.instance() is None: QApplication([]) - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + return cfg + + @classmethod + def generate(cls, opts): + return cls() + + def __call__(self, oeb, context): oeb.logger.info('Rasterizing SVG images...') self.oeb = oeb self.profile = context.dest diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py index a1d28e5a99..a5e7042617 100644 --- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py +++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py @@ -17,7 +17,15 @@ from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE from calibre.ebooks.oeb.base import urlnormalize class ManifestTrimmer(object): - def transform(self, oeb, context): + @classmethod + def config(cls, cfg): + return cfg + + @classmethod + def generate(cls, opts): + return cls() + + def __call__(self, oeb, context): oeb.logger.info('Trimming unused files from manifest...') used = set() hrefs = oeb.manifest.hrefs diff --git a/src/calibre/ebooks/oeb/writer.py b/src/calibre/ebooks/oeb/writer.py index c84db30c98..235965b50f 100644 --- a/src/calibre/ebooks/oeb/writer.py +++ b/src/calibre/ebooks/oeb/writer.py @@ -9,13 +9,16 @@ __copyright__ = '2008, Marshall T. Vandegrift ' import sys, os, logging from calibre.ebooks.oeb.base import OPF_MIME, xml2str from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook -from calibre.utils.config import Config __all__ = ['OEBWriter'] class OEBWriter(object): DEFAULT_PROFILE = 'PRS505' + """Default renderer profile for content written with this Writer.""" + TRANSFORMS = [] + """List of transforms to apply to content written with this Writer.""" + def __init__(self, version='2.0', page_map=False, pretty_print=False): self.version = version self.page_map = page_map @@ -23,6 +26,9 @@ class OEBWriter(object): @classmethod def config(cls, cfg): + """Add any book-writing options to the :class:`Config` object + :param:`cfg`. + """ oeb = cfg.add_group('oeb', _('OPF/NCX/etc. generation options.')) versions = ['1.2', '2.0'] oeb('opf_version', ['--opf-version'], default='2.0', choices=versions, @@ -34,6 +40,7 @@ class OEBWriter(object): @classmethod def generate(cls, opts): + """Generate a Writer instance from command-line options.""" version = opts.opf_version page_map = opts.adobe_page_map pretty_print = opts.pretty_print @@ -41,6 +48,9 @@ class OEBWriter(object): pretty_print=pretty_print) def __call__(self, oeb, path): + """Read the book in the :class:`OEBBook` object :param:`oeb` to a file + at :param:`path`. + """ version = int(self.version[0]) opfname = None if os.path.splitext(path)[1].lower() == '.opf': @@ -63,48 +73,3 @@ class OEBWriter(object): href = opfname output.write(href, xml2str(data, pretty_print=pretty_print)) return - - -def option_parser(): - cfg = Config('oeb', _('Options to control OEB conversion.')) - OEBWriter.config(cfg) - parser = cfg.option_parser() - parser.add_option('--encoding', default=None, - help=_('Character encoding for files. Default is to auto detect.')) - parser.add_option('-o', '--output', default=None, - help=_('Output file. Default is derived from input filename.')) - parser.add_option('-p', '--pretty-print', action='store_true', - default=False, help=_('Produce more human-readable XML output.')) - parser.add_option('-v', '--verbose', default=0, action='count', - help=_('Useful for debugging.')) - return parser - -def any2oeb(opts, inpath): - from calibre.ebooks.oeb.factory import ReaderFactory - logger = Logger(logging.getLogger('any2oeb')) - logger.setup_cli_handler(opts.verbose) - outpath = opts.output - if outpath is None: - outpath = os.path.basename(inpath) - outpath = os.path.splitext(outpath)[0] - encoding = opts.encoding - pretty_print = opts.pretty_print - oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger) - reader = ReaderFactory(inpath) - reader(oeb, inpath) - writer = OEBWriter.generate(opts) - writer(oeb, outpath) - return 0 - -def main(argv=sys.argv): - parser = option_parser() - opts, args = parser.parse_args(argv[1:]) - if len(args) != 1: - parser.print_help() - return 1 - inpath = args[0] - retval = any2oeb(opts, inpath) - return retval - -if __name__ == '__main__': - sys.exit(main())