mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Demonstrable modularization of e-book conversion.
This commit is contained in:
parent
e5984c02c7
commit
5dca631114
@ -802,6 +802,7 @@ class LitFile(object):
|
||||
|
||||
|
||||
class LitContainer(object):
|
||||
"""Simple Container-interface, read-only accessor for LIT files."""
|
||||
|
||||
def __init__(self, filename_or_stream):
|
||||
self._litfile = LitFile(filename_or_stream)
|
||||
|
@ -82,7 +82,15 @@ class MobiMLizer(object):
|
||||
def __init__(self, ignore_tables=False):
|
||||
self.ignore_tables = ignore_tables
|
||||
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Converting XHTML to Mobipocket markup...')
|
||||
self.oeb = oeb
|
||||
self.profile = profile = context.dest
|
||||
|
@ -296,13 +296,43 @@ class Serializer(object):
|
||||
class MobiWriter(object):
|
||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||
|
||||
DEFAULT_PROFILE = 'CybookG3'
|
||||
|
||||
TRANSFORMS = [HTMLTOCAdder, CaseMangler, CSSFlattener, SVGRasterizer,
|
||||
ManifestTrimmer, MobiMLizer]
|
||||
|
||||
def __init__(self, compression=None, imagemax=None,
|
||||
prefer_author_sort=False):
|
||||
self._compression = compression or UNCOMPRESSED
|
||||
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
|
||||
self._prefer_author_sort = prefer_author_sort
|
||||
|
||||
def dump(self, oeb, path):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
"""Add any book-writing options to the :class:`Config` object
|
||||
:param:`cfg`.
|
||||
"""
|
||||
mobi = cfg.add_group('mobipocket', _('Mobipocket-specific options.'))
|
||||
mobi('compress', ['--compress'], default=False,
|
||||
help=_('Compress file text using PalmDOC compression. '
|
||||
'Results in smaller files, but takes a long time to run.'))
|
||||
mobi('rescale_images', ['--rescale-images'], default=False,
|
||||
help=_('Modify images to meet Palm device size limitations.'))
|
||||
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
|
||||
help=_('When present, use the author sorting information for '
|
||||
'generating the Mobipocket author metadata.'))
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
"""Generate a Writer instance from command-line options."""
|
||||
compression = PALMDOC if opts.compress else UNCOMPRESSED
|
||||
imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
|
||||
prefer_author_sort = opts.prefer_author_sort
|
||||
return cls(compression=compression, imagemax=imagemax,
|
||||
prefer_author_sort=prefer_author_sort)
|
||||
|
||||
def __call__(self, oeb, path):
|
||||
if hasattr(path, 'write'):
|
||||
return self._dump_stream(oeb, path)
|
||||
with open(path, 'w+b') as stream:
|
||||
@ -533,20 +563,12 @@ def config(defaults=None):
|
||||
c = StringConfig(defaults, desc)
|
||||
|
||||
mobi = c.add_group('mobipocket', _('Mobipocket-specific options.'))
|
||||
mobi('compress', ['--compress'], default=False,
|
||||
help=_('Compress file text using PalmDOC compression. '
|
||||
'Results in smaller files, but takes a long time to run.'))
|
||||
mobi('rescale_images', ['--rescale-images'], default=False,
|
||||
help=_('Modify images to meet Palm device size limitations.'))
|
||||
mobi('toc_title', ['--toc-title'], default=None,
|
||||
help=_('Title for any generated in-line table of contents.'))
|
||||
mobi('ignore_tables', ['--ignore-tables'], default=False,
|
||||
help=_('Render HTML tables as blocks of text instead of actual '
|
||||
'tables. This is neccessary if the HTML contains very large '
|
||||
'or complex tables.'))
|
||||
mobi('prefer_author_sort', ['--prefer-author-sort'], default=False,
|
||||
help=_('When present, use the author sorting information for '
|
||||
'generating the Mobipocket author metadata.'))
|
||||
profiles = c.add_group('profiles', _('Device renderer profiles. '
|
||||
'Affects conversion of font sizes, image rescaling and rasterization '
|
||||
'of tables. Valid profiles are: %s.') % ', '.join(_profiles))
|
||||
|
@ -820,7 +820,9 @@ class Manifest(object):
|
||||
def __iter__(self):
|
||||
for item in self.items:
|
||||
yield item
|
||||
values = __iter__
|
||||
|
||||
def values(self):
|
||||
return list(self.items)
|
||||
|
||||
def __contains__(self, item):
|
||||
return item in self.items
|
||||
@ -1134,7 +1136,7 @@ class TOC(object):
|
||||
node.to_opf1(tour)
|
||||
return tour
|
||||
|
||||
def to_ncx(self, parent, depth=1):
|
||||
def to_ncx(self, parent):
|
||||
for node in self.nodes:
|
||||
id = node.id or unicode(uuid.uuid4())
|
||||
attrib = {'id': id, 'playOrder': '0'}
|
||||
@ -1143,9 +1145,8 @@ class TOC(object):
|
||||
point = element(parent, NCX('navPoint'), attrib=attrib)
|
||||
label = etree.SubElement(point, NCX('navLabel'))
|
||||
element(label, NCX('text')).text = node.title
|
||||
href = node.href if depth > 1 else urldefrag(node.href)[0]
|
||||
element(point, NCX('content'), src=href)
|
||||
node.to_ncx(point, depth+1)
|
||||
element(point, NCX('content'), src=node.href)
|
||||
node.to_ncx(point)
|
||||
return parent
|
||||
|
||||
|
||||
|
@ -6,20 +6,93 @@ from __future__ import with_statement
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
|
||||
import os
|
||||
import sys, os, logging
|
||||
from itertools import chain
|
||||
from calibre.ebooks.oeb.base import OEBError
|
||||
from calibre.ebooks.oeb.reader import OEBReader
|
||||
from calibre.ebooks.oeb.writer import OEBWriter
|
||||
from calibre.ebooks.lit.reader import LitReader
|
||||
from calibre.ebooks.lit.writer import LitWriter
|
||||
from calibre.ebooks.mobi.reader import MobiReader
|
||||
from calibre.ebooks.mobi.writer import MobiWriter
|
||||
from calibre.ebooks.oeb.base import Logger, OEBBook
|
||||
from calibre.ebooks.oeb.profile import Context
|
||||
from calibre.utils.config import Config
|
||||
|
||||
__all__ = ['get_reader']
|
||||
|
||||
READER_REGISTRY = {
|
||||
'.opf': OEBReader,
|
||||
'.lit': LitReader,
|
||||
REGISTRY = {
|
||||
'.opf': (OEBReader, None),
|
||||
'.lit': (LitReader, LitWriter),
|
||||
'.mobi': (MobiReader, MobiWriter),
|
||||
}
|
||||
|
||||
def ReaderFactory(path):
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
if not ext:
|
||||
if os.path.isdir(path):
|
||||
return OEBReader
|
||||
return READER_REGISTRY[ext]()
|
||||
ext = os.path.splitext(path)[1].lower()
|
||||
Reader = REGISTRY.get(ext, (None, None))[0]
|
||||
if Reader is None:
|
||||
raise OEBError('Unknown e-book file extension %r' % ext)
|
||||
return Reader
|
||||
|
||||
def WriterFactory(path):
    """Return the Writer class to use for the output location ``path``.

    An existing directory, or a path that does not exist yet and carries
    no file extension, maps to :class:`OEBWriter`.  Any other path is
    looked up in ``REGISTRY`` by its lower-cased extension, taking the
    second (writer) slot of the registered pair.

    Raises :class:`OEBError` when no writer is registered for the
    extension.
    """
    if os.path.isdir(path):
        return OEBWriter
    suffix = os.path.splitext(path)[1].lower()
    if not (os.path.exists(path) or suffix):
        return OEBWriter
    writer_cls = REGISTRY.get(suffix, (None, None))[1]
    if writer_cls is not None:
        return writer_cls
    raise OEBError('Unknown e-book file extension %r' % suffix)
|
||||
|
||||
|
||||
def option_parser(Reader, Writer):
    """Build the ``ebook-convert`` command-line option parser.

    A fresh :class:`Config` is passed first to ``Reader.config``, then to
    the ``config`` of every class in ``Reader.TRANSFORMS`` followed by
    ``Writer.TRANSFORMS``, and finally to ``Writer.config``, so that each
    participant can register its own options.  The general options
    (``--encoding``, ``--output``, ``--pretty-print``, ``--verbose``) are
    then added directly to the resulting parser, which is returned.
    """
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
    Reader.config(cfg)
    for transform_cls in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
        transform_cls.config(cfg)
    Writer.config(cfg)

    parser = cfg.option_parser()
    add = parser.add_option
    add('--encoding', default=None,
        help=_('Character encoding for input. Default is to auto detect.'))
    add('-o', '--output', default=None,
        help=_('Output file. Default is derived from input filename.'))
    add('-p', '--pretty-print', action='store_true', default=False,
        help=_('Produce more human-readable XML output.'))
    add('-v', '--verbose', default=0, action='count',
        help=_('Useful for debugging.'))
    return parser
|
||||
|
||||
def main(argv=sys.argv):
    """Command-line entry point: convert INFILE to OUTFILE.

    ``argv`` is expected to have the form
    ``[PROGRAM, INFILE, OUTFILE, OPTIONS...]``.

    Returns the process exit status: 1 after printing the usage line (too
    few arguments) or the parser help (extra positional arguments), and 0
    once the reader, the transforms and the writer have all been run.
    """
    if len(argv) < 3:
        # Parenthesised so the line parses both as a Python 2 print
        # statement and as a Python 3 print() call.
        print(_("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]"))
        return 1
    inpath, outpath = argv[1], argv[2]
    # The two paths select the Reader/Writer classes, and those classes
    # contribute the available options, so the parser is built only now.
    Reader = ReaderFactory(inpath)
    Writer = WriterFactory(outpath)
    parser = option_parser(Reader, Writer)
    opts, args = parser.parse_args(argv[3:])
    if args:
        # Positional arguments beyond INFILE and OUTFILE are rejected.
        parser.print_help()
        return 1
    logger = Logger(logging.getLogger('ebook-convert'))
    logger.setup_cli_handler(opts.verbose)
    oeb = OEBBook(encoding=opts.encoding, pretty_print=opts.pretty_print,
                  logger=logger)
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
    # Instantiate every stage from the parsed options before any of them
    # runs, in the same order as before: reader, writer, then transforms.
    reader = Reader.generate(opts)
    writer = Writer.generate(opts)
    transforms = [Transform.generate(opts)
                  for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS)]
    reader(oeb, inpath)
    for transform in transforms:
        transform(oeb, context)
    writer(oeb, outpath)
    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@ -31,15 +31,39 @@ from calibre.ptempfile import TemporaryDirectory
|
||||
__all__ = ['OEBReader']
|
||||
|
||||
class OEBReader(object):
|
||||
"""Read an OEBPS 1.x or OPF/OPS 2.0 file collection."""
|
||||
|
||||
COVER_SVG_XP = XPath('h:body//svg:svg[position() = 1]')
|
||||
COVER_OBJECT_XP = XPath('h:body//h:object[@data][position() = 1]')
|
||||
|
||||
Container = DirContainer
|
||||
"""Container type used to access book files. Override in sub-classes."""
|
||||
|
||||
DEFAULT_PROFILE = 'PRS505'
|
||||
"""Default renderer profile for content read with this Reader."""
|
||||
|
||||
TRANSFORMS = []
|
||||
"""List of transforms to apply to content read with this Reader."""
|
||||
|
||||
def __init__(self):
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
"""Add any book-reading options to the :class:`Config` object
|
||||
:param:`cfg`.
|
||||
"""
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
"""Generate a Reader instance from command-line options."""
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, path):
|
||||
"""Read the book at :param:`path` into the :class:`OEBBook` object
|
||||
:param:`oeb`.
|
||||
"""
|
||||
self.oeb = oeb
|
||||
self.logger = oeb.logger
|
||||
oeb.container = self.Container(path)
|
||||
|
@ -94,7 +94,15 @@ class CSSFlattener(object):
|
||||
self.unfloat = unfloat
|
||||
self.untable = untable
|
||||
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Flattening CSS and remapping font sizes...')
|
||||
self.oeb = oeb
|
||||
self.context = context
|
||||
|
@ -52,7 +52,18 @@ class HTMLTOCAdder(object):
|
||||
self.title = title
|
||||
self.style = style
|
||||
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
group = cfg.add_group('htmltoc', _('HTML TOC generation options.'))
|
||||
group('toc_title', ['--toc-title'], default=None,
|
||||
help=_('Title for any generated in-line table of contents.'))
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls(title=opts.toc_title)
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
if 'toc' in oeb.guide:
|
||||
return
|
||||
oeb.logger.info('Generating in-line TOC...')
|
||||
|
@ -29,7 +29,15 @@ CASE_MANGLER_CSS = """
|
||||
TEXT_TRANSFORMS = set(['capitalize', 'uppercase', 'lowercase'])
|
||||
|
||||
class CaseMangler(object):
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Applying case-transforming CSS...')
|
||||
self.oeb = oeb
|
||||
self.profile = context.source
|
||||
|
@ -34,7 +34,15 @@ class SVGRasterizer(object):
|
||||
if QApplication.instance() is None:
|
||||
QApplication([])
|
||||
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Rasterizing SVG images...')
|
||||
self.oeb = oeb
|
||||
self.profile = context.dest
|
||||
|
@ -17,7 +17,15 @@ from calibre.ebooks.oeb.base import LINK_SELECTORS, CSSURL_RE
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
|
||||
class ManifestTrimmer(object):
|
||||
def transform(self, oeb, context):
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
return cfg
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
return cls()
|
||||
|
||||
def __call__(self, oeb, context):
|
||||
oeb.logger.info('Trimming unused files from manifest...')
|
||||
used = set()
|
||||
hrefs = oeb.manifest.hrefs
|
||||
|
@ -9,12 +9,15 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
|
||||
import sys, os, logging
|
||||
from calibre.ebooks.oeb.base import OPF_MIME, xml2str
|
||||
from calibre.ebooks.oeb.base import Logger, DirContainer, OEBBook
|
||||
from calibre.utils.config import Config
|
||||
|
||||
__all__ = ['OEBWriter']
|
||||
|
||||
class OEBWriter(object):
|
||||
DEFAULT_PROFILE = 'PRS505'
|
||||
"""Default renderer profile for content written with this Writer."""
|
||||
|
||||
TRANSFORMS = []
|
||||
"""List of transforms to apply to content written with this Writer."""
|
||||
|
||||
def __init__(self, version='2.0', page_map=False, pretty_print=False):
|
||||
self.version = version
|
||||
@ -23,6 +26,9 @@ class OEBWriter(object):
|
||||
|
||||
@classmethod
|
||||
def config(cls, cfg):
|
||||
"""Add any book-writing options to the :class:`Config` object
|
||||
:param:`cfg`.
|
||||
"""
|
||||
oeb = cfg.add_group('oeb', _('OPF/NCX/etc. generation options.'))
|
||||
versions = ['1.2', '2.0']
|
||||
oeb('opf_version', ['--opf-version'], default='2.0', choices=versions,
|
||||
@ -34,6 +40,7 @@ class OEBWriter(object):
|
||||
|
||||
@classmethod
|
||||
def generate(cls, opts):
|
||||
"""Generate a Writer instance from command-line options."""
|
||||
version = opts.opf_version
|
||||
page_map = opts.adobe_page_map
|
||||
pretty_print = opts.pretty_print
|
||||
@ -41,6 +48,9 @@ class OEBWriter(object):
|
||||
pretty_print=pretty_print)
|
||||
|
||||
def __call__(self, oeb, path):
|
||||
"""Write the book in the :class:`OEBBook` object :param:`oeb` to a file
|
||||
at :param:`path`.
|
||||
"""
|
||||
version = int(self.version[0])
|
||||
opfname = None
|
||||
if os.path.splitext(path)[1].lower() == '.opf':
|
||||
@ -63,48 +73,3 @@ class OEBWriter(object):
|
||||
href = opfname
|
||||
output.write(href, xml2str(data, pretty_print=pretty_print))
|
||||
return
|
||||
|
||||
|
||||
def option_parser():
|
||||
cfg = Config('oeb', _('Options to control OEB conversion.'))
|
||||
OEBWriter.config(cfg)
|
||||
parser = cfg.option_parser()
|
||||
parser.add_option('--encoding', default=None,
|
||||
help=_('Character encoding for files. Default is to auto detect.'))
|
||||
parser.add_option('-o', '--output', default=None,
|
||||
help=_('Output file. Default is derived from input filename.'))
|
||||
parser.add_option('-p', '--pretty-print', action='store_true',
|
||||
default=False, help=_('Produce more human-readable XML output.'))
|
||||
parser.add_option('-v', '--verbose', default=0, action='count',
|
||||
help=_('Useful for debugging.'))
|
||||
return parser
|
||||
|
||||
def any2oeb(opts, inpath):
|
||||
from calibre.ebooks.oeb.factory import ReaderFactory
|
||||
logger = Logger(logging.getLogger('any2oeb'))
|
||||
logger.setup_cli_handler(opts.verbose)
|
||||
outpath = opts.output
|
||||
if outpath is None:
|
||||
outpath = os.path.basename(inpath)
|
||||
outpath = os.path.splitext(outpath)[0]
|
||||
encoding = opts.encoding
|
||||
pretty_print = opts.pretty_print
|
||||
oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
|
||||
reader = ReaderFactory(inpath)
|
||||
reader(oeb, inpath)
|
||||
writer = OEBWriter.generate(opts)
|
||||
writer(oeb, outpath)
|
||||
return 0
|
||||
|
||||
def main(argv=sys.argv):
|
||||
parser = option_parser()
|
||||
opts, args = parser.parse_args(argv[1:])
|
||||
if len(args) != 1:
|
||||
parser.print_help()
|
||||
return 1
|
||||
inpath = args[0]
|
||||
retval = any2oeb(opts, inpath)
|
||||
return retval
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
Loading…
x
Reference in New Issue
Block a user