Conversion pipeline now works for conversion from MOBI to OEB

2025-08-11 09:13:57 -04:00 · 2009-03-31 18:51:46 -07:00 · 2009-03-31 18:51:46 -07:00 · e624b088d7
commit e624b088d7
parent 9aa2fbfbec
10 changed files with 74 additions and 45 deletions
--- a/src/calibre/customize/conversion.py
+++ b/src/calibre/customize/conversion.py
@ -170,7 +170,8 @@ class InputFormatPlugin(Plugin):
            if not os.path.exists(options.debug_input):
                os.makedirs(options.debug_input)
            shutil.rmtree(options.debug_input)
-            shutil.copytree('.', options.debug_input)
+            shutil.copytree(output_dir, options.debug_input)
            log.info('Input debug saved to:', options.debug_input)
        return ret
@ -195,7 +196,14 @@ class OutputFormatPlugin(Plugin):
    #: Options shared by all Input format plugins. Do not override
    #: in sub-classes. Use :member:`options` instead. Every option must be an
    #: instance of :class:`OptionRecommendation`.
-    common_options = set([])
+    common_options = set([
+        OptionRecommendation(name='pretty_print',
+            recommended_value=False, level=OptionRecommendation.LOW,
+            help=_('If specified, the output plugin will try to create output '
+            'that is as human readable as possible. May not have any effect '
+            'for some output plugins.')
+        ),
+        ])
    #: Options to customize the behavior of this plugin. Every option must be an
    #: instance of :class:`OptionRecommendation`.
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -25,9 +25,11 @@ class Plugin(_Plugin):
    screen_size = (800, 600)
    dpi = 100
-    def initialize(self):
+    def __init__(self, *args, **kwargs):
+        _Plugin.__init__(self, *args, **kwargs)
        self.width, self.height = self.screen_size
        fsizes = list(self.fsizes)
+        self.fkey = list(self.fsizes)
        self.fsizes = []
        for (name, num), size in izip(FONT_SIZES, fsizes):
            self.fsizes.append((name, num, float(size)))
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@ -92,9 +92,9 @@ def add_input_output_options(parser, plumber):
        parser.add_option_group(io)
    if output_options:
-        title = plumber.output_fmt.upper() + ' ' + _('OPTIONS')
+        title = _('OUTPUT OPTIONS')
        oo = OptionGroup(parser, title, _('Options to control the processing'
-                          ' of the output %s file')%plumber.input_fmt)
+                          ' of the output %s')%plumber.output_fmt)
        add_options(oo.add_option, output_options)
        parser.add_option_group(oo)
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -9,6 +9,7 @@ from calibre.customize.conversion import OptionRecommendation
 from calibre.customize.ui import input_profiles, output_profiles, \
        plugin_for_input_format, plugin_for_output_format
 from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+from calibre.ptempfile import PersistentTemporaryDirectory
 class OptionValues(object):
    pass
@ -289,6 +290,8 @@ OptionRecommendation(name='language',
        '''
        # Setup baseline option values
        self.setup_options()
+        if self.opts.verbose:
+            self.log.filter_level = self.log.DEBUG
        # Run any preprocess plugins
        from calibre.customize.ui import run_plugins_on_preprocess
@ -300,9 +303,11 @@ OptionRecommendation(name='language',
        from calibre.ebooks.oeb.base import OEBBook
        accelerators = {}
+        tdir = PersistentTemporaryDirectory('_plumber')
        opfpath = self.input_plugin(open(self.input, 'rb'), self.opts,
                                    self.input_fmt, self.log,
-                                    accelerators)
+                                    accelerators, tdir)
        html_preprocessor = HTMLPreProcessor()
        self.reader = OEBReader()
        self.oeb = OEBBook(self.log, html_preprocessor=html_preprocessor)
@ -316,15 +321,16 @@ OptionRecommendation(name='language',
        from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
        fbase = self.opts.base_font_size
        if fbase == 0:
-            fbase = self.opts.dest.fbase
+            fbase = float(self.opts.dest.fbase)
        fkey = self.opts.font_size_mapping
        if fkey is None:
-            fkey = self.opts.dest.fsizes
+            fkey = self.opts.dest.fkey
        else:
            fkey = map(float, fkey.split(','))
        flattener = CSSFlattener(fbase=fbase, fkey=fkey,
                lineh=self.opts.line_height,
                untable=self.opts.linearize_tables)
        self.log.info('Flattening CSS...')
        flattener(self.oeb, self.opts)
        from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
@ -334,7 +340,7 @@ OptionRecommendation(name='language',
        trimmer(self.oeb, self.opts)
        self.log.info('Creating %s output...'%self.output_plugin.name)
-        self.output_plugin(self.oeb, self.output, self.input_plugin, self.opts,
+        self.output_plugin.convert(self.oeb, self.output, self.input_plugin, self.opts,
                self.log)
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 Read data from .mobi files
 '''
-import struct, os, cStringIO, re, functools, datetime
+import struct, os, cStringIO, re, functools, datetime, textwrap
 try:
    from PIL import Image as PILImage
@ -162,7 +162,7 @@ class MobiReader(object):
        self.log = log
        self.debug = debug
        self.embedded_mi = None
-        self.base_css_rules = '''
+        self.base_css_rules = textwrap.dedent('''
                blockquote { margin: 0em 0em 0em 1.25em; text-align: justify }
                p { margin: 0em; text-align: justify }
@ -174,7 +174,7 @@ class MobiReader(object):
                .mbp_pagebreak {
                    page-break-after: always; margin: 0; display: block
                }
-                '''
+                ''')
        self.tag_css_rules = []
        if hasattr(filename_or_stream, 'read'):
@ -223,7 +223,7 @@ class MobiReader(object):
        processed_records = self.extract_text()
        if self.debug is not None:
-            self.parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
+            parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
        self.add_anchors()
        self.processed_html = self.processed_html.decode(self.book_header.codec,
                                                          'ignore')
@ -265,7 +265,6 @@ class MobiReader(object):
            pass
        parse_cache[htmlfile] = root
        self.htmlfile = htmlfile
        self.log.debug('Creating OPF...')
        ncx = cStringIO.StringIO()
        opf = self.create_opf(htmlfile, guide, root)
        self.created_opf_path = os.path.splitext(htmlfile)[0]+'.opf'
@ -283,8 +282,7 @@ class MobiReader(object):
        if self.book_header.exth is not None or self.embedded_mi is not None:
-            if self.verbose:
+            self.log.debug('Creating OPF...')
-                print 'Creating OPF...'
            ncx = cStringIO.StringIO()
            opf = self.create_opf(htmlfile, guide, root)
            opf.render(open(os.path.splitext(htmlfile)[0]+'.opf', 'wb'), ncx)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -658,9 +658,9 @@ class Manifest(object):
        def _parse_css(self, data):
            data = self.oeb.decode(data)
-            data = self.CSSPreProcessor(data)
+            data = self.oeb.css_preprocessor(data)
            data = XHTML_CSS_NAMESPACE + data
-            parser = CSSParser(log=self.oeb.logger, loglevel=logging.WARNING,
+            parser = CSSParser(loglevel=logging.WARNING,
                               fetcher=self._fetch_css)
            data = parser.parseString(data, href=self.href)
            data.namespaces['h'] = XHTML_NS
--- a/src/calibre/ebooks/oeb/output.py
+++ b/src/calibre/ebooks/oeb/output.py
@ -25,7 +25,7 @@ class OEBOutput(OutputFormatPlugin):
        with CurrentDir(output_path):
            results = oeb_book.to_opf2(page_map=True)
            for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
-                href, root = results.pop(key, None)
+                href, root = results.pop(key, [None, None])
                if root is not None:
                    raw = etree.tostring(root, pretty_print=True,
                            encoding='utf-8')
@ -33,6 +33,21 @@ class OEBOutput(OutputFormatPlugin):
                        f.write(raw)
            for item in oeb_book.manifest:
-                print item.href
+                path = os.path.abspath(item.href)
+                dir = os.path.dirname(path)
+                if not os.path.exists(dir):
+                    os.makedirs(dir)
+                raw = item.data
+                if not isinstance(raw, basestring):
+                    if hasattr(raw, 'cssText'):
+                        raw = raw.cssText
+                    else:
+                        raw = etree.tostring(raw, encoding='utf-8',
+                                pretty_print=opts.pretty_print)
+                        raw = raw + '<?xml version="1.0" encoding="utf-8" ?>\n'
+                if isinstance(raw, unicode):
+                    raw = raw.encode('utf-8')
+                with open(path, 'wb') as f:
+                    f.write(raw)
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@ -168,7 +168,7 @@ class OEBReader(object):
        data.
        '''
        bad = []
-        check = OEB_DOCS+OEB_STYLES
+        check = OEB_DOCS.union(OEB_STYLES)
        for item in list(self.oeb.manifest.values()):
            if item.media_type in check:
                try: