From 49e81ecb82953e064acf1324f5c96ca023865b04 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 27 Aug 2009 11:56:37 -0600 Subject: [PATCH] ebook-convert: Replace --debug-input with the more sophisticated --debug-pipeline --- src/calibre/customize/conversion.py | 22 ------- src/calibre/ebooks/conversion/cli.py | 4 +- src/calibre/ebooks/conversion/plumber.py | 82 ++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 29 deletions(-) diff --git a/src/calibre/customize/conversion.py b/src/calibre/customize/conversion.py index 6052bae035..edc6ba2639 100644 --- a/src/calibre/customize/conversion.py +++ b/src/calibre/customize/conversion.py @@ -117,15 +117,6 @@ class InputFormatPlugin(Plugin): #: in sub-classes. Use :member:`options` instead. Every option must be an #: instance of :class:`OptionRecommendation`. common_options = set([ - OptionRecommendation(name='debug_input', - recommended_value=None, level=OptionRecommendation.LOW, - help=_('Save the output from the input plugin to the specified ' - 'directory. Useful if you are unsure at which stage ' - 'of the conversion process a bug is occurring. ' - 'WARNING: This completely deletes the contents of ' - 'the specified directory.') - ), - OptionRecommendation(name='input_encoding', recommended_value=None, level=OptionRecommendation.LOW, help=_('Specify the character encoding of the input document. If ' @@ -216,19 +207,6 @@ class InputFormatPlugin(Plugin): ret = self.convert(stream, options, file_ext, log, accelerators) - if options.debug_input is not None: - options.debug_input = os.path.abspath(options.debug_input) - if not os.path.exists(options.debug_input): - os.makedirs(options.debug_input) - if isinstance(ret, basestring): - shutil.rmtree(options.debug_input) - shutil.copytree(output_dir, options.debug_input) - else: - from calibre.ebooks.oeb.writer import OEBWriter - w = OEBWriter(pretty_print=options.pretty_print) - w(ret, options.debug_input) - - log.info('Input debug saved to:', options.debug_input) return ret diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index d956c08112..d9e79995e4 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -158,6 +158,7 @@ def add_pipeline_options(parser, plumber): 'DEBUG': (_('Options to help with debugging the conversion'), [ 'verbose', + 'debug_pipeline', ]), @@ -247,9 +248,6 @@ def main(args=sys.argv): plumber.run() - if plumber.opts.debug_input is None: - log(_('Output saved to'), ' ', plumber.output) - return 0 if __name__ == '__main__': diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 2261d071ed..a4de582466 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -3,7 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os, re, sys +import os, re, sys, shutil from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.ui import input_profiles, output_profiles, \ @@ -13,6 +13,26 @@ from calibre.ebooks.conversion.preprocess import HTMLPreProcessor from calibre.ptempfile import PersistentTemporaryDirectory from calibre import extract, walk +DEBUG_README=u''' +This debug directory contains snapshots of the e-book as it passes through the +various stages of conversion. The stages are: + + 1. input - This is the result of running the input plugin on the source + file. + + 2. parsed - This is the result of preprocessing and parsing the output of + the input plugin. Note that for some input plugins this will be identical to + the input sub-directory. Use this directory to debug structure detection, + etc. + + 3. structure - This corresponds to the stage in the pipeline when structure + detection has run, but before the CSS is flattened. + + 4. processed - This corresponds to the e-book as it is passed to the output + plugin. + +''' + def supported_input_formats(): fmts = available_input_formats() for x in ('zip', 'rar', 'oebzip'): @@ -70,6 +90,17 @@ OptionRecommendation(name='verbose', 'verbosity.') ), +OptionRecommendation(name='debug_pipeline', + recommended_value=None, level=OptionRecommendation.LOW, + short_switch='d', + help=_('Save the output from different stages of the conversion ' + 'pipeline to the specified ' + 'directory. Useful if you are unsure at which stage ' + 'of the conversion process a bug is occurring. ' + 'WARNING: This completely deletes the contents of ' + 'the specified directory.') + ), + OptionRecommendation(name='input_profile', recommended_value='default', level=OptionRecommendation.LOW, choices=[x.short_name for x in input_profiles()], @@ -622,6 +653,22 @@ OptionRecommendation(name='language', except: pass + def dump_oeb(self, oeb, out_dir): + from calibre.ebooks.oeb.writer import OEBWriter + w = OEBWriter(pretty_print=self.opts.pretty_print) + w(oeb, out_dir) + + def dump_input(self, ret, output_dir): + out_dir = os.path.join(self.opts.debug_pipeline, 'input') + if isinstance(ret, basestring): + shutil.copytree(output_dir, out_dir) + else: + os.makedirs(out_dir) + self.dump_oeb(ret, out_dir) + + self.log.info('Input debug saved to:', out_dir) + + def run(self): ''' Run the conversion pipeline @@ -632,6 +679,14 @@ OptionRecommendation(name='language', self.log.filter_level = self.log.DEBUG self.flush() + if self.opts.debug_pipeline is not None: + self.opts.verbose = max(self.opts.verbose, 4) + self.opts.debug_pipeline = os.path.abspath(self.opts.debug_pipeline) + if os.path.exists(self.opts.debug_pipeline): + shutil.rmtree(self.opts.debug_pipeline) + os.makedirs(self.opts.debug_pipeline) + + # Run any preprocess plugins from calibre.customize.ui import run_plugins_on_preprocess self.input = run_plugins_on_preprocess(self.input) @@ -656,17 +711,21 @@ OptionRecommendation(name='language', self.oeb = self.input_plugin(stream, self.opts, self.input_fmt, self.log, accelerators, tdir) + if self.opts.debug_pipeline is not None: + self.dump_input(self.oeb, tdir) if self.input_fmt == 'recipe': self.opts_to_mi(self.user_metadata) - if self.opts.debug_input is not None: - self.log('Debug input called, aborting the rest of the pipeline.') - return if not hasattr(self.oeb, 'manifest'): self.oeb = create_oebbook(self.log, self.oeb, self.opts, self.input_plugin) self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) self.flush() + if self.opts.debug_pipeline is not None: + out_dir = os.path.join(self.opts.debug_pipeline, 'parsed') + self.dump_oeb(self.oeb, out_dir) + self.log('Parsed HTML written to:', out_dir) + pr(0., _('Running transforms on ebook...')) from calibre.ebooks.oeb.transforms.guide import Clean @@ -702,6 +761,12 @@ OptionRecommendation(name='language', pr(0.4) self.flush() + if self.opts.debug_pipeline is not None: + out_dir = os.path.join(self.opts.debug_pipeline, 'structure') + self.dump_oeb(self.oeb, out_dir) + self.log('Structured HTML written to:', out_dir) + + if self.opts.extra_css and os.path.exists(self.opts.extra_css): self.opts.extra_css = open(self.opts.extra_css, 'rb').read() @@ -739,6 +804,15 @@ OptionRecommendation(name='language', pr(1.) self.flush() + if self.opts.debug_pipeline is not None: + out_dir = os.path.join(self.opts.debug_pipeline, 'processed') + self.dump_oeb(self.oeb, out_dir) + self.log('Processed HTML written to:', out_dir) + open(os.path.join(self.opts.debug_pipeline, 'README.txt'), + 'w').write(DEBUG_README.encode('utf-8')) + self.log('Debug pipeline called, not running output plugin') + return + self.log.info('Creating %s...'%self.output_plugin.name) our = CompositeProgressReporter(0.67, 1., self.ui_reporter) self.output_plugin.report_progress = our