ebook-convert: Replace --debug-input with the more sophisticated --debug-pipeline

This commit is contained in:
Kovid Goyal 2009-08-27 11:56:37 -06:00
parent 92e4f700c6
commit 49e81ecb82
3 changed files with 79 additions and 29 deletions

View File

@ -117,15 +117,6 @@ class InputFormatPlugin(Plugin):
#: in sub-classes. Use :member:`options` instead. Every option must be an #: in sub-classes. Use :member:`options` instead. Every option must be an
#: instance of :class:`OptionRecommendation`. #: instance of :class:`OptionRecommendation`.
common_options = set([ common_options = set([
OptionRecommendation(name='debug_input',
recommended_value=None, level=OptionRecommendation.LOW,
help=_('Save the output from the input plugin to the specified '
'directory. Useful if you are unsure at which stage '
'of the conversion process a bug is occurring. '
'WARNING: This completely deletes the contents of '
'the specified directory.')
),
OptionRecommendation(name='input_encoding', OptionRecommendation(name='input_encoding',
recommended_value=None, level=OptionRecommendation.LOW, recommended_value=None, level=OptionRecommendation.LOW,
help=_('Specify the character encoding of the input document. If ' help=_('Specify the character encoding of the input document. If '
@ -216,19 +207,6 @@ class InputFormatPlugin(Plugin):
ret = self.convert(stream, options, file_ext, ret = self.convert(stream, options, file_ext,
log, accelerators) log, accelerators)
if options.debug_input is not None:
options.debug_input = os.path.abspath(options.debug_input)
if not os.path.exists(options.debug_input):
os.makedirs(options.debug_input)
if isinstance(ret, basestring):
shutil.rmtree(options.debug_input)
shutil.copytree(output_dir, options.debug_input)
else:
from calibre.ebooks.oeb.writer import OEBWriter
w = OEBWriter(pretty_print=options.pretty_print)
w(ret, options.debug_input)
log.info('Input debug saved to:', options.debug_input)
return ret return ret

View File

@ -158,6 +158,7 @@ def add_pipeline_options(parser, plumber):
'DEBUG': (_('Options to help with debugging the conversion'), 'DEBUG': (_('Options to help with debugging the conversion'),
[ [
'verbose', 'verbose',
'debug_pipeline',
]), ]),
@ -247,9 +248,6 @@ def main(args=sys.argv):
plumber.run() plumber.run()
if plumber.opts.debug_input is None:
log(_('Output saved to'), ' ', plumber.output)
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -3,7 +3,7 @@ __license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, re, sys import os, re, sys, shutil
from calibre.customize.conversion import OptionRecommendation, DummyReporter from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.customize.ui import input_profiles, output_profiles, \ from calibre.customize.ui import input_profiles, output_profiles, \
@ -13,6 +13,26 @@ from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ptempfile import PersistentTemporaryDirectory
from calibre import extract, walk from calibre import extract, walk
# Contents of the README.txt that the conversion pipeline writes (encoded as
# UTF-8) into the top level of the debug directory when debug_pipeline is
# set. It describes the per-stage sub-directories the pipeline creates there:
# input, parsed, structure and processed.
DEBUG_README=u'''
This debug directory contains snapshots of the e-book as it passes through the
various stages of conversion. The stages are:
1. input - This is the result of running the input plugin on the source
file.
2. parsed - This is the result of preprocessing and parsing the output of
the input plugin. Note that for some input plugins this will be identical to
the input sub-directory. Use this directory to debug structure detection,
etc.
3. structure - This corresponds to the stage in the pipeline when structure
detection has run, but before the CSS is flattened.
4. processed - This corresponds to the e-book as it is passed to the output
plugin.
'''
def supported_input_formats(): def supported_input_formats():
fmts = available_input_formats() fmts = available_input_formats()
for x in ('zip', 'rar', 'oebzip'): for x in ('zip', 'rar', 'oebzip'):
@ -70,6 +90,17 @@ OptionRecommendation(name='verbose',
'verbosity.') 'verbosity.')
), ),
OptionRecommendation(name='debug_pipeline',
recommended_value=None, level=OptionRecommendation.LOW,
short_switch='d',
help=_('Save the output from different stages of the conversion '
'pipeline to the specified '
'directory. Useful if you are unsure at which stage '
'of the conversion process a bug is occurring. '
'WARNING: This completely deletes the contents of '
'the specified directory.')
),
OptionRecommendation(name='input_profile', OptionRecommendation(name='input_profile',
recommended_value='default', level=OptionRecommendation.LOW, recommended_value='default', level=OptionRecommendation.LOW,
choices=[x.short_name for x in input_profiles()], choices=[x.short_name for x in input_profiles()],
@ -622,6 +653,22 @@ OptionRecommendation(name='language',
except: except:
pass pass
def dump_oeb(self, oeb, out_dir):
    '''
    Write the book `oeb` into the directory `out_dir` using an
    :class:`OEBWriter`.

    The writer is configured with the current value of the ``pretty_print``
    conversion option.
    '''
    from calibre.ebooks.oeb.writer import OEBWriter
    # Build the writer and invoke it in a single step; the writer object is
    # not needed afterwards.
    OEBWriter(pretty_print=self.opts.pretty_print)(oeb, out_dir)
def dump_input(self, ret, output_dir):
    '''
    Save a snapshot of the input plugin's result into the ``input``
    sub-directory of the debug directory (``self.opts.debug_pipeline``).

    :param ret: The value returned by the input plugin. If it is a string,
                the contents of `output_dir` are copied; otherwise it is
                passed to :meth:`dump_oeb` to be written out.
    :param output_dir: The directory the input plugin worked in.
    '''
    out_dir = os.path.join(self.opts.debug_pipeline, 'input')
    # Both shutil.copytree() and os.makedirs() raise OSError when the
    # destination already exists. Remove any stale snapshot first, the same
    # way run() resets the top-level debug directory, so that calling this
    # method again does not fail.
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    if isinstance(ret, basestring):
        shutil.copytree(output_dir, out_dir)
    else:
        os.makedirs(out_dir)
        self.dump_oeb(ret, out_dir)

    self.log.info('Input debug saved to:', out_dir)
def run(self): def run(self):
''' '''
Run the conversion pipeline Run the conversion pipeline
@ -632,6 +679,14 @@ OptionRecommendation(name='language',
self.log.filter_level = self.log.DEBUG self.log.filter_level = self.log.DEBUG
self.flush() self.flush()
if self.opts.debug_pipeline is not None:
self.opts.verbose = max(self.opts.verbose, 4)
self.opts.debug_pipeline = os.path.abspath(self.opts.debug_pipeline)
if os.path.exists(self.opts.debug_pipeline):
shutil.rmtree(self.opts.debug_pipeline)
os.makedirs(self.opts.debug_pipeline)
# Run any preprocess plugins # Run any preprocess plugins
from calibre.customize.ui import run_plugins_on_preprocess from calibre.customize.ui import run_plugins_on_preprocess
self.input = run_plugins_on_preprocess(self.input) self.input = run_plugins_on_preprocess(self.input)
@ -656,17 +711,21 @@ OptionRecommendation(name='language',
self.oeb = self.input_plugin(stream, self.opts, self.oeb = self.input_plugin(stream, self.opts,
self.input_fmt, self.log, self.input_fmt, self.log,
accelerators, tdir) accelerators, tdir)
if self.opts.debug_pipeline is not None:
self.dump_input(self.oeb, tdir)
if self.input_fmt == 'recipe': if self.input_fmt == 'recipe':
self.opts_to_mi(self.user_metadata) self.opts_to_mi(self.user_metadata)
if self.opts.debug_input is not None:
self.log('Debug input called, aborting the rest of the pipeline.')
return
if not hasattr(self.oeb, 'manifest'): if not hasattr(self.oeb, 'manifest'):
self.oeb = create_oebbook(self.log, self.oeb, self.opts, self.oeb = create_oebbook(self.log, self.oeb, self.opts,
self.input_plugin) self.input_plugin)
self.input_plugin.postprocess_book(self.oeb, self.opts, self.log) self.input_plugin.postprocess_book(self.oeb, self.opts, self.log)
pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter) pr = CompositeProgressReporter(0.34, 0.67, self.ui_reporter)
self.flush() self.flush()
if self.opts.debug_pipeline is not None:
out_dir = os.path.join(self.opts.debug_pipeline, 'parsed')
self.dump_oeb(self.oeb, out_dir)
self.log('Parsed HTML written to:', out_dir)
pr(0., _('Running transforms on ebook...')) pr(0., _('Running transforms on ebook...'))
from calibre.ebooks.oeb.transforms.guide import Clean from calibre.ebooks.oeb.transforms.guide import Clean
@ -702,6 +761,12 @@ OptionRecommendation(name='language',
pr(0.4) pr(0.4)
self.flush() self.flush()
if self.opts.debug_pipeline is not None:
out_dir = os.path.join(self.opts.debug_pipeline, 'structure')
self.dump_oeb(self.oeb, out_dir)
self.log('Structured HTML written to:', out_dir)
if self.opts.extra_css and os.path.exists(self.opts.extra_css): if self.opts.extra_css and os.path.exists(self.opts.extra_css):
self.opts.extra_css = open(self.opts.extra_css, 'rb').read() self.opts.extra_css = open(self.opts.extra_css, 'rb').read()
@ -739,6 +804,15 @@ OptionRecommendation(name='language',
pr(1.) pr(1.)
self.flush() self.flush()
if self.opts.debug_pipeline is not None:
out_dir = os.path.join(self.opts.debug_pipeline, 'processed')
self.dump_oeb(self.oeb, out_dir)
self.log('Processed HTML written to:', out_dir)
open(os.path.join(self.opts.debug_pipeline, 'README.txt'),
'w').write(DEBUG_README.encode('utf-8'))
self.log('Debug pipeline called, not running output plugin')
return
self.log.info('Creating %s...'%self.output_plugin.name) self.log.info('Creating %s...'%self.output_plugin.name)
our = CompositeProgressReporter(0.67, 1., self.ui_reporter) our = CompositeProgressReporter(0.67, 1., self.ui_reporter)
self.output_plugin.report_progress = our self.output_plugin.report_progress = our