CHM Input: Store extracted files in the input/ subdirectory for easy debugging when --debug-pipeline is specified

2025-07-09 03:04:10 -04:00 · 2011-04-27 14:19:25 -06:00 · 2011-04-27 14:19:25 -06:00 · 8887b7f057
commit 8887b7f057
parent 631afb7eac
3 changed files with 16 additions and 8 deletions
--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@@ -19,12 +19,12 @@ class CHMInput(InputFormatPlugin):
    description = 'Convert CHM files to OEB'
    file_types  = set(['chm'])

-    def _chmtohtml(self, output_dir, chm_path, no_images, log):
+    def _chmtohtml(self, output_dir, chm_path, no_images, log, debug_dump=False):
        from calibre.ebooks.chm.reader import CHMReader
        log.debug('Opening CHM file')
        rdr = CHMReader(chm_path, log, self.opts)
        log.debug('Extracting CHM to %s' % output_dir)
-        rdr.extract_content(output_dir)
+        rdr.extract_content(output_dir, debug_dump=debug_dump)
        self._chm_reader = rdr
        return rdr.hhc_path

@@ -47,7 +47,12 @@ class CHMInput(InputFormatPlugin):
            stream.close()
            log.debug('tdir=%s' % tdir)
            log.debug('stream.name=%s' % stream.name)
-            mainname = self._chmtohtml(tdir, chm_name, no_images, log)
+            debug_dump = False
+            odi = options.debug_pipeline
+            if odi:
+                debug_dump = os.path.join(odi, 'input')
+            mainname = self._chmtohtml(tdir, chm_name, no_images, log,
+                    debug_dump=debug_dump)
            mainpath = os.path.join(tdir, mainname)

            metadata = get_metadata_from_reader(self._chm_reader)
@@ -56,7 +61,6 @@ class CHMInput(InputFormatPlugin):
            #from calibre import ipython
            #ipython()

-            odi = options.debug_pipeline
            options.debug_pipeline = None
            options.input_encoding = 'utf-8'
            # try a custom conversion:
--- a/src/calibre/ebooks/chm/reader.py
+++ b/src/calibre/ebooks/chm/reader.py
@@ -97,7 +97,7 @@ class CHMReader(CHMFile):
            raise CHMError("'%s' is zero bytes in length!"%(path,))
        return data

-    def ExtractFiles(self, output_dir=os.getcwdu()):
+    def ExtractFiles(self, output_dir=os.getcwdu(), debug_dump=False):
        html_files = set([])
        for path in self.Contents():
            lpath = os.path.join(output_dir, path)
@@ -123,6 +123,9 @@ class CHMReader(CHMFile):
                    self.log.warn('%r filename too long, skipping'%path)
                    continue
                raise
+        if debug_dump:
+            import shutil
+            shutil.copytree(output_dir, os.path.join(debug_dump, 'debug_dump'))
        for lpath in html_files:
            with open(lpath, 'r+b') as f:
                data = f.read()
@@ -249,8 +252,8 @@ class CHMReader(CHMFile):
        if not os.path.isdir(dir):
            os.makedirs(dir)

-    def extract_content(self, output_dir=os.getcwdu()):
-        self.ExtractFiles(output_dir=output_dir)
+    def extract_content(self, output_dir=os.getcwdu(), debug_dump=False):
+        self.ExtractFiles(output_dir=output_dir, debug_dump=debug_dump)



--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -854,6 +854,7 @@ OptionRecommendation(name='sr3_replace',
        if isinstance(ret, basestring):
            shutil.copytree(output_dir, out_dir)
        else:
+            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            self.dump_oeb(ret, out_dir)
        if self.input_fmt == 'recipe':