Run preprocess rules when viewing PDF files

2026-06-07 14:35:27 -04:00 · 2009-05-24 21:58:20 -07:00
parent 352b5d24ed
commit aa25d2a814
2 changed files with 13 additions and 4 deletions
@@ -130,6 +130,9 @@ class EbookIterator(object):
            plumber.opts.dont_package = True
        if hasattr(plumber.opts, 'no_process'):
            plumber.opts.no_process = True
+        if hasattr(plumber.input_plugin, '_preprocess_html_for_viewer'):
+            plumber.input_plugin._preprocess_html_for_viewer = True
+
        self.pathtoopf = plumber.input_plugin(open(plumber.input, 'rb'),
                plumber.opts, plumber.input_fmt, self.log,
                {}, self.base)
@@ -12,19 +12,25 @@ from calibre.ebooks.pdf.pdftohtml import pdftohtml
 from calibre.ebooks.metadata.opf2 import OPFCreator

 class PDFInput(InputFormatPlugin):
-    
+
    name        = 'PDF Input'
    author      = 'John Schember'
    description = 'Convert PDF files to HTML'
    file_types  = set(['pdf'])

+    _preprocess_html_for_viewer = False
+
    def convert(self, stream, options, file_ext, log,
                accelerators):
        html = pdftohtml(stream.name)
-        
+        if self._preprocess_html_for_viewer:
+            from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+            prepro = HTMLPreProcessor(lambda x:x, False)
+            html = prepro(html)
+
        with open('index.html', 'wb') as index:
            index.write(html)
-            
+
        from calibre.ebooks.metadata.meta import get_metadata
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwd(), mi)
@@ -32,5 +38,5 @@ class PDFInput(InputFormatPlugin):
        opf.create_spine(['index.html'])
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
-        
+
        return os.path.join(os.getcwd(), 'metadata.opf')