Debug messages for a number of ebook conversion plugins.

2026-01-04 03:00:20 -05:00 · 2009-06-21 08:11:33 -04:00 · 2009-06-21 08:11:33 -04:00 · 7eef5c40e3
commit 7eef5c40e3
parent 25911a8598
17 changed files with 65 additions and 31 deletions
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@ -57,7 +57,6 @@ def line_length(raw, percent):
        return 0

    total = sum(lengths)
-    print total
    avg = total / len(lengths)
    max_line = avg * 2

--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@ -9,12 +9,10 @@ Transform OEB content into FB2 markup
 '''

 import os
-import re
 from base64 import b64encode

 from lxml import etree

-from calibre import entity_to_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.base import OEB_IMAGES
@ -33,11 +31,11 @@ STYLES = [
 ]

 class FB2MLizer(object):
-    def __init__(self, ignore_tables=False):
-        self.ignore_tables = ignore_tables
+    def __init__(self, log):
+        self.log = log
        
    def extract_content(self, oeb_book, opts):
-        oeb_book.logger.info('Converting XHTML to FB2 markup...')
+        self.log.info('Converting XHTML to FB2 markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.fb2mlize_spine()
@ -45,12 +43,14 @@ class FB2MLizer(object):
    def fb2mlize_spine(self):
        output = self.fb2_header()
        if 'titlepage' in self.oeb_book.guide:
+            self.log.debug('Generating cover page...')
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        output += self.fb2_body_footer()
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -16,7 +16,7 @@ class FB2Output(OutputFormatPlugin):
    file_type = 'fb2'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):    
-        fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables)
+        fb2mlizer = FB2MLizer(log)
        fb2_content = fb2mlizer.extract_content(oeb_book, opts)

        close = False
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@ -24,10 +24,12 @@ class PDFInput(InputFormatPlugin):

    def convert(self, stream, options, file_ext, log,
                accelerators):
+        log.debug('Converting file to html...')
        # The main html file will be named index.html
        pdftohtml(os.getcwd(), stream.name, options.no_images)

        from calibre.ebooks.metadata.meta import get_metadata
+        log.debug('Retrieving document metadata...')
        mi = get_metadata(stream, 'pdf')
        opf = OPFCreator(os.getcwd(), mi)

@ -42,9 +44,11 @@ class PDFInput(InputFormatPlugin):
            new_i = i.replace('-', '')
            os.rename(i, new_i)
            manifest.append((new_i, None))
+        log.debug('Generating manifest...')
        opf.create_manifest(manifest)

        opf.create_spine(['index.html'])
+        log.debug('Rendering manifest...')
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)

--- a/src/calibre/ebooks/pdf/output.py
+++ b/src/calibre/ebooks/pdf/output.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement

 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -9,9 +8,8 @@ __docformat__ = 'restructuredtext en'
 Convert OEB ebook format to PDF.
 '''

-#unit, papersize, orientation, custom_size, profile
-
-import os, glob
+import glob
+import os

 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
@ -54,14 +52,17 @@ class PDFOutput(OutputFormatPlugin):
        self.metadata = oeb_book.metadata

        if input_plugin.is_image_collection:
+            log.debug('Converting input as an image collection...')
            self.convert_images(input_plugin.get_images())
        else:
+            log.debug('Converting input as a text based book...')
            self.convert_text(oeb_book)

    def convert_images(self, images):
        self.write(ImagePDFWriter, images)

    def convert_text(self, oeb_book):
+        self.log.debug('Serializing oeb input to disk for processing...')
        with TemporaryDirectory('_pdf_out') as oeb_dir:
            from calibre.customize.ui import plugin_for_output_format
            oeb_output = plugin_for_output_format('oeb')
@ -86,6 +87,7 @@ class PDFOutput(OutputFormatPlugin):

        out_stream.seek(0)
        out_stream.truncate()
+        self.log.debug('Rendering pages to PDF...')
        writer.dump(items, out_stream, PDFMetadata(self.metadata))

        if close:
--- a/src/calibre/ebooks/pdf/writer.py
+++ b/src/calibre/ebooks/pdf/writer.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -9,11 +8,12 @@ __docformat__ = 'restructuredtext en'
 Write content to PDF.
 '''

-import os, shutil
+import os
+import shutil

 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks.pdf.pageoptions import unit, paper_size, \
-    orientation, size
+    orientation
 from calibre.ebooks.metadata import authors_to_string

 from PyQt4 import QtCore
--- a/src/calibre/ebooks/pml/input.py
+++ b/src/calibre/ebooks/pml/input.py
@ -4,7 +4,9 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

-import glob, os, shutil
+import glob
+import os
+import shutil

 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
@ -40,6 +42,7 @@ class PMLInput(InputFormatPlugin):
        if self.options.input_encoding:
            ienc = self.options.input_encoding

+        self.log.debug('Converting PML to HTML...')
        html = pml_to_html(pml_stream.read().decode(ienc)) 
        html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>')

@ -51,9 +54,11 @@ class PMLInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        self.options = options
+        self.log = log
        pages, images = [], []

        if file_ext == 'pmlz':
+            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                zf = ZipFile(stream)
                zf.extractall(tdir)
@ -64,6 +69,7 @@ class PMLInput(InputFormatPlugin):
                    html_path = os.path.join(os.getcwd(), html_name)
                    
                    pages.append(html_name)
+                    log.debug('Processing PML item %s...' % pml)
                    self.process_pml(pml, html_path)
                    
                imgs = glob.glob(os.path.join(tdir, '*.png'))
@ -90,12 +96,13 @@ class PMLInput(InputFormatPlugin):
            manifest_items.append((item, None))
        
        from calibre.ebooks.metadata.meta import get_metadata
+        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        opf = OPFCreator(os.getcwd(), mi)
+        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        with open('metadata.opf', 'wb') as opffile:
            opf.render(opffile)
        
        return os.path.join(os.getcwd(), 'metadata.opf')
-
--- a/src/calibre/ebooks/pml/output.py
+++ b/src/calibre/ebooks/pml/output.py
@ -37,13 +37,14 @@ class PMLOutput(OutputFormatPlugin):

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        with TemporaryDirectory('_pmlz_output') as tdir:
-            pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables)
+            pmlmlizer = PMLMLizer(log)
            content = pmlmlizer.extract_content(oeb_book, opts)
            with open(os.path.join(tdir, 'index.pml'), 'wb') as out:
                out.write(content.encode(opts.output_encoding, 'replace'))

            self.write_images(oeb_book.manifest, tdir)

+            log.debug('Compressing output...')
            pmlz = ZipFile(output_path, 'w')
            pmlz.add_dir(tdir)

--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en'

 import re

-from htmlentitydefs import codepoint2name
-
 from calibre.ebooks.pdb.ereader import image_name

 PML_HTML_RULES = [
--- a/src/calibre/ebooks/pml/pmlml.py
+++ b/src/calibre/ebooks/pml/pmlml.py
@ -67,24 +67,26 @@ SEPARATE_TAGS = [
 ]

 class PMLMLizer(object):
-    def __init__(self, ignore_tables=False):
-        self.ignore_tables = ignore_tables
+    def __init__(self, log):
+        self.log = log
        
    def extract_content(self, oeb_book, opts):
-        oeb_book.logger.info('Converting XHTML to PML markup...')
+        self.log.info('Converting XHTML to PML markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.pmlmlize_spine()
        
    def pmlmlize_spine(self):
        output = u''
-        if 'titlepage' in self.oeb_book.guide:
+        if 'titlepage' in self.oeb_book.guide:
+            self.log.debug('Generating title page...')
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to PML markup...' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            output += self.add_page_anchor(item.href)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
--- a/src/calibre/ebooks/rb/rbml.py
+++ b/src/calibre/ebooks/rb/rbml.py
@ -52,12 +52,12 @@ STYLES = [

 class RBMLizer(object):

-    def __init__(self, name_map={}, ignore_tables=False):
+    def __init__(self, log, name_map={}):
+        self.log = log
        self.name_map = name_map
-        self.ignore_tables = ignore_tables

    def extract_content(self, oeb_book, opts):
-        oeb_book.logger.info('Converting XHTML to RB markup...')
+        self.log.info('Converting XHTML to RB markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()
@ -66,12 +66,14 @@ class RBMLizer(object):
    def mlize_spine(self):
        output = u'<HTML><HEAD><TITLE></TITLE></HEAD><BODY>'
        if 'titlepage' in self.oeb_book.guide:
+            self.log.debug('Generating cover page...')
            href = self.oeb_book.guide['titlepage'].href
            item = self.oeb_book.manifest.hrefs[href]
            if item.spine_position is None:
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            output += self.add_page_anchor(item.href)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
--- a/src/calibre/ebooks/rb/reader.py
+++ b/src/calibre/ebooks/rb/reader.py
@ -102,14 +102,17 @@ class Reader(object):
            img.write(data)

    def extract_content(self, output_dir):
+        self.log.debug('Extracting content from file...')
        html = []
        images = []
        
        for item in self.toc:
            if item.name.lower().endswith('html'):
+                self.log.debug('HTML item %s found...' % item.name)
                html.append(item.name)
                self.get_text(item, output_dir)
            if item.name.lower().endswith('png'):
+                self.log.debug('PNG item %s found...' % item.name)
                images.append(item.name)
                self.get_image(item, output_dir)

--- a/src/calibre/ebooks/rb/writer.py
+++ b/src/calibre/ebooks/rb/writer.py
@ -64,6 +64,7 @@ class RBWriter(object):
                flags = 0
            toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags))

+        self.log.debug('Writing file header...')
        out_stream.write(HEADER)
        out_stream.write(struct.pack('<I', 0))
        out_stream.write(struct.pack('<IH', 0, 0))
@ -82,6 +83,7 @@ class RBWriter(object):

        out_stream.write(info[0][1])

+        self.log.debug('Writing compressed RB HTML...')
        # Compressed text with proper heading
        out_stream.write(struct.pack('<I', len(text[0][1])))
        out_stream.write(struct.pack('<I', text_size))
@ -90,6 +92,7 @@ class RBWriter(object):
        for chunck in text[0][1]:
            out_stream.write(chunck)

+        self.log.debug('Writing images...')
        for item in hidx+images:
            out_stream.write(item[1])

@ -98,7 +101,7 @@ class RBWriter(object):
        out_stream.write(struct.pack('<I', total_size))

    def _text(self, oeb_book):
-        rbmlizer = RBMLizer(name_map=self.name_map, ignore_tables=self.opts.linearize_tables)
+        rbmlizer = RBMLizer(self.log, name_map=self.name_map)
        text = rbmlizer.extract_content(oeb_book, self.opts).encode('cp1252', 'xmlcharrefreplace')
        size = len(text)

--- a/src/calibre/ebooks/rtf/output.py
+++ b/src/calibre/ebooks/rtf/output.py
@ -16,7 +16,7 @@ class RTFOutput(OutputFormatPlugin):
    file_type = 'rtf'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
-        rtfmlitzer = RTFMLizer(ignore_tables=opts.linearize_tables)
+        rtfmlitzer = RTFMLizer(log)
        content = rtfmlitzer.extract_content(oeb_book, opts)

        close = False
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@ -79,11 +79,11 @@ TODO:
 '''
 class RTFMLizer(object):

-    def __init__(self, ignore_tables=False):
-        self.ignore_tables = ignore_tables
+    def __init__(self, log):
+        self.log = log

    def extract_content(self, oeb_book, opts):
-        oeb_book.logger.info('Converting XHTML to RTF markup...')
+        self.log.info('Converting XHTML to RTF markup...')
        self.oeb_book = oeb_book
        self.opts = opts
        return self.mlize_spine()
@ -98,6 +98,7 @@ class RTFMLizer(object):
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
                output += '{\\page } '
        for item in self.oeb_book.spine:
+            self.log.debug('Converting %s to RTF markup...' % item.href)
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
        output += self.footer()
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@ -21,20 +21,26 @@ class TXTInput(InputFormatPlugin):
        ienc = stream.encoding if stream.encoding else 'utf-8'
        if options.input_encoding:
            ienc = options.input_encoding
+        log.debug('Reading text from file...')
        txt = stream.read().decode(ienc)

+        log.debug('Running text through markdown conversion...')
        try:
            html = txt_to_markdown(txt)
        except RuntimeError:
            raise ValueError('This txt file has malformed markup, it cannot be'
                'converted by calibre. See http://daringfireball.net/projects/markdown/syntax')
+
+        log.debug('Writing html output...')
        with open('index.html', 'wb') as index:
            index.write(html.encode('utf-8'))

        from calibre.ebooks.metadata.meta import get_metadata
+        log.debug('Retrieving source document metadata...')
        mi = get_metadata(stream, 'txt')
        manifest = [('index.html', None)]
        spine = ['index.html']
+        log.debug('Generating manifest...')
        opf_writer(os.getcwd(), 'metadata.opf', manifest, spine, mi)

        return os.path.join(os.getcwd(), 'metadata.opf')
--- a/src/calibre/ebooks/txt/writer.py
+++ b/src/calibre/ebooks/txt/writer.py
@ -24,6 +24,7 @@ class TxtWriter(object):
    def dump(self, spine):
        out = u''
        for item in spine:
+            self.log.debug('Processing %s...' % item.href)
            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
            content = self.remove_newlines(content)
            content = self.strip_html(content)
@ -40,6 +41,7 @@ class TxtWriter(object):
        return out

    def strip_html(self, text):
+        self.log.debug('\tStripping html...')
        stripped = u''

        # Remove unnecessary tags
@ -77,6 +79,7 @@ class TxtWriter(object):
        return stripped

    def replace_html_symbols(self, content):
+        self.log.debug('\tReplacing entities with unicode...')
        for entity in set(re.findall('&.+?;', content)):
            mo = re.search('(%s)' % entity[1:-1], content)
            content = content.replace(entity, entity_to_unicode(mo))
@ -84,6 +87,7 @@ class TxtWriter(object):
        return content

    def cleanup_text(self, text):
+        self.log.debug('\tClean up text...')
        # Replace bad characters.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')
@ -114,6 +118,7 @@ class TxtWriter(object):
        return text

    def remove_newlines(self, text):
+        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')
@ -121,6 +126,7 @@ class TxtWriter(object):
        return text

    def specified_newlines(self, text):
+        self.log.debug('\tReplacing newlines with selected type...')
        if self.newline == '\n':
            return text