diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 15b927115f..a2794d94ec 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -57,7 +57,6 @@ def line_length(raw, percent): return 0 total = sum(lengths) - print total avg = total / len(lengths) max_line = avg * 2 diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 76b9b9a758..5df3970577 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -9,12 +9,10 @@ Transform OEB content into FB2 markup ''' import os -import re from base64 import b64encode from lxml import etree -from calibre import entity_to_unicode from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_IMAGES @@ -33,11 +31,11 @@ STYLES = [ ] class FB2MLizer(object): - def __init__(self, ignore_tables=False): - self.ignore_tables = ignore_tables + def __init__(self, log): + self.log = log def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to FB2 markup...') + self.log.info('Converting XHTML to FB2 markup...') self.oeb_book = oeb_book self.opts = opts return self.fb2mlize_spine() @@ -45,12 +43,14 @@ class FB2MLizer(object): def fb2mlize_spine(self): output = self.fb2_header() if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating cover page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += 
self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.fb2_body_footer() diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 3b9a5a245f..6eaef6ad19 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -16,7 +16,7 @@ class FB2Output(OutputFormatPlugin): file_type = 'fb2' def convert(self, oeb_book, output_path, input_plugin, opts, log): - fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables) + fb2mlizer = FB2MLizer(log) fb2_content = fb2mlizer.extract_content(oeb_book, opts) close = False diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 97024ea908..d59c9da61b 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -24,10 +24,12 @@ class PDFInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + log.debug('Converting file to html...') # The main html file will be named index.html pdftohtml(os.getcwd(), stream.name, options.no_images) from calibre.ebooks.metadata.meta import get_metadata + log.debug('Retrieving document metadata...') mi = get_metadata(stream, 'pdf') opf = OPFCreator(os.getcwd(), mi) @@ -42,9 +44,11 @@ class PDFInput(InputFormatPlugin): new_i = i.replace('-', '') os.rename(i, new_i) manifest.append((new_i, None)) + log.debug('Generating manifest...') opf.create_manifest(manifest) opf.create_spine(['index.html']) + log.debug('Rendering manifest...') with open('metadata.opf', 'wb') as opffile: opf.render(opffile) diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py index a20f503c57..b2d649c2cf 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/pdf/output.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' @@ -9,9 +8,8 @@ __docformat__ = 'restructuredtext en' Convert OEB ebook format to PDF. 
''' -#unit, papersize, orientation, custom_size, profile - -import os, glob +import glob +import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation @@ -54,14 +52,17 @@ class PDFOutput(OutputFormatPlugin): self.metadata = oeb_book.metadata if input_plugin.is_image_collection: + log.debug('Converting input as an image collection...') self.convert_images(input_plugin.get_images()) else: + log.debug('Converting input as a text based book...') self.convert_text(oeb_book) def convert_images(self, images): self.write(ImagePDFWriter, images) def convert_text(self, oeb_book): + self.log.debug('Serializing oeb input to disk for processing...') with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') @@ -86,6 +87,7 @@ class PDFOutput(OutputFormatPlugin): out_stream.seek(0) out_stream.truncate() + self.log.debug('Rendering pages to PDF...') writer.dump(items, out_stream, PDFMetadata(self.metadata)) if close: diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 97eaeb9244..b87aba7bc0 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -9,11 +8,12 @@ __docformat__ = 'restructuredtext en' Write content to PDF. 
''' -import os, shutil +import os +import shutil from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ - orientation, size + orientation from calibre.ebooks.metadata import authors_to_string from PyQt4 import QtCore diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index d755890ca8..be9876dc1f 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -4,7 +4,9 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import glob, os, shutil +import glob +import os +import shutil from calibre.customize.conversion import InputFormatPlugin from calibre.ptempfile import TemporaryDirectory @@ -40,6 +42,7 @@ class PMLInput(InputFormatPlugin): if self.options.input_encoding: ienc = self.options.input_encoding + self.log.debug('Converting PML to HTML...') html = pml_to_html(pml_stream.read().decode(ienc)) html_stream.write('</head><body>' + html.encode('utf-8') + '</body></html>') @@ -51,9 +54,11 @@ class PMLInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): self.options = options + self.log = log pages, images = [], [] if file_ext == 'pmlz': + log.debug('De-compressing content to temporary directory...') with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) @@ -64,6 +69,7 @@ class PMLInput(InputFormatPlugin): html_path = os.path.join(os.getcwd(), html_name) pages.append(html_name) + log.debug('Processing PML item %s...' 
% pml) self.process_pml(pml, html_path) imgs = glob.glob(os.path.join(tdir, '*.png')) @@ -90,12 +96,13 @@ class PMLInput(InputFormatPlugin): manifest_items.append((item, None)) from calibre.ebooks.metadata.meta import get_metadata + log.debug('Reading metadata from input file...') mi = get_metadata(stream, 'pml') opf = OPFCreator(os.getcwd(), mi) + log.debug('Generating manifest...') opf.create_manifest(manifest_items) opf.create_spine(pages) with open('metadata.opf', 'wb') as opffile: opf.render(opffile) return os.path.join(os.getcwd(), 'metadata.opf') - diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 851a89db56..ac66c9a9f5 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -37,13 +37,14 @@ class PMLOutput(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): with TemporaryDirectory('_pmlz_output') as tdir: - pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables) + pmlmlizer = PMLMLizer(log) content = pmlmlizer.extract_content(oeb_book, opts) with open(os.path.join(tdir, 'index.pml'), 'wb') as out: out.write(content.encode(opts.output_encoding, 'replace')) self.write_images(oeb_book.manifest, tdir) + log.debug('Compressing output...') pmlz = ZipFile(output_path, 'w') pmlz.add_dir(tdir) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index aeb3326636..2ca38176d5 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en' import re -from htmlentitydefs import codepoint2name - from calibre.ebooks.pdb.ereader import image_name PML_HTML_RULES = [ diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index ef735a56b1..79a8d8bfa9 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -67,24 +67,26 @@ SEPARATE_TAGS = [ ] class PMLMLizer(object): - def __init__(self, 
ignore_tables=False): - self.ignore_tables = ignore_tables + def __init__(self, log): + self.log = log def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to PML markup...') + self.log.info('Converting XHTML to PML markup...') self.oeb_book = oeb_book self.opts = opts return self.pmlmlize_spine() def pmlmlize_spine(self): output = u'' - if 'titlepage' in self.oeb_book.guide: + if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating title page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to PML markup...' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.add_page_anchor(item.href) output += self.dump_text(item.data.find(XHTML('body')), stylizer) diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py index 3563ba2538..bc9248f8b0 100644 --- a/src/calibre/ebooks/rb/rbml.py +++ b/src/calibre/ebooks/rb/rbml.py @@ -52,12 +52,12 @@ STYLES = [ class RBMLizer(object): - def __init__(self, name_map={}, ignore_tables=False): + def __init__(self, log, name_map={}): + self.log = log self.name_map = name_map - self.ignore_tables = ignore_tables def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to RB markup...') + self.log.info('Converting XHTML to RB markup...') self.oeb_book = oeb_book self.opts = opts return self.mlize_spine() @@ -66,12 +66,14 @@ class RBMLizer(object): def mlize_spine(self): output = u'<HTML><HEAD><TITLE>' if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating cover page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = 
Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to RocketBook HTML...' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.add_page_anchor(item.href) output += self.dump_text(item.data.find(XHTML('body')), stylizer) diff --git a/src/calibre/ebooks/rb/reader.py b/src/calibre/ebooks/rb/reader.py index ffc7d6e799..c4668b94bf 100644 --- a/src/calibre/ebooks/rb/reader.py +++ b/src/calibre/ebooks/rb/reader.py @@ -102,14 +102,17 @@ class Reader(object): img.write(data) def extract_content(self, output_dir): + self.log.debug('Extracting content from file...') html = [] images = [] for item in self.toc: if item.name.lower().endswith('html'): + self.log.debug('HTML item %s found...' % item.name) html.append(item.name) self.get_text(item, output_dir) if item.name.lower().endswith('png'): + self.log.debug('PNG item %s found...' % item.name) images.append(item.name) self.get_image(item, output_dir) diff --git a/src/calibre/ebooks/rb/writer.py b/src/calibre/ebooks/rb/writer.py index 4e697f6d36..0d5086a333 100644 --- a/src/calibre/ebooks/rb/writer.py +++ b/src/calibre/ebooks/rb/writer.py @@ -64,6 +64,7 @@ class RBWriter(object): flags = 0 toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags)) + self.log.debug('Writing file header...') out_stream.write(HEADER) out_stream.write(struct.pack('