diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 15b927115f..a2794d94ec 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -57,7 +57,6 @@ def line_length(raw, percent): return 0 total = sum(lengths) - print total avg = total / len(lengths) max_line = avg * 2 diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 76b9b9a758..5df3970577 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -9,12 +9,10 @@ Transform OEB content into FB2 markup ''' import os -import re from base64 import b64encode from lxml import etree -from calibre import entity_to_unicode from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_IMAGES @@ -33,11 +31,11 @@ STYLES = [ ] class FB2MLizer(object): - def __init__(self, ignore_tables=False): - self.ignore_tables = ignore_tables + def __init__(self, log): + self.log = log def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to FB2 markup...') + self.log.info('Converting XHTML to FB2 markup...') self.oeb_book = oeb_book self.opts = opts return self.fb2mlize_spine() @@ -45,12 +43,14 @@ class FB2MLizer(object): def fb2mlize_spine(self): output = self.fb2_header() if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating cover page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to FictionBook2 XML' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.fb2_body_footer() diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 3b9a5a245f..af04179e9a 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -16,7 +16,7 @@ class FB2Output(OutputFormatPlugin): file_type = 'fb2' def convert(self, oeb_book, output_path, input_plugin, opts, log): - fb2mlizer = FB2MLizer(ignore_tables=opts.linearize_tables) + fb2mlizer = FB2MLizer(log) fb2_content = fb2mlizer.extract_content(oeb_book, opts) close = False @@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin): out_stream.seek(0) out_stream.truncate() - out_stream.write(fb2_content.encode('utf-8')) + out_stream.write(fb2_content.encode('utf-8', 'replace')) if close: out_stream.close() diff --git a/src/calibre/ebooks/pdb/ereader/output.py b/src/calibre/ebooks/pdb/ereader/output.py index f217c04415..7d3a75eeee 100644 --- a/src/calibre/ebooks/pdb/ereader/output.py +++ b/src/calibre/ebooks/pdb/ereader/output.py @@ -8,7 +8,6 @@ import os from calibre.customize.conversion import OutputFormatPlugin from calibre.ebooks.pdb.ereader.writer import Writer -from calibre.ebooks.metadata import authors_to_string class EREADEROutput(OutputFormatPlugin): diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py index 91edfaf48b..7821a9e509 100644 --- a/src/calibre/ebooks/pdb/ereader/reader132.py +++ b/src/calibre/ebooks/pdb/ereader/reader132.py @@ -73,9 +73,9 @@ class Reader132(FormatReader): def decompress_text(self, number): if self.header_record.version == 2: - return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') if self.header_record.version == 10: - return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') def get_image(self, number): if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1: diff --git a/src/calibre/ebooks/pdb/ereader/reader202.py b/src/calibre/ebooks/pdb/ereader/reader202.py index 3ef409c9ce..ec8380dfe5 100644 --- a/src/calibre/ebooks/pdb/ereader/reader202.py +++ b/src/calibre/ebooks/pdb/ereader/reader202.py @@ -54,7 +54,7 @@ class Reader202(FormatReader): return self.sections[number] def decompress_text(self, number): - return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding) + return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace') def get_image(self, number): name = None diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index 915ed7d739..7e8f3b241c 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -49,7 +49,7 @@ class Reader(FormatReader): if self.header_record.compression == 1: return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding) if self.header_record.compression == 2: - return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') return '' def extract_content(self, output_dir): diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index ccc26a3fdc..0c334556e8 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -65,7 +65,7 @@ class Reader(FormatReader): def decompress_text(self, number): if number == 1: self.uncompressor = zlib.decompressobj() - return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) + return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') def extract_content(self, output_dir): txt = '' diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 97024ea908..d59c9da61b 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -24,10 +24,12 @@ class PDFInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): + log.debug('Converting file to html...') # The main html file will be named index.html pdftohtml(os.getcwd(), stream.name, options.no_images) from calibre.ebooks.metadata.meta import get_metadata + log.debug('Retrieving document metadata...') mi = get_metadata(stream, 'pdf') opf = OPFCreator(os.getcwd(), mi) @@ -42,9 +44,11 @@ class PDFInput(InputFormatPlugin): new_i = i.replace('-', '') os.rename(i, new_i) manifest.append((new_i, None)) + log.debug('Generating manifest...') opf.create_manifest(manifest) opf.create_spine(['index.html']) + log.debug('Rendering manifest...') with open('metadata.opf', 'wb') as opffile: opf.render(opffile) diff --git a/src/calibre/ebooks/pdf/output.py b/src/calibre/ebooks/pdf/output.py index a20f503c57..b2d649c2cf 100644 --- a/src/calibre/ebooks/pdf/output.py +++ b/src/calibre/ebooks/pdf/output.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement __license__ = 'GPL 3' __copyright__ = '2009, John Schember ' @@ -9,9 +8,8 @@ __docformat__ = 'restructuredtext en' Convert OEB ebook format to PDF. ''' -#unit, papersize, orientation, custom_size, profile - -import os, glob +import glob +import os from calibre.customize.conversion import OutputFormatPlugin, \ OptionRecommendation @@ -54,14 +52,17 @@ class PDFOutput(OutputFormatPlugin): self.metadata = oeb_book.metadata if input_plugin.is_image_collection: + log.debug('Converting input as an image collection...') self.convert_images(input_plugin.get_images()) else: + log.debug('Converting input as a text based book...') self.convert_text(oeb_book) def convert_images(self, images): self.write(ImagePDFWriter, images) def convert_text(self, oeb_book): + self.log.debug('Serializing oeb input to disk for processing...') with TemporaryDirectory('_pdf_out') as oeb_dir: from calibre.customize.ui import plugin_for_output_format oeb_output = plugin_for_output_format('oeb') @@ -86,6 +87,7 @@ class PDFOutput(OutputFormatPlugin): out_stream.seek(0) out_stream.truncate() + self.log.debug('Rendering pages to PDF...') writer.dump(items, out_stream, PDFMetadata(self.metadata)) if close: diff --git a/src/calibre/ebooks/pdf/writer.py b/src/calibre/ebooks/pdf/writer.py index 97eaeb9244..b87aba7bc0 100644 --- a/src/calibre/ebooks/pdf/writer.py +++ b/src/calibre/ebooks/pdf/writer.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -9,11 +8,12 @@ __docformat__ = 'restructuredtext en' Write content to PDF. ''' -import os, shutil +import os +import shutil from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.pdf.pageoptions import unit, paper_size, \ - orientation, size + orientation from calibre.ebooks.metadata import authors_to_string from PyQt4 import QtCore diff --git a/src/calibre/ebooks/pml/input.py b/src/calibre/ebooks/pml/input.py index d755890ca8..270c8a7b0f 100644 --- a/src/calibre/ebooks/pml/input.py +++ b/src/calibre/ebooks/pml/input.py @@ -4,7 +4,9 @@ __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' __docformat__ = 'restructuredtext en' -import glob, os, shutil +import glob +import os +import shutil from calibre.customize.conversion import InputFormatPlugin from calibre.ptempfile import TemporaryDirectory @@ -40,8 +42,9 @@ class PMLInput(InputFormatPlugin): if self.options.input_encoding: ienc = self.options.input_encoding + self.log.debug('Converting PML to HTML...') html = pml_to_html(pml_stream.read().decode(ienc)) - html_stream.write('</head><body>' + html.encode('utf-8') + '</body></html>') + html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>') if pclose: pml_stream.close() @@ -51,9 +54,11 @@ class PMLInput(InputFormatPlugin): def convert(self, stream, options, file_ext, log, accelerators): self.options = options + self.log = log pages, images = [], [] if file_ext == 'pmlz': + log.debug('De-compressing content to temporary directory...') with TemporaryDirectory('_unpmlz') as tdir: zf = ZipFile(stream) zf.extractall(tdir) @@ -64,6 +69,7 @@ class PMLInput(InputFormatPlugin): html_path = os.path.join(os.getcwd(), html_name) pages.append(html_name) + log.debug('Processing PML item %s...' % pml) self.process_pml(pml, html_path) imgs = glob.glob(os.path.join(tdir, '*.png')) @@ -90,12 +96,13 @@ class PMLInput(InputFormatPlugin): manifest_items.append((item, None)) from calibre.ebooks.metadata.meta import get_metadata + log.debug('Reading metadata from input file...') mi = get_metadata(stream, 'pml') opf = OPFCreator(os.getcwd(), mi) + log.debug('Generating manifest...') opf.create_manifest(manifest_items) opf.create_spine(pages) with open('metadata.opf', 'wb') as opffile: opf.render(opffile) return os.path.join(os.getcwd(), 'metadata.opf') - diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 851a89db56..ac66c9a9f5 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -37,13 +37,14 @@ class PMLOutput(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): with TemporaryDirectory('_pmlz_output') as tdir: - pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables) + pmlmlizer = PMLMLizer(log) content = pmlmlizer.extract_content(oeb_book, opts) with open(os.path.join(tdir, 'index.pml'), 'wb') as out: out.write(content.encode(opts.output_encoding, 'replace')) self.write_images(oeb_book.manifest, tdir) + log.debug('Compressing output...') pmlz = ZipFile(output_path, 'w') pmlz.add_dir(tdir) diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index aeb3326636..2ca38176d5 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -10,8 +10,6 @@ __docformat__ = 'restructuredtext en' import re -from htmlentitydefs import codepoint2name - from calibre.ebooks.pdb.ereader import image_name PML_HTML_RULES = [ diff --git a/src/calibre/ebooks/pml/pmlml.py b/src/calibre/ebooks/pml/pmlml.py index ef735a56b1..fd54fcf681 100644 --- a/src/calibre/ebooks/pml/pmlml.py +++ b/src/calibre/ebooks/pml/pmlml.py @@ -67,11 +67,11 @@ SEPARATE_TAGS = [ ] class PMLMLizer(object): - def __init__(self, ignore_tables=False): - self.ignore_tables = ignore_tables + def __init__(self, log): + self.log = log def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to PML markup...') + self.log.info('Converting XHTML to PML markup...') self.oeb_book = oeb_book self.opts = opts return self.pmlmlize_spine() @@ -79,12 +79,14 @@ class PMLMLizer(object): def pmlmlize_spine(self): output = u'' if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating title page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to PML markup...' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.add_page_anchor(item.href) output += self.dump_text(item.data.find(XHTML('body')), stylizer) diff --git a/src/calibre/ebooks/rb/rbml.py b/src/calibre/ebooks/rb/rbml.py index 3563ba2538..bc9248f8b0 100644 --- a/src/calibre/ebooks/rb/rbml.py +++ b/src/calibre/ebooks/rb/rbml.py @@ -52,12 +52,12 @@ STYLES = [ class RBMLizer(object): - def __init__(self, name_map={}, ignore_tables=False): + def __init__(self, log, name_map={}): + self.log = log self.name_map = name_map - self.ignore_tables = ignore_tables def extract_content(self, oeb_book, opts): - oeb_book.logger.info('Converting XHTML to RB markup...') + self.log.info('Converting XHTML to RB markup...') self.oeb_book = oeb_book self.opts = opts return self.mlize_spine() @@ -66,12 +66,14 @@ class RBMLizer(object): def mlize_spine(self): output = u'<HTML><HEAD><TITLE>' if 'titlepage' in self.oeb_book.guide: + self.log.debug('Generating cover page...') href = self.oeb_book.guide['titlepage'].href item = self.oeb_book.manifest.hrefs[href] if item.spine_position is None: stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.dump_text(item.data.find(XHTML('body')), stylizer) for item in self.oeb_book.spine: + self.log.debug('Converting %s to RocketBook HTML...' % item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) output += self.add_page_anchor(item.href) output += self.dump_text(item.data.find(XHTML('body')), stylizer) diff --git a/src/calibre/ebooks/rb/reader.py b/src/calibre/ebooks/rb/reader.py index ffc7d6e799..f97c3d78c5 100644 --- a/src/calibre/ebooks/rb/reader.py +++ b/src/calibre/ebooks/rb/reader.py @@ -84,9 +84,9 @@ class Reader(object): for size in chunck_sizes: cm_chunck = self.stream.read(size) - output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding) + output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace') else: - output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding) + output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace') with open(os.path.join(output_dir, toc_item.name), 'wb') as html: html.write(output.encode('utf-8')) @@ -102,14 +102,17 @@ class Reader(object): img.write(data) def extract_content(self, output_dir): + self.log.debug('Extracting content from file...') html = [] images = [] for item in self.toc: if item.name.lower().endswith('html'): + self.log.debug('HTML item %s found...' % item.name) html.append(item.name) self.get_text(item, output_dir) if item.name.lower().endswith('png'): + self.log.debug('PNG item %s found...' % item.name) images.append(item.name) self.get_image(item, output_dir) diff --git a/src/calibre/ebooks/rb/writer.py b/src/calibre/ebooks/rb/writer.py index 4e697f6d36..0d5086a333 100644 --- a/src/calibre/ebooks/rb/writer.py +++ b/src/calibre/ebooks/rb/writer.py @@ -64,6 +64,7 @@ class RBWriter(object): flags = 0 toc_items.append(TocItem(name.ljust(32, '\x00')[:32], size, flags)) + self.log.debug('Writing file header...') out_stream.write(HEADER) out_stream.write(struct.pack('Quit in the context menu of the ' 'system tray.')).exec_() dynamic['systray_msg'] = True - self.hide() + self.hide_windows() e.ignore() else: if self.confirm_quit():