From b66537869866715d173a1fe0978ad1b1b16cc8cb Mon Sep 17 00:00:00 2001 From: John Schember Date: Thu, 4 Jun 2009 06:35:51 -0400 Subject: [PATCH] Output encoding options for some formats. --- src/calibre/ebooks/pdb/output.py | 5 +++++ src/calibre/ebooks/pdb/palmdoc/writer.py | 4 ++-- src/calibre/ebooks/pdb/ztxt/writer.py | 2 +- src/calibre/ebooks/pml/output.py | 11 ++++++++++- src/calibre/ebooks/txt/output.py | 21 +++++++++++++-------- 5 files changed, 31 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/pdb/output.py b/src/calibre/ebooks/pdb/output.py index fb6984e1e2..bf9f05f24b 100644 --- a/src/calibre/ebooks/pdb/output.py +++ b/src/calibre/ebooks/pdb/output.py @@ -22,6 +22,11 @@ class PDBOutput(OutputFormatPlugin): short_switch='f', choices=FORMAT_WRITERS.keys(), help=(_('Format to use inside the pdb container. Choices are:')+\ ' %s' % FORMAT_WRITERS.keys())), + OptionRecommendation(name='output_encoding', recommended_value='cp1252', + level=OptionRecommendation.LOW, + help=_('Specify the character encoding of the output document. ' \ + 'The default is cp1252. 
Note: This option is not honored by all ' \ + 'formats.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py index 702b176af9..8eca0db124 100644 --- a/src/calibre/ebooks/pdb/palmdoc/writer.py +++ b/src/calibre/ebooks/pdb/palmdoc/writer.py @@ -34,7 +34,7 @@ class Writer(FormatWriter): self.log.info('Compessing data...') for i in range(0, len(txt_records)): self.log.debug('\tCompressing record %i' % i) - txt_records[i] = compress_doc(txt_records[i].encode('cp1252', 'replace')) + txt_records[i] = compress_doc(txt_records[i]) section_lengths.append(len(txt_records[i])) out_stream.seek(0) @@ -46,7 +46,7 @@ class Writer(FormatWriter): def _generate_text(self, spine): txt_writer = TxtWriter(TxtNewlines('system').newline, self.log) - txt = txt_writer.dump(spine) + txt = txt_writer.dump(spine).encode(self.opts.output_encoding, 'replace') txt_length = len(txt) diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py index 608bcc033a..d6bdeefc59 100644 --- a/src/calibre/ebooks/pdb/ztxt/writer.py +++ b/src/calibre/ebooks/pdb/ztxt/writer.py @@ -50,7 +50,7 @@ class Writer(FormatWriter): def _generate_text(self, spine): txt_writer = TxtWriter(TxtNewlines('system').newline, self.log) - txt = txt_writer.dump(spine) + txt = txt_writer.dump(spine).encode(self.opts.output_encoding, 'replace') txt_length = len(txt) diff --git a/src/calibre/ebooks/pml/output.py b/src/calibre/ebooks/pml/output.py index 700407d058..deace8df79 100644 --- a/src/calibre/ebooks/pml/output.py +++ b/src/calibre/ebooks/pml/output.py @@ -15,6 +15,7 @@ except ImportError: import cStringIO from calibre.customize.conversion import OutputFormatPlugin +from calibre.customize.conversion import OptionRecommendation from calibre.ptempfile import TemporaryDirectory from calibre.utils.zipfile import ZipFile from calibre.ebooks.oeb.base import OEB_IMAGES @@ -26,12 +27,20 @@ 
class PMLOutput(OutputFormatPlugin): author = 'John Schember' file_type = 'pmlz' + options = set([ + OptionRecommendation(name='output_encoding', recommended_value='cp1252', + level=OptionRecommendation.LOW, + help=_('Specify the character encoding of the output document. ' \ + 'The default is cp1252. Note: This option is not honored by all ' \ + 'formats.')), + ]) + def convert(self, oeb_book, output_path, input_plugin, opts, log): with TemporaryDirectory('_pmlz_output') as tdir: pmlmlizer = PMLMLizer(ignore_tables=opts.linearize_tables) content = pmlmlizer.extract_content(oeb_book, opts) with open(os.path.join(tdir, 'index.pml'), 'wb') as out: - out.write(content.encode('utf-8')) + out.write(content.encode(opts.output_encoding, 'replace')) self.write_images(oeb_book.manifest, tdir) diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py index 64835c3c52..ffb4d6fee5 100644 --- a/src/calibre/ebooks/txt/output.py +++ b/src/calibre/ebooks/txt/output.py @@ -17,13 +17,18 @@ class TXTOutput(OutputFormatPlugin): file_type = 'txt' options = set([ - OptionRecommendation(name='newline', recommended_value='system', - level=OptionRecommendation.LOW, - short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), - help=_('Type of newline to use. Options are %s. Default is \'system\'. ' - 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' - 'For Mac OS X use \'unix\'. \'system\' will default to the newline ' - 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())), + OptionRecommendation(name='newline', recommended_value='system', + level=OptionRecommendation.LOW, + short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), + help=_('Type of newline to use. Options are %s. Default is \'system\'. ' + 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' + 'For Mac OS X use \'unix\'. 
\'system\' will default to the newline ' + 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())), + OptionRecommendation(name='output_encoding', recommended_value='utf-8', + level=OptionRecommendation.LOW, + help=_('Specify the character encoding of the output document. ' \ + 'The default is utf-8. Note: This option is not honored by all ' \ + 'formats.')), ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): @@ -41,7 +46,7 @@ class TXTOutput(OutputFormatPlugin): out_stream.seek(0) out_stream.truncate() - out_stream.write(txt.encode('utf-8')) + out_stream.write(txt.encode(opts.output_encoding, 'replace')) if close: out_stream.close()