Fix bug 2687: Text encoding bug.

This commit is contained in:
John Schember 2009-06-21 11:43:42 -04:00
parent cf42eb60ca
commit 2d39bceb64
9 changed files with 10 additions and 11 deletions

View File

@@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin):
out_stream.seek(0) out_stream.seek(0)
out_stream.truncate() out_stream.truncate()
out_stream.write(fb2_content.encode('utf-8')) out_stream.write(fb2_content.encode('utf-8', 'replace'))
if close: if close:
out_stream.close() out_stream.close()

View File

@@ -8,7 +8,6 @@ import os
from calibre.customize.conversion import OutputFormatPlugin from calibre.customize.conversion import OutputFormatPlugin
from calibre.ebooks.pdb.ereader.writer import Writer from calibre.ebooks.pdb.ereader.writer import Writer
from calibre.ebooks.metadata import authors_to_string
class EREADEROutput(OutputFormatPlugin): class EREADEROutput(OutputFormatPlugin):

View File

@@ -73,9 +73,9 @@ class Reader132(FormatReader):
def decompress_text(self, number): def decompress_text(self, number):
if self.header_record.version == 2: if self.header_record.version == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
if self.header_record.version == 10: if self.header_record.version == 10:
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def get_image(self, number): def get_image(self, number):
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1: if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:

View File

@@ -54,7 +54,7 @@ class Reader202(FormatReader):
return self.sections[number] return self.sections[number]
def decompress_text(self, number): def decompress_text(self, number):
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def get_image(self, number): def get_image(self, number):
name = None name = None

View File

@@ -49,7 +49,7 @@ class Reader(FormatReader):
if self.header_record.compression == 1: if self.header_record.compression == 1:
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding) return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
if self.header_record.compression == 2: if self.header_record.compression == 2:
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
return '' return ''
def extract_content(self, output_dir): def extract_content(self, output_dir):

View File

@@ -65,7 +65,7 @@ class Reader(FormatReader):
def decompress_text(self, number): def decompress_text(self, number):
if number == 1: if number == 1:
self.uncompressor = zlib.decompressobj() self.uncompressor = zlib.decompressobj()
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding) return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def extract_content(self, output_dir): def extract_content(self, output_dir):
txt = '' txt = ''

View File

@@ -44,7 +44,7 @@ class PMLInput(InputFormatPlugin):
self.log.debug('Converting PML to HTML...') self.log.debug('Converting PML to HTML...')
html = pml_to_html(pml_stream.read().decode(ienc)) html = pml_to_html(pml_stream.read().decode(ienc))
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>') html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
if pclose: if pclose:
pml_stream.close() pml_stream.close()

View File

@@ -84,9 +84,9 @@ class Reader(object):
for size in chunck_sizes: for size in chunck_sizes:
cm_chunck = self.stream.read(size) cm_chunck = self.stream.read(size)
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding) output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
else: else:
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding) output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
with open(os.path.join(output_dir, toc_item.name), 'wb') as html: with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
html.write(output.encode('utf-8')) html.write(output.encode('utf-8'))

View File

@@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
if options.input_encoding: if options.input_encoding:
ienc = options.input_encoding ienc = options.input_encoding
log.debug('Reading text from file...') log.debug('Reading text from file...')
txt = stream.read().decode(ienc) txt = stream.read().decode(ienc, 'replace')
log.debug('Running text though markdown conversion...') log.debug('Running text though markdown conversion...')
try: try: