mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug 2687: Text encoding bug.
This commit is contained in:
parent
cf42eb60ca
commit
2d39bceb64
@@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin):
|
||||
|
||||
out_stream.seek(0)
|
||||
out_stream.truncate()
|
||||
out_stream.write(fb2_content.encode('utf-8'))
|
||||
out_stream.write(fb2_content.encode('utf-8', 'replace'))
|
||||
|
||||
if close:
|
||||
out_stream.close()
|
||||
|
@@ -8,7 +8,6 @@ import os
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin
|
||||
from calibre.ebooks.pdb.ereader.writer import Writer
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
|
||||
class EREADEROutput(OutputFormatPlugin):
|
||||
|
||||
|
@@ -73,9 +73,9 @@ class Reader132(FormatReader):
|
||||
|
||||
def decompress_text(self, number):
|
||||
if self.header_record.version == 2:
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
if self.header_record.version == 10:
|
||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def get_image(self, number):
|
||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||
|
@@ -54,7 +54,7 @@ class Reader202(FormatReader):
|
||||
return self.sections[number]
|
||||
|
||||
def decompress_text(self, number):
|
||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def get_image(self, number):
|
||||
name = None
|
||||
|
@@ -49,7 +49,7 @@ class Reader(FormatReader):
|
||||
if self.header_record.compression == 1:
|
||||
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
if self.header_record.compression == 2:
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
return ''
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
|
@@ -65,7 +65,7 @@ class Reader(FormatReader):
|
||||
def decompress_text(self, number):
|
||||
if number == 1:
|
||||
self.uncompressor = zlib.decompressobj()
|
||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
txt = ''
|
||||
|
@@ -44,7 +44,7 @@ class PMLInput(InputFormatPlugin):
|
||||
|
||||
self.log.debug('Converting PML to HTML...')
|
||||
html = pml_to_html(pml_stream.read().decode(ienc))
|
||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>')
|
||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
|
||||
|
||||
if pclose:
|
||||
pml_stream.close()
|
||||
|
@@ -84,9 +84,9 @@ class Reader(object):
|
||||
|
||||
for size in chunck_sizes:
|
||||
cm_chunck = self.stream.read(size)
|
||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
else:
|
||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding)
|
||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||
|
||||
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
||||
html.write(output.encode('utf-8'))
|
||||
|
@@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
|
||||
if options.input_encoding:
|
||||
ienc = options.input_encoding
|
||||
log.debug('Reading text from file...')
|
||||
txt = stream.read().decode(ienc)
|
||||
txt = stream.read().decode(ienc, 'replace')
|
||||
|
||||
log.debug('Running text though markdown conversion...')
|
||||
try:
|
||||
|
Loading…
x
Reference in New Issue
Block a user