mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix bug 2687: Text encoding bug.
This commit is contained in:
parent
cf42eb60ca
commit
2d39bceb64
@ -30,7 +30,7 @@ class FB2Output(OutputFormatPlugin):
|
|||||||
|
|
||||||
out_stream.seek(0)
|
out_stream.seek(0)
|
||||||
out_stream.truncate()
|
out_stream.truncate()
|
||||||
out_stream.write(fb2_content.encode('utf-8'))
|
out_stream.write(fb2_content.encode('utf-8', 'replace'))
|
||||||
|
|
||||||
if close:
|
if close:
|
||||||
out_stream.close()
|
out_stream.close()
|
||||||
|
@ -8,7 +8,6 @@ import os
|
|||||||
|
|
||||||
from calibre.customize.conversion import OutputFormatPlugin
|
from calibre.customize.conversion import OutputFormatPlugin
|
||||||
from calibre.ebooks.pdb.ereader.writer import Writer
|
from calibre.ebooks.pdb.ereader.writer import Writer
|
||||||
from calibre.ebooks.metadata import authors_to_string
|
|
||||||
|
|
||||||
class EREADEROutput(OutputFormatPlugin):
|
class EREADEROutput(OutputFormatPlugin):
|
||||||
|
|
||||||
|
@ -73,9 +73,9 @@ class Reader132(FormatReader):
|
|||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if self.header_record.version == 2:
|
if self.header_record.version == 2:
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
if self.header_record.version == 10:
|
if self.header_record.version == 10:
|
||||||
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def get_image(self, number):
|
def get_image(self, number):
|
||||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||||
|
@ -54,7 +54,7 @@ class Reader202(FormatReader):
|
|||||||
return self.sections[number]
|
return self.sections[number]
|
||||||
|
|
||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def get_image(self, number):
|
def get_image(self, number):
|
||||||
name = None
|
name = None
|
||||||
|
@ -49,7 +49,7 @@ class Reader(FormatReader):
|
|||||||
if self.header_record.compression == 1:
|
if self.header_record.compression == 1:
|
||||||
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
return self.section_data(number).decode('cp1252' if self.encoding is None else self.encoding)
|
||||||
if self.header_record.compression == 2:
|
if self.header_record.compression == 2:
|
||||||
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
|
@ -65,7 +65,7 @@ class Reader(FormatReader):
|
|||||||
def decompress_text(self, number):
|
def decompress_text(self, number):
|
||||||
if number == 1:
|
if number == 1:
|
||||||
self.uncompressor = zlib.decompressobj()
|
self.uncompressor = zlib.decompressobj()
|
||||||
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding)
|
return self.uncompressor.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
def extract_content(self, output_dir):
|
def extract_content(self, output_dir):
|
||||||
txt = ''
|
txt = ''
|
||||||
|
@ -44,7 +44,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
|
|
||||||
self.log.debug('Converting PML to HTML...')
|
self.log.debug('Converting PML to HTML...')
|
||||||
html = pml_to_html(pml_stream.read().decode(ienc))
|
html = pml_to_html(pml_stream.read().decode(ienc))
|
||||||
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8') + '</body></html>')
|
html_stream.write('<html><head><title /></head><body>' + html.encode('utf-8', 'replace') + '</body></html>')
|
||||||
|
|
||||||
if pclose:
|
if pclose:
|
||||||
pml_stream.close()
|
pml_stream.close()
|
||||||
|
@ -84,9 +84,9 @@ class Reader(object):
|
|||||||
|
|
||||||
for size in chunck_sizes:
|
for size in chunck_sizes:
|
||||||
cm_chunck = self.stream.read(size)
|
cm_chunck = self.stream.read(size)
|
||||||
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding)
|
output += zlib.decompress(cm_chunck).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
else:
|
else:
|
||||||
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding)
|
output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
|
||||||
|
|
||||||
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
|
||||||
html.write(output.encode('utf-8'))
|
html.write(output.encode('utf-8'))
|
||||||
|
@ -22,7 +22,7 @@ class TXTInput(InputFormatPlugin):
|
|||||||
if options.input_encoding:
|
if options.input_encoding:
|
||||||
ienc = options.input_encoding
|
ienc = options.input_encoding
|
||||||
log.debug('Reading text from file...')
|
log.debug('Reading text from file...')
|
||||||
txt = stream.read().decode(ienc)
|
txt = stream.read().decode(ienc, 'replace')
|
||||||
|
|
||||||
log.debug('Running text though markdown conversion...')
|
log.debug('Running text though markdown conversion...')
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user