From 04dd4e88d819ec2771829195bd255533cb5ae361 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 26 Feb 2008 23:39:28 +0000 Subject: [PATCH] --- src/libprs500/ebooks/mobi/huffcdic.py | 6 +++--- src/libprs500/ebooks/mobi/palmdoc.py | 4 ++-- src/libprs500/ebooks/mobi/reader.py | 11 +++++------ 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/libprs500/ebooks/mobi/huffcdic.py b/src/libprs500/ebooks/mobi/huffcdic.py index 84949e2024..34d8ac672c 100644 --- a/src/libprs500/ebooks/mobi/huffcdic.py +++ b/src/libprs500/ebooks/mobi/huffcdic.py @@ -44,8 +44,8 @@ class BitReader(object): class HuffReader(object): - def __init__(self, huffs, extra_flags, codec='cp1252'): - self.huffs, self.extra_flags, self.codec = huffs, extra_flags, codec + def __init__(self, huffs, extra_flags): + self.huffs, self.extra_flags = huffs, extra_flags if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18': raise MobiError('Invalid HUFF header') @@ -124,4 +124,4 @@ class HuffReader(object): r += self.unpack(data[:len(data)-trail_size]) if r.endswith('#'): r = r[:-1] - return r.decode(self.codec) + return r diff --git a/src/libprs500/ebooks/mobi/palmdoc.py b/src/libprs500/ebooks/mobi/palmdoc.py index 2dba998b08..68972cef4e 100644 --- a/src/libprs500/ebooks/mobi/palmdoc.py +++ b/src/libprs500/ebooks/mobi/palmdoc.py @@ -18,7 +18,7 @@ COUNT_BITS = 3 -def decompress_doc(data, codec='cp1252'): +def decompress_doc(data): buffer = [ord(i) for i in data] res = [] i = 0 @@ -42,5 +42,5 @@ def decompress_doc(data, codec='cp1252'): for k in range( num ): res.append(res[j - di+k]) - return unicode(''.join([chr(i) for i in res]), codec) + return ''.join([chr(i) for i in res]) \ No newline at end of file diff --git a/src/libprs500/ebooks/mobi/reader.py b/src/libprs500/ebooks/mobi/reader.py index cb6db14016..80da66e1d8 100644 --- a/src/libprs500/ebooks/mobi/reader.py +++ b/src/libprs500/ebooks/mobi/reader.py @@ -169,6 +169,7 @@ class MobiReader(object): processed_records = self.extract_text() self.add_anchors() + self.processed_html = self.processed_html.decode(self.book_header.codec) self.extract_images(processed_records, output_dir) self.replace_page_breaks() @@ -202,8 +203,7 @@ class MobiReader(object): text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)] processed_records = list(range(0, self.book_header.records+1)) - self.mobi_html = u'' - codec = self.book_header.codec + self.mobi_html = '' if self.book_header.compression_type == 'DH': huffs = [self.sections[i][0] for i in @@ -211,16 +211,15 @@ class MobiReader(object): self.book_header.huff_offset+self.book_header.huff_number)] processed_records += list(range(self.book_header.huff_offset, self.book_header.huff_offset+self.book_header.huff_number)) - huff = HuffReader(huffs, self.book_header.extra_flags, codec) + huff = HuffReader(huffs, self.book_header.extra_flags) self.mobi_html = huff.decompress(text_sections) elif self.book_header.compression_type == '\x00\x02': for section in text_sections: - self.mobi_html += decompress_doc(section, codec) + self.mobi_html += decompress_doc(section) elif self.book_header.compression_type == '\x00\x01': - t = [i.decode(codec) for i in text_sections] - self.mobi_html = ''.join(t) + self.mobi_html = ''.join(text_sections) else: raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))