mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
This commit is contained in:
parent
e95a7902e0
commit
04dd4e88d8
@ -44,8 +44,8 @@ class BitReader(object):
|
||||
|
||||
class HuffReader(object):
|
||||
|
||||
def __init__(self, huffs, extra_flags, codec='cp1252'):
|
||||
self.huffs, self.extra_flags, self.codec = huffs, extra_flags, codec
|
||||
def __init__(self, huffs, extra_flags):
|
||||
self.huffs, self.extra_flags = huffs, extra_flags
|
||||
|
||||
if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
|
||||
raise MobiError('Invalid HUFF header')
|
||||
@ -124,4 +124,4 @@ class HuffReader(object):
|
||||
r += self.unpack(data[:len(data)-trail_size])
|
||||
if r.endswith('#'):
|
||||
r = r[:-1]
|
||||
return r.decode(self.codec)
|
||||
return r
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
COUNT_BITS = 3
|
||||
|
||||
def decompress_doc(data, codec='cp1252'):
|
||||
def decompress_doc(data):
|
||||
buffer = [ord(i) for i in data]
|
||||
res = []
|
||||
i = 0
|
||||
@ -42,5 +42,5 @@ def decompress_doc(data, codec='cp1252'):
|
||||
for k in range( num ):
|
||||
res.append(res[j - di+k])
|
||||
|
||||
return unicode(''.join([chr(i) for i in res]), codec)
|
||||
return ''.join([chr(i) for i in res])
|
||||
|
@ -169,6 +169,7 @@ class MobiReader(object):
|
||||
|
||||
processed_records = self.extract_text()
|
||||
self.add_anchors()
|
||||
self.processed_html = self.processed_html.decode(self.book_header.codec)
|
||||
self.extract_images(processed_records, output_dir)
|
||||
self.replace_page_breaks()
|
||||
|
||||
@ -202,8 +203,7 @@ class MobiReader(object):
|
||||
text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)]
|
||||
processed_records = list(range(0, self.book_header.records+1))
|
||||
|
||||
self.mobi_html = u''
|
||||
codec = self.book_header.codec
|
||||
self.mobi_html = ''
|
||||
|
||||
if self.book_header.compression_type == 'DH':
|
||||
huffs = [self.sections[i][0] for i in
|
||||
@ -211,16 +211,15 @@ class MobiReader(object):
|
||||
self.book_header.huff_offset+self.book_header.huff_number)]
|
||||
processed_records += list(range(self.book_header.huff_offset,
|
||||
self.book_header.huff_offset+self.book_header.huff_number))
|
||||
huff = HuffReader(huffs, self.book_header.extra_flags, codec)
|
||||
huff = HuffReader(huffs, self.book_header.extra_flags)
|
||||
self.mobi_html = huff.decompress(text_sections)
|
||||
|
||||
elif self.book_header.compression_type == '\x00\x02':
|
||||
for section in text_sections:
|
||||
self.mobi_html += decompress_doc(section, codec)
|
||||
self.mobi_html += decompress_doc(section)
|
||||
|
||||
elif self.book_header.compression_type == '\x00\x01':
|
||||
t = [i.decode(codec) for i in text_sections]
|
||||
self.mobi_html = ''.join(t)
|
||||
self.mobi_html = ''.join(text_sections)
|
||||
|
||||
else:
|
||||
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
||||
|
Loading…
x
Reference in New Issue
Block a user