mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
This commit is contained in:
parent
e95a7902e0
commit
04dd4e88d8
@ -44,8 +44,8 @@ class BitReader(object):
|
|||||||
|
|
||||||
class HuffReader(object):
|
class HuffReader(object):
|
||||||
|
|
||||||
def __init__(self, huffs, extra_flags, codec='cp1252'):
|
def __init__(self, huffs, extra_flags):
|
||||||
self.huffs, self.extra_flags, self.codec = huffs, extra_flags, codec
|
self.huffs, self.extra_flags = huffs, extra_flags
|
||||||
|
|
||||||
if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
|
if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
|
||||||
raise MobiError('Invalid HUFF header')
|
raise MobiError('Invalid HUFF header')
|
||||||
@ -124,4 +124,4 @@ class HuffReader(object):
|
|||||||
r += self.unpack(data[:len(data)-trail_size])
|
r += self.unpack(data[:len(data)-trail_size])
|
||||||
if r.endswith('#'):
|
if r.endswith('#'):
|
||||||
r = r[:-1]
|
r = r[:-1]
|
||||||
return r.decode(self.codec)
|
return r
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
COUNT_BITS = 3
|
COUNT_BITS = 3
|
||||||
|
|
||||||
def decompress_doc(data, codec='cp1252'):
|
def decompress_doc(data):
|
||||||
buffer = [ord(i) for i in data]
|
buffer = [ord(i) for i in data]
|
||||||
res = []
|
res = []
|
||||||
i = 0
|
i = 0
|
||||||
@ -42,5 +42,5 @@ def decompress_doc(data, codec='cp1252'):
|
|||||||
for k in range( num ):
|
for k in range( num ):
|
||||||
res.append(res[j - di+k])
|
res.append(res[j - di+k])
|
||||||
|
|
||||||
return unicode(''.join([chr(i) for i in res]), codec)
|
return ''.join([chr(i) for i in res])
|
||||||
|
|
@ -169,6 +169,7 @@ class MobiReader(object):
|
|||||||
|
|
||||||
processed_records = self.extract_text()
|
processed_records = self.extract_text()
|
||||||
self.add_anchors()
|
self.add_anchors()
|
||||||
|
self.processed_html = self.processed_html.decode(self.book_header.codec)
|
||||||
self.extract_images(processed_records, output_dir)
|
self.extract_images(processed_records, output_dir)
|
||||||
self.replace_page_breaks()
|
self.replace_page_breaks()
|
||||||
|
|
||||||
@ -202,8 +203,7 @@ class MobiReader(object):
|
|||||||
text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)]
|
text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)]
|
||||||
processed_records = list(range(0, self.book_header.records+1))
|
processed_records = list(range(0, self.book_header.records+1))
|
||||||
|
|
||||||
self.mobi_html = u''
|
self.mobi_html = ''
|
||||||
codec = self.book_header.codec
|
|
||||||
|
|
||||||
if self.book_header.compression_type == 'DH':
|
if self.book_header.compression_type == 'DH':
|
||||||
huffs = [self.sections[i][0] for i in
|
huffs = [self.sections[i][0] for i in
|
||||||
@ -211,16 +211,15 @@ class MobiReader(object):
|
|||||||
self.book_header.huff_offset+self.book_header.huff_number)]
|
self.book_header.huff_offset+self.book_header.huff_number)]
|
||||||
processed_records += list(range(self.book_header.huff_offset,
|
processed_records += list(range(self.book_header.huff_offset,
|
||||||
self.book_header.huff_offset+self.book_header.huff_number))
|
self.book_header.huff_offset+self.book_header.huff_number))
|
||||||
huff = HuffReader(huffs, self.book_header.extra_flags, codec)
|
huff = HuffReader(huffs, self.book_header.extra_flags)
|
||||||
self.mobi_html = huff.decompress(text_sections)
|
self.mobi_html = huff.decompress(text_sections)
|
||||||
|
|
||||||
elif self.book_header.compression_type == '\x00\x02':
|
elif self.book_header.compression_type == '\x00\x02':
|
||||||
for section in text_sections:
|
for section in text_sections:
|
||||||
self.mobi_html += decompress_doc(section, codec)
|
self.mobi_html += decompress_doc(section)
|
||||||
|
|
||||||
elif self.book_header.compression_type == '\x00\x01':
|
elif self.book_header.compression_type == '\x00\x01':
|
||||||
t = [i.decode(codec) for i in text_sections]
|
self.mobi_html = ''.join(text_sections)
|
||||||
self.mobi_html = ''.join(t)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
raise MobiError('Unknown compression algorithm: %s'%repr(self.book_header.compression_type))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user