mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Lift "trailing entry" handling out of huff/cdic decompression and into general Mobipocket book processing
This commit is contained in:
parent
40be364965
commit
40ef97e9a3
@ -33,7 +33,7 @@ class BitReader(object):
|
|||||||
class HuffReader(object):
|
class HuffReader(object):
|
||||||
|
|
||||||
def __init__(self, huffs, extra_flags):
|
def __init__(self, huffs, extra_flags):
|
||||||
self.huffs, self.extra_flags = huffs, extra_flags
|
self.huffs = huffs
|
||||||
|
|
||||||
if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
|
if huffs[0][0:4] != 'HUFF' or huffs[0][4:8] != '\x00\x00\x00\x18':
|
||||||
raise MobiError('Invalid HUFF header')
|
raise MobiError('Invalid HUFF header')
|
||||||
@ -84,32 +84,10 @@ class HuffReader(object):
|
|||||||
self._unpack(BitReader(data))
|
self._unpack(BitReader(data))
|
||||||
return self.r
|
return self.r
|
||||||
|
|
||||||
def sizeof_trailing_entries(self, data):
|
|
||||||
|
|
||||||
def sizeof_trailing_entry(ptr, psize):
|
|
||||||
bitpos, result = 0, 0
|
|
||||||
while True:
|
|
||||||
v = ord(ptr[psize-1])
|
|
||||||
result |= (v & 0x7F) << bitpos
|
|
||||||
bitpos += 7
|
|
||||||
psize -= 1
|
|
||||||
if (v & 0x80) != 0 or (bitpos >= 28) or (psize == 0):
|
|
||||||
return result
|
|
||||||
|
|
||||||
num = 0
|
|
||||||
size = len(data)
|
|
||||||
flags = self.extra_flags >> 1
|
|
||||||
while flags:
|
|
||||||
if flags & 1:
|
|
||||||
num += sizeof_trailing_entry(data, size - num)
|
|
||||||
flags >>= 1
|
|
||||||
return num
|
|
||||||
|
|
||||||
def decompress(self, sections):
|
def decompress(self, sections):
|
||||||
r = ''
|
r = ''
|
||||||
for data in sections:
|
for data in sections:
|
||||||
trail_size = self.sizeof_trailing_entries(data)
|
r += self.unpack(data)
|
||||||
r += self.unpack(data[:len(data)-trail_size])
|
|
||||||
if r.endswith('#'):
|
if r.endswith('#'):
|
||||||
r = r[:-1]
|
r = r[:-1]
|
||||||
return r
|
return r
|
||||||
|
@ -89,7 +89,7 @@ class BookHeader(object):
|
|||||||
print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage
|
print '[WARNING] Unknown codepage %d. Assuming cp-1252'%self.codepage
|
||||||
self.codec = 'cp1252'
|
self.codec = 'cp1252'
|
||||||
|
|
||||||
if ident == 'TEXTREAD' or self.length != 0xE4:
|
if ident == 'TEXTREAD' or self.length < 0xE4 or 0xE8 < self.length:
|
||||||
self.extra_flags = 0
|
self.extra_flags = 0
|
||||||
else:
|
else:
|
||||||
self.extra_flags, = struct.unpack('>L', raw[0xF0:0xF4])
|
self.extra_flags, = struct.unpack('>L', raw[0xF0:0xF4])
|
||||||
@ -234,8 +234,33 @@ class MobiReader(object):
|
|||||||
return opf
|
return opf
|
||||||
|
|
||||||
|
|
||||||
|
def sizeof_trailing_entries(self, data):
|
||||||
|
def sizeof_trailing_entry(ptr, psize):
|
||||||
|
bitpos, result = 0, 0
|
||||||
|
while True:
|
||||||
|
v = ord(ptr[psize-1])
|
||||||
|
result |= (v & 0x7F) << bitpos
|
||||||
|
bitpos += 7
|
||||||
|
psize -= 1
|
||||||
|
if (v & 0x80) != 0 or (bitpos >= 28) or (psize == 0):
|
||||||
|
return result
|
||||||
|
|
||||||
|
num = 0
|
||||||
|
size = len(data)
|
||||||
|
flags = self.book_header.extra_flags >> 1
|
||||||
|
while flags:
|
||||||
|
if flags & 1:
|
||||||
|
num += sizeof_trailing_entry(data, size - num)
|
||||||
|
flags >>= 1
|
||||||
|
return num
|
||||||
|
|
||||||
|
def text_section(self, index):
|
||||||
|
data = self.sections[index][0]
|
||||||
|
trail_size = self.sizeof_trailing_entries(data)
|
||||||
|
return data[:len(data)-trail_size]
|
||||||
|
|
||||||
def extract_text(self):
|
def extract_text(self):
|
||||||
text_sections = [self.sections[i][0] for i in range(1, self.book_header.records+1)]
|
text_sections = [self.text_section(i) for i in range(1, self.book_header.records+1)]
|
||||||
processed_records = list(range(0, self.book_header.records+1))
|
processed_records = list(range(0, self.book_header.records+1))
|
||||||
|
|
||||||
self.mobi_html = ''
|
self.mobi_html = ''
|
||||||
@ -246,7 +271,7 @@ class MobiReader(object):
|
|||||||
self.book_header.huff_offset+self.book_header.huff_number)]
|
self.book_header.huff_offset+self.book_header.huff_number)]
|
||||||
processed_records += list(range(self.book_header.huff_offset,
|
processed_records += list(range(self.book_header.huff_offset,
|
||||||
self.book_header.huff_offset+self.book_header.huff_number))
|
self.book_header.huff_offset+self.book_header.huff_number))
|
||||||
huff = HuffReader(huffs, self.book_header.extra_flags)
|
huff = HuffReader(huffs)
|
||||||
self.mobi_html = huff.decompress(text_sections)
|
self.mobi_html = huff.decompress(text_sections)
|
||||||
|
|
||||||
elif self.book_header.compression_type == '\x00\x02':
|
elif self.book_header.compression_type == '\x00\x02':
|
||||||
|
Loading…
x
Reference in New Issue
Block a user