mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
e313a72ec1
commit
823cacf811
@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
|
|||||||
import struct, datetime, sys, os
|
import struct, datetime, sys, os
|
||||||
from calibre.utils.date import utc_tz
|
from calibre.utils.date import utc_tz
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
|
from calibre.ebooks.mobi.writer2.utils import decode_hex_number
|
||||||
|
|
||||||
# PalmDB {{{
|
# PalmDB {{{
|
||||||
class PalmDOCAttributes(object):
|
class PalmDOCAttributes(object):
|
||||||
@ -382,7 +383,7 @@ class TagX(object): # {{{
|
|||||||
self.num_values, self.bitmask, self.bmask, self.eof)
|
self.num_values, self.bitmask, self.bmask, self.eof)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class PrimaryIndexRecord(object): # {{{
|
class IndexHeader(object): # {{{
|
||||||
|
|
||||||
def __init__(self, record):
|
def __init__(self, record):
|
||||||
self.record = record
|
self.record = record
|
||||||
@ -437,9 +438,8 @@ class PrimaryIndexRecord(object): # {{{
|
|||||||
raise ValueError('TAGX last entry is not EOF')
|
raise ValueError('TAGX last entry is not EOF')
|
||||||
|
|
||||||
idxt0_pos = self.header_length+self.tagx_header_length
|
idxt0_pos = self.header_length+self.tagx_header_length
|
||||||
last_name_len, = struct.unpack(b'>B', raw[idxt0_pos])
|
last_num, consumed = decode_hex_number(raw[idxt0_pos:])
|
||||||
count_pos = idxt0_pos+1+last_name_len
|
count_pos = idxt0_pos + consumed
|
||||||
last_num = int(raw[idxt0_pos+1:count_pos], 16)
|
|
||||||
self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
|
self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
|
||||||
|
|
||||||
if last_num != self.ncx_count - 1:
|
if last_num != self.ncx_count - 1:
|
||||||
@ -457,9 +457,12 @@ class PrimaryIndexRecord(object): # {{{
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = ['*'*20 + ' Index Header '+ '*'*20]
|
ans = ['*'*20 + ' Index Header '+ '*'*20]
|
||||||
a = ans.append
|
a = ans.append
|
||||||
|
def u(w):
|
||||||
|
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
|
||||||
|
len(w), not bool(w.replace(b'\0', b'')) ))
|
||||||
|
|
||||||
a('Header length: %d'%self.header_length)
|
a('Header length: %d'%self.header_length)
|
||||||
a('Unknown1: %r (%d bytes) (All zeros: %r)'%(self.unknown1,
|
u(self.unknown1)
|
||||||
len(self.unknown1), not bool(self.unknown1.replace(b'\0', '')) ))
|
|
||||||
a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
|
a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
|
||||||
a('Offset to IDXT start: %d'%self.idxt_start)
|
a('Offset to IDXT start: %d'%self.idxt_start)
|
||||||
a('Number of index records: %d'%self.index_count)
|
a('Number of index records: %d'%self.index_count)
|
||||||
@ -472,11 +475,9 @@ class PrimaryIndexRecord(object): # {{{
|
|||||||
a('LIGT start: %d'%self.ligt_start)
|
a('LIGT start: %d'%self.ligt_start)
|
||||||
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
|
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
|
||||||
a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks)
|
a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks)
|
||||||
a('Unknown2: %r (%d bytes) (All zeros: %r)'%(self.unknown2,
|
u(self.unknown2)
|
||||||
len(self.unknown2), not bool(self.unknown2.replace(b'\0', '')) ))
|
|
||||||
a('TAGX offset: %d'%self.tagx_offset)
|
a('TAGX offset: %d'%self.tagx_offset)
|
||||||
a('Unknown3: %r (%d bytes) (All zeros: %r)'%(self.unknown3,
|
u(self.unknown3)
|
||||||
len(self.unknown3), not bool(self.unknown3.replace(b'\0', '')) ))
|
|
||||||
a('\n\n')
|
a('\n\n')
|
||||||
a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
|
a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
|
||||||
a('Header length: %d'%self.tagx_header_length)
|
a('Header length: %d'%self.tagx_header_length)
|
||||||
@ -488,6 +489,71 @@ class PrimaryIndexRecord(object): # {{{
|
|||||||
return '\n'.join(ans)
|
return '\n'.join(ans)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class IndexEntry(object):
|
||||||
|
|
||||||
|
def __init__(self, ident, entry_type, raw):
|
||||||
|
self.id = ident
|
||||||
|
self.entry_type = entry_type
|
||||||
|
|
||||||
|
class IndexRecord(object): # {{{
|
||||||
|
|
||||||
|
def __init__(self, record):
|
||||||
|
self.record = record
|
||||||
|
raw = self.record.raw
|
||||||
|
if raw[:4] != b'INDX':
|
||||||
|
raise ValueError('Invalid Primary Index Record')
|
||||||
|
|
||||||
|
u = struct.unpack
|
||||||
|
|
||||||
|
self.header_length, = u('>I', raw[4:8])
|
||||||
|
self.unknown1 = raw[8:12]
|
||||||
|
self.header_type, = u('>I', raw[12:16])
|
||||||
|
self.unknown2 = raw[16:20]
|
||||||
|
self.idxt_offset, self.idxt_count = u(b'>II', raw[20:28])
|
||||||
|
if self.idxt_offset < 192:
|
||||||
|
raise ValueError('Unknown Index record structure')
|
||||||
|
self.unknown3 = raw[28:36]
|
||||||
|
self.unknown4 = raw[36:192] # Should be 156 bytes
|
||||||
|
|
||||||
|
self.index_offsets = []
|
||||||
|
indices = raw[self.idxt_offset:]
|
||||||
|
if indices[:4] != b'IDXT':
|
||||||
|
raise ValueError("Invalid IDXT index table")
|
||||||
|
indices = indices[4:]
|
||||||
|
for i in range(self.idxt_count):
|
||||||
|
off, = u(b'>H', indices[i*2:(i+1)*2])
|
||||||
|
self.index_offsets.append(off-192)
|
||||||
|
|
||||||
|
indxt = raw[192:self.idxt_offset]
|
||||||
|
self.indices = []
|
||||||
|
for off in self.index_offsets:
|
||||||
|
index = indxt[off:]
|
||||||
|
ident, consumed = decode_hex_number(index)
|
||||||
|
index = index[consumed:]
|
||||||
|
entry_type = u(b'>B', index[0])
|
||||||
|
self.indices.append(IndexEntry(ident, entry_type, index[1:]))
|
||||||
|
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
ans = ['*'*20 + ' Index Record (%d bytes)'%len(self.record.raw)+ '*'*20]
|
||||||
|
a = ans.append
|
||||||
|
def u(w):
|
||||||
|
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
|
||||||
|
len(w), not bool(w.replace(b'\0', b'')) ))
|
||||||
|
a('Header length: %d'%self.header_length)
|
||||||
|
u(self.unknown1)
|
||||||
|
a('Header Type: %d'%self.header_type)
|
||||||
|
u(self.unknown2)
|
||||||
|
a('IDXT Offset: %d'%self.idxt_offset)
|
||||||
|
a('IDXT Count: %d'%self.idxt_count)
|
||||||
|
u(self.unknown3)
|
||||||
|
u(self.unknown4)
|
||||||
|
a('Index offsets: %r'%self.index_offsets)
|
||||||
|
|
||||||
|
return '\n'.join(ans)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
class MOBIFile(object): # {{{
|
class MOBIFile(object): # {{{
|
||||||
|
|
||||||
def __init__(self, stream):
|
def __init__(self, stream):
|
||||||
@ -516,10 +582,11 @@ class MOBIFile(object): # {{{
|
|||||||
|
|
||||||
self.mobi_header = MOBIHeader(self.records[0])
|
self.mobi_header = MOBIHeader(self.records[0])
|
||||||
|
|
||||||
self.primary_index_record = None
|
self.index_header = None
|
||||||
pir = self.mobi_header.primary_index_record
|
pir = self.mobi_header.primary_index_record
|
||||||
if pir != 0xffffffff:
|
if pir != 0xffffffff:
|
||||||
self.primary_index_record = PrimaryIndexRecord(self.records[pir])
|
self.index_header = IndexHeader(self.records[pir])
|
||||||
|
self.index_record = IndexRecord(self.records[pir+1])
|
||||||
|
|
||||||
|
|
||||||
def print_header(self, f=sys.stdout):
|
def print_header(self, f=sys.stdout):
|
||||||
@ -542,9 +609,12 @@ def inspect_mobi(path_or_stream):
|
|||||||
os.mkdir(ddir)
|
os.mkdir(ddir)
|
||||||
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
||||||
f.print_header(f=out)
|
f.print_header(f=out)
|
||||||
if f.primary_index_record is not None:
|
if f.index_header is not None:
|
||||||
with open(os.path.join(ddir, 'primary_index_record.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
||||||
print(str(f.primary_index_record), file=out)
|
print(str(f.index_header), file=out)
|
||||||
|
print('\n\n', file=out)
|
||||||
|
print(str(f.index_record), file=out)
|
||||||
|
|
||||||
print ('Debug data saved to:', ddir)
|
print ('Debug data saved to:', ddir)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -15,10 +15,11 @@ from calibre.ebooks import normalize
|
|||||||
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
|
||||||
from calibre.ebooks.mobi.writer2.serializer import Serializer
|
from calibre.ebooks.mobi.writer2.serializer import Serializer
|
||||||
from calibre.ebooks.compression.palmdoc import compress_doc
|
from calibre.ebooks.compression.palmdoc import compress_doc
|
||||||
from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
|
|
||||||
from calibre.ebooks.mobi.langcodes import iana2mobi
|
from calibre.ebooks.mobi.langcodes import iana2mobi
|
||||||
from calibre.utils.filenames import ascii_filename
|
from calibre.utils.filenames import ascii_filename
|
||||||
from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
|
from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
|
||||||
|
from calibre.ebooks.mobi.writer2.utils import (rescale_image, decint,
|
||||||
|
DECINT_FORWARD, DECINT_BACKWARD)
|
||||||
|
|
||||||
EXTH_CODES = {
|
EXTH_CODES = {
|
||||||
'creator': 100,
|
'creator': 100,
|
||||||
@ -41,87 +42,10 @@ WRITE_UNCROSSABLE_BREAKS = False
|
|||||||
|
|
||||||
RECORD_SIZE = 0x1000 # 4096
|
RECORD_SIZE = 0x1000 # 4096
|
||||||
|
|
||||||
IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
|
||||||
MAX_THUMB_SIZE = 16 * 1024
|
MAX_THUMB_SIZE = 16 * 1024
|
||||||
MAX_THUMB_DIMEN = (180, 240)
|
MAX_THUMB_DIMEN = (180, 240)
|
||||||
|
|
||||||
# Almost like the one for MS LIT, but not quite.
|
|
||||||
DECINT_FORWARD = 0
|
|
||||||
DECINT_BACKWARD = 1
|
|
||||||
|
|
||||||
def decint(value, direction):
|
|
||||||
'''
|
|
||||||
Some parts of the Mobipocket format encode data as variable-width integers.
|
|
||||||
These integers are represented big-endian with 7 bits per byte in bits 1-7.
|
|
||||||
They may be either forward-encoded, in which case only the LSB has bit 8 set,
|
|
||||||
or backward-encoded, in which case only the MSB has bit 8 set.
|
|
||||||
For example, the number 0x11111 would be represented forward-encoded as:
|
|
||||||
|
|
||||||
0x04 0x22 0x91
|
|
||||||
|
|
||||||
And backward-encoded as:
|
|
||||||
|
|
||||||
0x84 0x22 0x11
|
|
||||||
|
|
||||||
This function encodes the integer ``value`` as a variable width integer and
|
|
||||||
returns the bytestring corresponding to it.
|
|
||||||
'''
|
|
||||||
# Encode vwi
|
|
||||||
byts = bytearray()
|
|
||||||
while True:
|
|
||||||
b = value & 0x7f
|
|
||||||
value >>= 7
|
|
||||||
byts.append(b)
|
|
||||||
if value == 0:
|
|
||||||
break
|
|
||||||
if direction == DECINT_FORWARD:
|
|
||||||
byts[0] |= 0x80
|
|
||||||
elif direction == DECINT_BACKWARD:
|
|
||||||
byts[-1] |= 0x80
|
|
||||||
return bytes(byts)
|
|
||||||
|
|
||||||
def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
|
|
||||||
'''
|
|
||||||
Convert image setting all transparent pixels to white and changing format
|
|
||||||
to JPEG. Ensure the resultant image has a byte size less than
|
|
||||||
maxsizeb.
|
|
||||||
|
|
||||||
If dimen is not None, generate a thumbnail of width=dimen, height=dimen
|
|
||||||
|
|
||||||
Returns the image as a bytestring
|
|
||||||
'''
|
|
||||||
if dimen is not None:
|
|
||||||
data = thumbnail(data, width=dimen, height=dimen,
|
|
||||||
compression_quality=90)[-1]
|
|
||||||
else:
|
|
||||||
# Replace transparent pixels with white pixels and convert to JPEG
|
|
||||||
data = save_cover_data_to(data, 'img.jpg', return_data=True)
|
|
||||||
if len(data) <= maxsizeb:
|
|
||||||
return data
|
|
||||||
orig_data = data
|
|
||||||
img = Image()
|
|
||||||
quality = 95
|
|
||||||
|
|
||||||
img.load(data)
|
|
||||||
while len(data) >= maxsizeb and quality >= 10:
|
|
||||||
quality -= 5
|
|
||||||
img.set_compression_quality(quality)
|
|
||||||
data = img.export('jpg')
|
|
||||||
if len(data) <= maxsizeb:
|
|
||||||
return data
|
|
||||||
orig_data = data
|
|
||||||
|
|
||||||
scale = 0.9
|
|
||||||
while len(data) >= maxsizeb and scale >= 0.05:
|
|
||||||
img = Image()
|
|
||||||
img.load(orig_data)
|
|
||||||
w, h = img.size
|
|
||||||
img.size = (int(scale*w), int(scale*h))
|
|
||||||
img.set_compression_quality(quality)
|
|
||||||
data = img.export('jpg')
|
|
||||||
scale -= 0.05
|
|
||||||
return data
|
|
||||||
|
|
||||||
class MobiWriter(object):
|
class MobiWriter(object):
|
||||||
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user