mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
When dumping MOBI files for debugging, also dump container records and HD images
This commit is contained in:
parent
f0a890e8c1
commit
63f9cbda0b
66
src/calibre/ebooks/mobi/debug/containers.py
Normal file
66
src/calibre/ebooks/mobi/debug/containers.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from struct import unpack_from
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.debug.headers import EXTHHeader
|
||||||
|
|
||||||
|
class ContainerHeader(object):
    """Parsed view of a MOBI container (``CONT``) header record for debug dumps.

    The constructor decodes the fixed-size header fields from the raw record
    bytes; ``resources`` and ``hrefs`` start out empty and are filled in by
    the caller as the following records are processed.  ``str()`` of an
    instance gives a human readable, multi-line description.
    """

    def __init__(self, data):
        """Decode the header fields from *data*.

        :param data: raw bytes of the container record, starting with the
            4-byte record identifier.
        :raises struct.error: if *data* is shorter than the 48 bytes of
            fixed header fields read here.
        """
        self.ident = data[:4]
        self.record_size, self.type, self.count, self.encoding = unpack_from(b'>IHHI', data, 4)
        # Translate the numeric code page into a codec name.  Unrecognised
        # values are kept as their repr() so that they still show up in the
        # dump.  NOTE(review): such a value is not a codec name, so decoding
        # the title below would raise LookupError in that case -- confirm
        # whether that is acceptable.
        self.encoding = {
            1252 : 'cp1252',
            65001: 'utf-8',
        }.get(self.encoding, repr(self.encoding))

        # Eight big-endian 32-bit integers follow at offset 16.
        rest = list(unpack_from(b'>IIIIIIII', data, 16))
        self.num_of_resource_records = rest[2]
        self.num_of_non_dummy_resource_records = rest[3]
        self.offset_to_href_record = rest[4]
        self.unknowns1 = rest[:2]
        self.unknowns2 = rest[5]
        self.header_length = rest[6]
        self.title_length = rest[7]

        # Populated externally while iterating over the records that follow.
        self.resources = []
        self.hrefs = []

        if data[48:52] == b'EXTH':
            self.exth = EXTHHeader(data[48:])
            # The title is stored directly after the EXTH block.
            self.title = data[48 + self.exth.length:][:self.title_length].decode(self.encoding)
            # presumably EXTHHeader supports lookup by EXTH record type and
            # 539 holds the container's mime type -- verify against EXTHHeader
            self.is_image_container = self.exth[539] == 'application/image'
        else:
            self.exth = ' No EXTH header present '
            self.title = ''
            self.is_image_container = False

        # Everything past header + title is expected to be padding; count the
        # null bytes so that __str__ can flag any non-null data.
        self.bytes_after_exth = data[self.header_length + self.title_length:]
        self.null_bytes_after_exth = self.bytes_after_exth.count(b'\0')

    def add_hrefs(self, data):
        """Store the hrefs contained in the raw href record *data*.

        :param data: UTF-8 encoded bytes holding ``|`` separated hrefs.
        """
        # kindlegen inserts a trailing | after the last href
        # Build a real list (not a lazy filter object) so that the hrefs can
        # be printed, indexed and iterated repeatedly on any Python version.
        self.hrefs = [href for href in data.decode('utf-8').split('|') if href]

    def __str__(self):
        """Return a multi-line, human readable dump of the header."""
        ans = [('*'*10) + ' Container Header ' + ('*'*10)]
        a = ans.append
        a('Record size: %d' % self.record_size)
        a('Type: %d' % self.type)
        a('Total number of records in this container: %d' % self.count)
        a('Encoding: %s' % self.encoding)
        a('Unknowns1: %s' % self.unknowns1)
        a('Num of resource records: %d' % self.num_of_resource_records)
        a('Num of non-dummy resource records: %d' % self.num_of_non_dummy_resource_records)
        a('Offset to href record: %d' % self.offset_to_href_record)
        a('Unknowns2: %s' % self.unknowns2)
        a('Header length: %d' % self.header_length)
        a('Title Length: %s' % self.title_length)
        a('hrefs: %s' % self.hrefs)
        a('Null bytes after EXTH: %d' % self.null_bytes_after_exth)
        if len(self.bytes_after_exth) != self.null_bytes_after_exth:
            a('Non-null bytes present after EXTH header!!!!')
        return '\n'.join(ans) + '\n\n' + str(self.exth) + '\n\n' + ('Title: %s' % self.title)
|
||||||
|
|
@ -12,6 +12,7 @@ import sys, os, struct, textwrap
|
|||||||
from itertools import izip
|
from itertools import izip
|
||||||
|
|
||||||
from calibre import CurrentDir
|
from calibre import CurrentDir
|
||||||
|
from calibre.ebooks.mobi.debug.containers import ContainerHeader
|
||||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
from calibre.ebooks.mobi.debug.index import (SKELIndex, SECTIndex, NCXIndex,
|
||||||
GuideIndex)
|
GuideIndex)
|
||||||
@ -152,9 +153,11 @@ class MOBIFile(object):
|
|||||||
|
|
||||||
def extract_resources(self, records):
|
def extract_resources(self, records):
|
||||||
self.resource_map = []
|
self.resource_map = []
|
||||||
|
self.containers = []
|
||||||
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
known_types = {b'FLIS', b'FCIS', b'SRCS',
|
||||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||||
b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}
|
b'AUDI', b'VIDE', b'CRES', b'CONT', b'CMET'}
|
||||||
|
container = None
|
||||||
|
|
||||||
for i, rec in enumerate(records):
|
for i, rec in enumerate(records):
|
||||||
for (l, r, offset) in self.resource_ranges:
|
for (l, r, offset) in self.resource_ranges:
|
||||||
@ -181,7 +184,27 @@ class MOBIFile(object):
|
|||||||
payload = (font['font_data'] if font['font_data'] else
|
payload = (font['font_data'] if font['font_data'] else
|
||||||
font['raw_data'])
|
font['raw_data'])
|
||||||
prefix, ext = 'fonts', font['ext']
|
prefix, ext = 'fonts', font['ext']
|
||||||
|
elif sig == b'CONT':
|
||||||
|
if payload == b'CONTBOUNDARY':
|
||||||
|
self.containers.append(container)
|
||||||
|
container = None
|
||||||
|
continue
|
||||||
|
container = ContainerHeader(payload)
|
||||||
|
elif sig == b'CRES':
|
||||||
|
container.resources.append(payload)
|
||||||
|
if container.is_image_container:
|
||||||
|
payload = payload[12:]
|
||||||
|
q = what(None, payload)
|
||||||
|
if q:
|
||||||
|
prefix, ext = 'hd-images', q
|
||||||
|
resource_index = len(container.resources)
|
||||||
|
elif sig == b'\xa0\xa0\xa0\xa0' and len(payload) == 4:
|
||||||
|
container.resources.append(None)
|
||||||
|
continue
|
||||||
elif sig not in known_types:
|
elif sig not in known_types:
|
||||||
|
if container is not None and len(container.resources) == container.num_of_resource_records:
|
||||||
|
container.add_hrefs(payload)
|
||||||
|
continue
|
||||||
q = what(None, rec.raw)
|
q = what(None, rec.raw)
|
||||||
if q:
|
if q:
|
||||||
prefix, ext = 'images', q
|
prefix, ext = 'images', q
|
||||||
@ -274,7 +297,7 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(alltext, 'wb') as of:
|
with open(alltext, 'wb') as of:
|
||||||
of.write(f.raw_text)
|
of.write(f.raw_text)
|
||||||
|
|
||||||
for x in ('text_records', 'images', 'fonts', 'binary', 'files', 'flows'):
|
for x in ('text_records', 'images', 'fonts', 'binary', 'files', 'flows', 'hd-images',):
|
||||||
os.mkdir(os.path.join(ddir, x))
|
os.mkdir(os.path.join(ddir, x))
|
||||||
|
|
||||||
for rec in f.text_records:
|
for rec in f.text_records:
|
||||||
@ -284,6 +307,10 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(os.path.join(ddir, href), 'wb') as fo:
|
with open(os.path.join(ddir, href), 'wb') as fo:
|
||||||
fo.write(payload)
|
fo.write(payload)
|
||||||
|
|
||||||
|
for i, container in enumerate(f.containers):
|
||||||
|
with open(os.path.join(ddir, 'container%d.txt' % (i + 1)), 'wb') as cf:
|
||||||
|
cf.write(str(container).encode('utf-8'))
|
||||||
|
|
||||||
if f.fdst:
|
if f.fdst:
|
||||||
with open(os.path.join(ddir, 'fdst.record'), 'wb') as fo:
|
with open(os.path.join(ddir, 'fdst.record'), 'wb') as fo:
|
||||||
fo.write(str(f.fdst).encode('utf-8'))
|
fo.write(str(f.fdst).encode('utf-8'))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user