mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
inspect-mobi: Extract resources from KF8 files
This commit is contained in:
parent
5822d6b21b
commit
fc15737d4f
@ -7,9 +7,10 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import sys, os
|
import sys, os, imghdr
|
||||||
|
|
||||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
from calibre.ebooks.mobi.utils import read_font_record
|
||||||
|
|
||||||
class MOBIFile(object):
|
class MOBIFile(object):
|
||||||
|
|
||||||
@ -30,6 +31,7 @@ class MOBIFile(object):
|
|||||||
first_text_record+offset+h8.number_of_text_records])]
|
first_text_record+offset+h8.number_of_text_records])]
|
||||||
|
|
||||||
self.raw_text = b''.join(r.raw for r in self.text_records)
|
self.raw_text = b''.join(r.raw for r in self.text_records)
|
||||||
|
self.extract_resources()
|
||||||
|
|
||||||
def print_header(self, f=sys.stdout):
|
def print_header(self, f=sys.stdout):
|
||||||
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
print (str(self.mf.palmdb).encode('utf-8'), file=f)
|
||||||
@ -41,6 +43,42 @@ class MOBIFile(object):
|
|||||||
print (file=f)
|
print (file=f)
|
||||||
print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
|
print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
|
||||||
|
|
||||||
|
def extract_resources(self):
    '''
    Populate ``self.resource_map`` with ``(href, payload)`` tuples, one
    for every entry of ``self.resource_records`` whose four byte signature
    is not HUFF, CDIC or INDX.

    ``href`` has the form ``<prefix>/<index><suffix>.<ext>``, where index
    is the zero padded position of the record in ``self.resource_records``:

    * FONT records are passed through read_font_record() and placed
      under ``fonts/`` with the extension it reports. The payload is the
      decoded font data, falling back to the raw data when that is
      empty. A ValueError is raised if the font record reports an error.
    * Records with an unrecognized signature that imghdr identifies as
      an image are placed under ``images/`` with the detected type as
      the extension.
    * All other records are placed under ``binary/`` with the extension
      ``dat``. Recognized signatures are appended to the file name
      (``-EOF`` for the non-ASCII end of file marker).
    '''
    eof_sig = b'\xe9\x8e\r\n'
    skipped = {b'HUFF', b'CDIC', b'INDX'}
    recognized = {b'FLIS', b'FCIS', b'SRCS', eof_sig, b'RESC', b'BOUN',
            b'FDST', b'DATP', b'AUDI', b'VIDE'}

    # Append to the attribute itself (not a temporary list) so that a
    # failure part way through leaves the entries gathered so far in place.
    entries = self.resource_map = []

    for idx, record in enumerate(self.resource_records):
        raw = record.raw
        magic = raw[:4]
        # TODO: Ignore CNCX records as well
        if magic in skipped:
            continue

        # Defaults describe an opaque binary record dumped verbatim
        folder, extension, tag, data = 'binary', 'dat', '', raw

        if magic == b'FONT':
            font = read_font_record(raw)
            if font['err']:
                raise ValueError('Failed to read font record: %s Headers: %s'%(
                    font['err'], font['headers']))
            data = font['font_data'] or font['raw_data']
            folder, extension = 'fonts', font['ext']
        elif magic in recognized:
            # The EOF marker is not ASCII, so it gets a readable tag
            tag = '-EOF' if magic == eof_sig else '-' + magic.decode('ascii')
        else:
            # Unknown signature: check whether the record is an image
            kind = imghdr.what(None, raw)
            if kind:
                folder, extension = 'images', kind

        entries.append(('%s/%06d%s.%s'%(folder, idx, tag, extension), data))
|
||||||
|
|
||||||
|
|
||||||
def inspect_mobi(mobi_file, ddir):
|
def inspect_mobi(mobi_file, ddir):
|
||||||
f = MOBIFile(mobi_file)
|
f = MOBIFile(mobi_file)
|
||||||
@ -51,12 +89,14 @@ def inspect_mobi(mobi_file, ddir):
|
|||||||
with open(alltext, 'wb') as of:
|
with open(alltext, 'wb') as of:
|
||||||
of.write(f.raw_text)
|
of.write(f.raw_text)
|
||||||
|
|
||||||
for tdir, attr in [('text_records', 'text_records'), ('images',
|
for x in ('text_records', 'images', 'fonts', 'binary'):
|
||||||
'image_records'), ('binary', 'binary_records'), ('font',
|
os.mkdir(os.path.join(ddir, x))
|
||||||
'font_records')]:
|
|
||||||
tdir = os.path.join(ddir, tdir)
|
for rec in f.text_records:
|
||||||
os.mkdir(tdir)
|
rec.dump(os.path.join(ddir, 'text_records'))
|
||||||
for rec in getattr(f, attr, []):
|
|
||||||
rec.dump(tdir)
|
for href, payload in f.resource_map:
|
||||||
|
with open(os.path.join(ddir, href), 'wb') as f:
|
||||||
|
f.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user