diff --git a/src/calibre/ebooks/mobi/debug/mobi8.py b/src/calibre/ebooks/mobi/debug/mobi8.py
index e4a92ee95c..20fd419e29 100644
--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@@ -7,9 +7,10 @@ __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal '
 __docformat__ = 'restructuredtext en'
 
-import sys, os
+import sys, os, imghdr
 
 from calibre.ebooks.mobi.debug.headers import TextRecord
+from calibre.ebooks.mobi.utils import read_font_record
 
 class MOBIFile(object):
 
@@ -30,6 +31,7 @@ class MOBIFile(object):
                 first_text_record+offset+h8.number_of_text_records])]
 
         self.raw_text = b''.join(r.raw for r in self.text_records)
+        self.extract_resources()
 
     def print_header(self, f=sys.stdout):
         print (str(self.mf.palmdb).encode('utf-8'), file=f)
@@ -41,6 +43,49 @@ class MOBIFile(object):
         print (file=f)
         print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
 
+    def extract_resources(self):
+        '''
+        Build self.resource_map: a list of (href, payload) tuples, one for
+        each record in self.resource_records that is not a HUFF, CDIC or INDX
+        record. href has the form <prefix>/<index><suffix>.<ext>, where
+        prefix is one of fonts, images or binary. Raises ValueError if a FONT
+        record cannot be read.
+        '''
+        self.resource_map = []
+        known_types = {b'FLIS', b'FCIS', b'SRCS',
+                b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
+                b'AUDI', b'VIDE'}
+
+        for i, rec in enumerate(self.resource_records):
+            sig = rec.raw[:4]
+            payload = rec.raw
+            ext = 'dat'
+            prefix = 'binary'
+            suffix = ''
+            if sig in {b'HUFF', b'CDIC', b'INDX'}: continue
+            # TODO: Ignore CNCX records as well
+            if sig == b'FONT':
+                font = read_font_record(rec.raw)
+                if font['err']:
+                    raise ValueError('Failed to read font record: %s Headers: %s'%(
+                        font['err'], font['headers']))
+                payload = (font['font_data'] if font['font_data'] else
+                        font['raw_data'])
+                prefix, ext = 'fonts', font['ext']
+            elif sig not in known_types:
+                q = imghdr.what(None, rec.raw)
+                if q:
+                    prefix, ext = 'images', q
+
+            if prefix == 'binary':
+                if sig == b'\xe9\x8e\r\n':
+                    suffix = '-EOF'
+                elif sig in known_types:
+                    suffix = '-' + sig.decode('ascii')
+
+            self.resource_map.append(('%s/%06d%s.%s'%(prefix, i, suffix, ext),
+                payload))
+
 
 def inspect_mobi(mobi_file, ddir):
     f = MOBIFile(mobi_file)
@@ -51,12 +96,14 @@ def inspect_mobi(mobi_file, ddir):
     with open(alltext, 'wb') as of:
         of.write(f.raw_text)
 
-    for tdir, attr in [('text_records', 'text_records'), ('images',
-        'image_records'), ('binary', 'binary_records'), ('font',
-        'font_records')]:
-        tdir = os.path.join(ddir, tdir)
-        os.mkdir(tdir)
-        for rec in getattr(f, attr, []):
-            rec.dump(tdir)
+    for x in ('text_records', 'images', 'fonts', 'binary'):
+        os.mkdir(os.path.join(ddir, x))
+
+    for rec in f.text_records:
+        rec.dump(os.path.join(ddir, 'text_records'))
+
+    for href, payload in f.resource_map:
+        with open(os.path.join(ddir, href), 'wb') as of:
+            of.write(payload)
 
 