mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Add preliminary support for extracting FONT records to inspect mobi
This commit is contained in:
parent
1ec08cf8c6
commit
41f168413b
@ -52,7 +52,7 @@ class MOBIInput(InputFormatPlugin):
|
|||||||
mr.extract_content(u'.', parse_cache)
|
mr.extract_content(u'.', parse_cache)
|
||||||
|
|
||||||
if mr.kf8_type is not None:
|
if mr.kf8_type is not None:
|
||||||
log('Found KF8 MOBI of type %s'%mr.kf8_type)
|
log('Found KF8 MOBI of type %r'%mr.kf8_type)
|
||||||
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
|
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
|
||||||
return os.path.abspath(Mobi8Reader(mr, log)())
|
return os.path.abspath(Mobi8Reader(mr, log)())
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, datetime, sys, os, shutil
|
import struct, datetime, sys, os, shutil, zlib
|
||||||
from collections import OrderedDict, defaultdict
|
from collections import OrderedDict, defaultdict
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@ -1149,6 +1149,32 @@ class BinaryRecord(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
|
class FontRecord(object): # {{{

    '''
    A single FONT record extracted from a MOBI file, for inspection.

    Three big-endian unsigned 32-bit integers are read starting at byte
    offset 4 of the raw record and stored as ``uncompressed_size``,
    ``unknown1`` and ``unknown2``. When ``unknown1 == 1`` the record is
    treated as carrying a zlib stream that starts at offset ``unknown2``:
    the first two bytes of that stream are kept in ``zlib_header``, the last
    four bytes of the record are dropped (presumably the zlib checksum
    trailer -- TODO confirm) and what lies in between is inflated as a raw
    deflate stream.

    Malformed records never raise: if the header is too short or the
    compressed data cannot be inflated, a diagnostic is printed and the
    record is kept as raw bytes with the ``unknown`` extension so that it
    can still be dumped and examined by hand.

    :param idx: Record number, used to build the zero padded file name.
    :param record: Object exposing the record bytes as ``record.raw``.
    '''

    # Leading four bytes that cause the payload to be saved with a .ttf
    # extension.
    TTF_SIGNATURES = frozenset((b'\0\1\0\0', b'true', b'ttcf'))

    def __init__(self, idx, record):
        self.raw = record.raw
        name = '%06d'%idx
        try:
            (self.uncompressed_size, self.unknown1, self.unknown2) = \
                    struct.unpack_from(b'>LLL', self.raw, 4)
        except struct.error:
            # Record is too short to contain the three header fields
            self.uncompressed_size = self.unknown1 = self.unknown2 = None
        fields = [self.uncompressed_size, self.unknown1, self.unknown2]

        # Fallback payload: everything after the four byte record signature
        self.payload = self.raw[4:]
        self.ext = 'unknown'

        if self.unknown1 == 1:
            start = self.unknown2
            self.zlib_header = self.raw[start:start+2]
            try:
                # Negative wbits: raw deflate data, the zlib header and
                # trailer having been sliced off
                self.payload = zlib.decompress(self.raw[start+2:-4], -15)
            except zlib.error as err:
                # A corrupt font must not abort inspection of the rest of
                # the file, keep the raw bytes for manual examination
                print('Failed to decompress font record %s (%s), fields: %s'
                        % (name, err, fields))
            else:
                if self.payload[:4] in self.TTF_SIGNATURES:
                    self.ext = 'ttf'
        else:
            print('Unknown font record with fields: %s' % fields)
        self.name = '%s.%s'%(name, self.ext)

    def dump(self, folder):
        '''
        Write the (possibly decompressed) payload to ``folder`` using
        ``self.name`` as the file name.
        '''
        with open(os.path.join(folder, self.name), 'wb') as f:
            f.write(self.payload)

# }}}
|
||||||
|
|
||||||
class TBSIndexing(object): # {{{
|
class TBSIndexing(object): # {{{
|
||||||
|
|
||||||
def __init__(self, text_records, indices, doc_type):
|
def __init__(self, text_records, indices, doc_type):
|
||||||
@ -1410,6 +1436,7 @@ class MOBIFile(object): # {{{
|
|||||||
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
|
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
|
||||||
min(len(self.records), ntr+1))]
|
min(len(self.records), ntr+1))]
|
||||||
self.image_records, self.binary_records = [], []
|
self.image_records, self.binary_records = [], []
|
||||||
|
self.font_records = []
|
||||||
image_index = 0
|
image_index = 0
|
||||||
for i in xrange(fntbr, len(self.records)):
|
for i in xrange(fntbr, len(self.records)):
|
||||||
if i in self.indexing_record_nums or i in self.huffman_record_nums:
|
if i in self.indexing_record_nums or i in self.huffman_record_nums:
|
||||||
@ -1419,13 +1446,15 @@ class MOBIFile(object): # {{{
|
|||||||
fmt = None
|
fmt = None
|
||||||
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
|
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
|
||||||
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
|
||||||
b'AUDI', b'VIDE'}:
|
b'AUDI', b'VIDE', b'FONT'}:
|
||||||
try:
|
try:
|
||||||
width, height, fmt = identify_data(r.raw)
|
width, height, fmt = identify_data(r.raw)
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
if fmt is not None:
|
if fmt is not None:
|
||||||
self.image_records.append(ImageRecord(image_index, r, fmt))
|
self.image_records.append(ImageRecord(image_index, r, fmt))
|
||||||
|
elif r.raw[:4] == b'FONT':
|
||||||
|
self.font_records.append(FontRecord(i, r))
|
||||||
else:
|
else:
|
||||||
self.binary_records.append(BinaryRecord(i, r))
|
self.binary_records.append(BinaryRecord(i, r))
|
||||||
|
|
||||||
@ -1465,10 +1494,11 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
|
|||||||
of.write(rec.raw)
|
of.write(rec.raw)
|
||||||
alltext += rec.raw
|
alltext += rec.raw
|
||||||
of.seek(0)
|
of.seek(0)
|
||||||
root = html.fromstring(alltext.decode('utf-8'))
|
if f.mobi_header.file_version < 8:
|
||||||
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
|
root = html.fromstring(alltext.decode('utf-8'))
|
||||||
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
|
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
|
||||||
include_meta_content_type=True))
|
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
|
||||||
|
include_meta_content_type=True))
|
||||||
|
|
||||||
|
|
||||||
if f.index_header is not None:
|
if f.index_header is not None:
|
||||||
@ -1490,7 +1520,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
|
|||||||
f.tbs_indexing.dump(ddir)
|
f.tbs_indexing.dump(ddir)
|
||||||
|
|
||||||
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
|
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
|
||||||
('binary', 'binary_records')]:
|
('binary', 'binary_records'), ('font', 'font_records')]:
|
||||||
tdir = os.path.join(ddir, tdir)
|
tdir = os.path.join(ddir, tdir)
|
||||||
os.mkdir(tdir)
|
os.mkdir(tdir)
|
||||||
for rec in getattr(f, attr):
|
for rec in getattr(f, attr):
|
||||||
|
@ -351,7 +351,7 @@ class Mobi8Reader(object):
|
|||||||
fields = struct.unpack_from(b'>LLLL', data, 4)
|
fields = struct.unpack_from(b'>LLLL', data, 4)
|
||||||
except:
|
except:
|
||||||
fields = None
|
fields = None
|
||||||
#self.log.debug('Font record fields: %s'%(fields,))
|
# self.log.debug('Font record fields: %s'%(fields,))
|
||||||
cdata = data[26:-4]
|
cdata = data[26:-4]
|
||||||
ext = 'dat'
|
ext = 'dat'
|
||||||
try:
|
try:
|
||||||
@ -361,11 +361,13 @@ class Mobi8Reader(object):
|
|||||||
'Fields: %s' % (fname_idx, fields,))
|
'Fields: %s' % (fname_idx, fields,))
|
||||||
uncompressed_data = data[4:]
|
uncompressed_data = data[4:]
|
||||||
ext = 'failed'
|
ext = 'failed'
|
||||||
hdr = uncompressed_data[0:4]
|
|
||||||
if len(uncompressed_data) < 200:
|
if len(uncompressed_data) < 200:
|
||||||
self.log.warn('Corrupted font record: %d'%fname_idx)
|
self.log.warn('Failed to uncompress embedded font %d: '
|
||||||
|
'Fields: %s' % (fname_idx, fields,))
|
||||||
|
uncompressed_data = data[4:]
|
||||||
ext = 'failed'
|
ext = 'failed'
|
||||||
if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf':
|
hdr = uncompressed_data[:4]
|
||||||
|
if ext != 'failed' and hdr in {b'\0\1\0\0', b'true', b'ttcf'}:
|
||||||
ext = 'ttf'
|
ext = 'ttf'
|
||||||
href = "fonts/%05d.%s" % (fname_idx, ext)
|
href = "fonts/%05d.%s" % (fname_idx, ext)
|
||||||
with open(href.replace('/', os.sep), 'wb') as f:
|
with open(href.replace('/', os.sep), 'wb') as f:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user