mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Support KF8 files in inspect mobi tool
This commit is contained in:
parent
21f493e561
commit
b8d0c49f99
@ -405,7 +405,7 @@ class MOBIHeader(object): # {{{
|
||||
|
||||
class TagX(object): # {{{
|
||||
|
||||
def __init__(self, raw, control_byte_count):
|
||||
def __init__(self, raw):
|
||||
self.tag = ord(raw[0])
|
||||
self.num_values = ord(raw[1])
|
||||
self.bitmask = ord(raw[2])
|
||||
@ -465,8 +465,7 @@ class SecondaryIndexHeader(object): # {{{
|
||||
num_tagx_entries = len(tag_table) // 4
|
||||
self.tagx_entries = []
|
||||
for i in range(num_tagx_entries):
|
||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4],
|
||||
self.tagx_control_byte_count))
|
||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4]))
|
||||
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
||||
raise ValueError('TAGX last entry is not EOF')
|
||||
|
||||
@ -569,8 +568,7 @@ class IndexHeader(object): # {{{
|
||||
num_tagx_entries = len(tag_table) // 4
|
||||
self.tagx_entries = []
|
||||
for i in range(num_tagx_entries):
|
||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4],
|
||||
self.tagx_control_byte_count))
|
||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4]))
|
||||
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
||||
raise ValueError('TAGX last entry is not EOF')
|
||||
|
||||
@ -640,74 +638,29 @@ class Tag(object): # {{{
|
||||
TAG_MAP = {
|
||||
1: ('offset', 'Offset in HTML'),
|
||||
2: ('size', 'Size in HTML'),
|
||||
3: ('label_offset', 'Offset to label in CNCX'),
|
||||
3: ('label_offset', 'Label offset in CNCX'),
|
||||
4: ('depth', 'Depth of this entry in TOC'),
|
||||
5: ('class_offset', 'Class offset in CNCX'),
|
||||
6: ('pos_fid', 'File Index'),
|
||||
|
||||
11: ('secondary', '[unknown, unknown, '
|
||||
'tag type from TAGX in primary index header]'),
|
||||
|
||||
# The remaining tag types have to be interpreted subject to the type
|
||||
# of index entry they are present in
|
||||
21: ('parent_index', 'Parent'),
|
||||
22: ('first_child_index', 'First child'),
|
||||
23: ('last_child_index', 'Last child'),
|
||||
|
||||
69 : ('image_index', 'Offset from first image record to the'
|
||||
' image record associated with this entry'
|
||||
' (masthead for periodical or thumbnail for'
|
||||
' article entry).'),
|
||||
70 : ('desc_offset', 'Description offset in cncx'),
|
||||
71 : ('author_offset', 'Author offset in cncx'),
|
||||
72 : ('image_caption_offset', 'Image caption offset in cncx'),
|
||||
73 : ('image_attr_offset', 'Image attribution offset in cncx'),
|
||||
|
||||
}
|
||||
|
||||
INTERPRET_MAP = {
|
||||
'subchapter': {
|
||||
21 : ('Parent chapter index', 'parent_index'),
|
||||
5 : ('Unknown (koffs)', 'koffs'),
|
||||
},
|
||||
|
||||
'section' : {
|
||||
6 : ('File Index', 'pos_fid'),
|
||||
},
|
||||
|
||||
'section_with_subsections' : {
|
||||
6 : ('File Index', 'pos_fid'),
|
||||
22 : ('First subsection index', 'first_child_index'),
|
||||
23 : ('Last subsection index', 'last_child_index'),
|
||||
|
||||
},
|
||||
'subsection' : {
|
||||
6 : ('File Index', 'pos_fid'),
|
||||
21 : ('Parent section index', 'parent_index'),
|
||||
},
|
||||
|
||||
|
||||
'article' : {
|
||||
5 : ('Class offset in cncx', 'class_offset'),
|
||||
21 : ('Parent section index', 'parent_index'),
|
||||
69 : ('Offset from first image record num to the'
|
||||
' image record associated with this article',
|
||||
'image_index'),
|
||||
70 : ('Description offset in cncx', 'desc_offset'),
|
||||
71 : ('Author offset in cncx', 'author_offset'),
|
||||
72 : ('Image caption offset in cncx',
|
||||
'image_caption_offset'),
|
||||
73 : ('Image attribution offset in cncx',
|
||||
'image_attr_offset'),
|
||||
},
|
||||
|
||||
'chapter_with_subchapters' : {
|
||||
22 : ('First subchapter index', 'first_child_index'),
|
||||
23 : ('Last subchapter index', 'last_child_index'),
|
||||
},
|
||||
|
||||
'periodical' : {
|
||||
5 : ('Class offset in cncx', 'class_offset'),
|
||||
22 : ('First section index', 'first_child_index'),
|
||||
23 : ('Last section index', 'last_child_index'),
|
||||
69 : ('Offset from first image record num to masthead'
|
||||
' record', 'image_index'),
|
||||
},
|
||||
|
||||
'section' : {
|
||||
5 : ('Class offset in cncx', 'class_offset'),
|
||||
21 : ('Periodical index', 'parent_index'),
|
||||
22 : ('First article index', 'first_child_index'),
|
||||
23 : ('Last article index', 'last_child_index'),
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def __init__(self, tagx, vals, entry_type, cncx):
|
||||
self.value = vals if len(vals) > 1 else vals[0] if vals else None
|
||||
self.entry_type = entry_type
|
||||
@ -717,24 +670,15 @@ class Tag(object): # {{{
|
||||
if tag_type in self.TAG_MAP:
|
||||
self.attr, self.desc = self.TAG_MAP[tag_type]
|
||||
else:
|
||||
try:
|
||||
td = self.INTERPRET_MAP[entry_type]
|
||||
except:
|
||||
raise ValueError('Unknown entry type: %s'%entry_type)
|
||||
try:
|
||||
self.desc, self.attr = td[tag_type]
|
||||
except:
|
||||
print ('Unknown tag value: %d in entry type: %s'%(tag_type,
|
||||
entry_type))
|
||||
self.desc = '??Unknown (tag value: %d type: %s)'%(
|
||||
tag_type, entry_type)
|
||||
self.attr = 'unknown'
|
||||
|
||||
if '_offset' in self.attr:
|
||||
self.cncx_value = cncx[self.value]
|
||||
|
||||
if self.attr == 'pos_fid':
|
||||
print (1111111, vals)
|
||||
|
||||
def __str__(self):
|
||||
if self.cncx_value is not None:
|
||||
return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
|
||||
@ -746,43 +690,21 @@ class IndexEntry(object): # {{{
|
||||
|
||||
'''
|
||||
The index is made up of entries, each of which is represented by an
|
||||
instance of this class. Index entries typically point to offsets int eh
|
||||
instance of this class. Index entries typically point to offsets in the
|
||||
HTML, specify HTML sizes and point to text strings in the CNCX that are
|
||||
used in the navigation UI.
|
||||
'''
|
||||
|
||||
TYPES = {
|
||||
# Present in secondary index record
|
||||
0x01 : 'null',
|
||||
0x02 : 'publication_meta',
|
||||
# Present in book type files
|
||||
0x0f : 'chapter',
|
||||
0x6f : 'chapter_with_subchapters',
|
||||
0x1f : 'subchapter',
|
||||
0x8f : 'section',
|
||||
0xef : 'section_with_subsections',
|
||||
0x9f : 'subsection',
|
||||
# Present in periodicals
|
||||
0xdf : 'periodical',
|
||||
0xff : 'section',
|
||||
0x3f : 'article',
|
||||
}
|
||||
|
||||
def __init__(self, ident, entry_type, raw, cncx, tagx_entries,
|
||||
control_byte_count):
|
||||
self.index = ident
|
||||
self.raw = raw
|
||||
self.tags = []
|
||||
self.entry_type_raw = entry_type
|
||||
self.entry_type = entry_type
|
||||
self.byte_size = len(raw)
|
||||
|
||||
orig_raw = raw
|
||||
|
||||
try:
|
||||
self.entry_type = self.TYPES[entry_type]
|
||||
except KeyError:
|
||||
raise ValueError('Unknown Index Entry type: %s'%bin(entry_type))
|
||||
|
||||
if control_byte_count not in (1, 2):
|
||||
raise ValueError('Unknown control byte count: %d'%
|
||||
control_byte_count)
|
||||
@ -800,7 +722,7 @@ class IndexEntry(object): # {{{
|
||||
for tag in expected_tags:
|
||||
vals = []
|
||||
|
||||
if tag.tag > 64:
|
||||
if tag.tag > 0b1000000: # 0b1000000 = 64
|
||||
has_tag = flags & 0b1
|
||||
flags = flags >> 1
|
||||
if not has_tag: continue
|
||||
@ -867,10 +789,17 @@ class IndexEntry(object): # {{{
|
||||
return tag.value
|
||||
return -1
|
||||
|
||||
@property
|
||||
def pos_fid(self):
|
||||
for tag in self.tags:
|
||||
if tag.attr == 'pos_fid':
|
||||
return tag.value
|
||||
return [0, 0]
|
||||
|
||||
def __str__(self):
|
||||
ans = ['Index Entry(index=%s, entry_type=%s, flags=%s, '
|
||||
'length=%d, byte_size=%d)'%(
|
||||
self.index, self.entry_type, bin(self.flags)[2:],
|
||||
self.index, bin(self.entry_type), bin(self.flags)[2:],
|
||||
len(self.tags), self.byte_size)]
|
||||
for tag in self.tags:
|
||||
if tag.value is not None:
|
||||
|
Loading…
x
Reference in New Issue
Block a user