mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Support KF8 files in inspect mobi tool
This commit is contained in:
parent
21f493e561
commit
b8d0c49f99
@ -405,7 +405,7 @@ class MOBIHeader(object): # {{{
|
|||||||
|
|
||||||
class TagX(object): # {{{
|
class TagX(object): # {{{
|
||||||
|
|
||||||
def __init__(self, raw, control_byte_count):
|
def __init__(self, raw):
|
||||||
self.tag = ord(raw[0])
|
self.tag = ord(raw[0])
|
||||||
self.num_values = ord(raw[1])
|
self.num_values = ord(raw[1])
|
||||||
self.bitmask = ord(raw[2])
|
self.bitmask = ord(raw[2])
|
||||||
@ -465,8 +465,7 @@ class SecondaryIndexHeader(object): # {{{
|
|||||||
num_tagx_entries = len(tag_table) // 4
|
num_tagx_entries = len(tag_table) // 4
|
||||||
self.tagx_entries = []
|
self.tagx_entries = []
|
||||||
for i in range(num_tagx_entries):
|
for i in range(num_tagx_entries):
|
||||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4],
|
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4]))
|
||||||
self.tagx_control_byte_count))
|
|
||||||
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
||||||
raise ValueError('TAGX last entry is not EOF')
|
raise ValueError('TAGX last entry is not EOF')
|
||||||
|
|
||||||
@ -569,8 +568,7 @@ class IndexHeader(object): # {{{
|
|||||||
num_tagx_entries = len(tag_table) // 4
|
num_tagx_entries = len(tag_table) // 4
|
||||||
self.tagx_entries = []
|
self.tagx_entries = []
|
||||||
for i in range(num_tagx_entries):
|
for i in range(num_tagx_entries):
|
||||||
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4],
|
self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4]))
|
||||||
self.tagx_control_byte_count))
|
|
||||||
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
|
||||||
raise ValueError('TAGX last entry is not EOF')
|
raise ValueError('TAGX last entry is not EOF')
|
||||||
|
|
||||||
@ -640,74 +638,29 @@ class Tag(object): # {{{
|
|||||||
TAG_MAP = {
|
TAG_MAP = {
|
||||||
1: ('offset', 'Offset in HTML'),
|
1: ('offset', 'Offset in HTML'),
|
||||||
2: ('size', 'Size in HTML'),
|
2: ('size', 'Size in HTML'),
|
||||||
3: ('label_offset', 'Offset to label in CNCX'),
|
3: ('label_offset', 'Label offset in CNCX'),
|
||||||
4: ('depth', 'Depth of this entry in TOC'),
|
4: ('depth', 'Depth of this entry in TOC'),
|
||||||
|
5: ('class_offset', 'Class offset in CNCX'),
|
||||||
|
6: ('pos_fid', 'File Index'),
|
||||||
|
|
||||||
11: ('secondary', '[unknown, unknown, '
|
11: ('secondary', '[unknown, unknown, '
|
||||||
'tag type from TAGX in primary index header]'),
|
'tag type from TAGX in primary index header]'),
|
||||||
|
|
||||||
# The remaining tag types have to be interpreted subject to the type
|
21: ('parent_index', 'Parent'),
|
||||||
# of index entry they are present in
|
22: ('first_child_index', 'First child'),
|
||||||
|
23: ('last_child_index', 'Last child'),
|
||||||
|
|
||||||
|
69 : ('image_index', 'Offset from first image record to the'
|
||||||
|
' image record associated with this entry'
|
||||||
|
' (masthead for periodical or thumbnail for'
|
||||||
|
' article entry).'),
|
||||||
|
70 : ('desc_offset', 'Description offset in cncx'),
|
||||||
|
71 : ('author_offset', 'Author offset in cncx'),
|
||||||
|
72 : ('image_caption_offset', 'Image caption offset in cncx'),
|
||||||
|
73 : ('image_attr_offset', 'Image attribution offset in cncx'),
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
INTERPRET_MAP = {
|
|
||||||
'subchapter': {
|
|
||||||
21 : ('Parent chapter index', 'parent_index'),
|
|
||||||
5 : ('Unknown (koffs)', 'koffs'),
|
|
||||||
},
|
|
||||||
|
|
||||||
'section' : {
|
|
||||||
6 : ('File Index', 'pos_fid'),
|
|
||||||
},
|
|
||||||
|
|
||||||
'section_with_subsections' : {
|
|
||||||
6 : ('File Index', 'pos_fid'),
|
|
||||||
22 : ('First subsection index', 'first_child_index'),
|
|
||||||
23 : ('Last subsection index', 'last_child_index'),
|
|
||||||
|
|
||||||
},
|
|
||||||
'subsection' : {
|
|
||||||
6 : ('File Index', 'pos_fid'),
|
|
||||||
21 : ('Parent section index', 'parent_index'),
|
|
||||||
},
|
|
||||||
|
|
||||||
|
|
||||||
'article' : {
|
|
||||||
5 : ('Class offset in cncx', 'class_offset'),
|
|
||||||
21 : ('Parent section index', 'parent_index'),
|
|
||||||
69 : ('Offset from first image record num to the'
|
|
||||||
' image record associated with this article',
|
|
||||||
'image_index'),
|
|
||||||
70 : ('Description offset in cncx', 'desc_offset'),
|
|
||||||
71 : ('Author offset in cncx', 'author_offset'),
|
|
||||||
72 : ('Image caption offset in cncx',
|
|
||||||
'image_caption_offset'),
|
|
||||||
73 : ('Image attribution offset in cncx',
|
|
||||||
'image_attr_offset'),
|
|
||||||
},
|
|
||||||
|
|
||||||
'chapter_with_subchapters' : {
|
|
||||||
22 : ('First subchapter index', 'first_child_index'),
|
|
||||||
23 : ('Last subchapter index', 'last_child_index'),
|
|
||||||
},
|
|
||||||
|
|
||||||
'periodical' : {
|
|
||||||
5 : ('Class offset in cncx', 'class_offset'),
|
|
||||||
22 : ('First section index', 'first_child_index'),
|
|
||||||
23 : ('Last section index', 'last_child_index'),
|
|
||||||
69 : ('Offset from first image record num to masthead'
|
|
||||||
' record', 'image_index'),
|
|
||||||
},
|
|
||||||
|
|
||||||
'section' : {
|
|
||||||
5 : ('Class offset in cncx', 'class_offset'),
|
|
||||||
21 : ('Periodical index', 'parent_index'),
|
|
||||||
22 : ('First article index', 'first_child_index'),
|
|
||||||
23 : ('Last article index', 'last_child_index'),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, tagx, vals, entry_type, cncx):
|
def __init__(self, tagx, vals, entry_type, cncx):
|
||||||
self.value = vals if len(vals) > 1 else vals[0] if vals else None
|
self.value = vals if len(vals) > 1 else vals[0] if vals else None
|
||||||
self.entry_type = entry_type
|
self.entry_type = entry_type
|
||||||
@ -717,24 +670,15 @@ class Tag(object): # {{{
|
|||||||
if tag_type in self.TAG_MAP:
|
if tag_type in self.TAG_MAP:
|
||||||
self.attr, self.desc = self.TAG_MAP[tag_type]
|
self.attr, self.desc = self.TAG_MAP[tag_type]
|
||||||
else:
|
else:
|
||||||
try:
|
print ('Unknown tag value: %d in entry type: %s'%(tag_type,
|
||||||
td = self.INTERPRET_MAP[entry_type]
|
entry_type))
|
||||||
except:
|
self.desc = '??Unknown (tag value: %d type: %s)'%(
|
||||||
raise ValueError('Unknown entry type: %s'%entry_type)
|
tag_type, entry_type)
|
||||||
try:
|
self.attr = 'unknown'
|
||||||
self.desc, self.attr = td[tag_type]
|
|
||||||
except:
|
|
||||||
print ('Unknown tag value: %d in entry type: %s'%(tag_type,
|
|
||||||
entry_type))
|
|
||||||
self.desc = '??Unknown (tag value: %d type: %s)'%(
|
|
||||||
tag_type, entry_type)
|
|
||||||
self.attr = 'unknown'
|
|
||||||
if '_offset' in self.attr:
|
if '_offset' in self.attr:
|
||||||
self.cncx_value = cncx[self.value]
|
self.cncx_value = cncx[self.value]
|
||||||
|
|
||||||
if self.attr == 'pos_fid':
|
|
||||||
print (1111111, vals)
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.cncx_value is not None:
|
if self.cncx_value is not None:
|
||||||
return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
|
return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
|
||||||
@ -746,43 +690,21 @@ class IndexEntry(object): # {{{
|
|||||||
|
|
||||||
'''
|
'''
|
||||||
The index is made up of entries, each of which is represented by an
|
The index is made up of entries, each of which is represented by an
|
||||||
instance of this class. Index entries typically point to offsets int eh
|
instance of this class. Index entries typically point to offsets in the
|
||||||
HTML, specify HTML sizes and point to text strings in the CNCX that are
|
HTML, specify HTML sizes and point to text strings in the CNCX that are
|
||||||
used in the navigation UI.
|
used in the navigation UI.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
TYPES = {
|
|
||||||
# Present in secondary index record
|
|
||||||
0x01 : 'null',
|
|
||||||
0x02 : 'publication_meta',
|
|
||||||
# Present in book type files
|
|
||||||
0x0f : 'chapter',
|
|
||||||
0x6f : 'chapter_with_subchapters',
|
|
||||||
0x1f : 'subchapter',
|
|
||||||
0x8f : 'section',
|
|
||||||
0xef : 'section_with_subsections',
|
|
||||||
0x9f : 'subsection',
|
|
||||||
# Present in periodicals
|
|
||||||
0xdf : 'periodical',
|
|
||||||
0xff : 'section',
|
|
||||||
0x3f : 'article',
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, ident, entry_type, raw, cncx, tagx_entries,
|
def __init__(self, ident, entry_type, raw, cncx, tagx_entries,
|
||||||
control_byte_count):
|
control_byte_count):
|
||||||
self.index = ident
|
self.index = ident
|
||||||
self.raw = raw
|
self.raw = raw
|
||||||
self.tags = []
|
self.tags = []
|
||||||
self.entry_type_raw = entry_type
|
self.entry_type = entry_type
|
||||||
self.byte_size = len(raw)
|
self.byte_size = len(raw)
|
||||||
|
|
||||||
orig_raw = raw
|
orig_raw = raw
|
||||||
|
|
||||||
try:
|
|
||||||
self.entry_type = self.TYPES[entry_type]
|
|
||||||
except KeyError:
|
|
||||||
raise ValueError('Unknown Index Entry type: %s'%bin(entry_type))
|
|
||||||
|
|
||||||
if control_byte_count not in (1, 2):
|
if control_byte_count not in (1, 2):
|
||||||
raise ValueError('Unknown control byte count: %d'%
|
raise ValueError('Unknown control byte count: %d'%
|
||||||
control_byte_count)
|
control_byte_count)
|
||||||
@ -800,7 +722,7 @@ class IndexEntry(object): # {{{
|
|||||||
for tag in expected_tags:
|
for tag in expected_tags:
|
||||||
vals = []
|
vals = []
|
||||||
|
|
||||||
if tag.tag > 64:
|
if tag.tag > 0b1000000: # 0b1000000 = 64
|
||||||
has_tag = flags & 0b1
|
has_tag = flags & 0b1
|
||||||
flags = flags >> 1
|
flags = flags >> 1
|
||||||
if not has_tag: continue
|
if not has_tag: continue
|
||||||
@ -867,10 +789,17 @@ class IndexEntry(object): # {{{
|
|||||||
return tag.value
|
return tag.value
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pos_fid(self):
|
||||||
|
for tag in self.tags:
|
||||||
|
if tag.attr == 'pos_fid':
|
||||||
|
return tag.value
|
||||||
|
return [0, 0]
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
ans = ['Index Entry(index=%s, entry_type=%s, flags=%s, '
|
ans = ['Index Entry(index=%s, entry_type=%s, flags=%s, '
|
||||||
'length=%d, byte_size=%d)'%(
|
'length=%d, byte_size=%d)'%(
|
||||||
self.index, self.entry_type, bin(self.flags)[2:],
|
self.index, bin(self.entry_type), bin(self.flags)[2:],
|
||||||
len(self.tags), self.byte_size)]
|
len(self.tags), self.byte_size)]
|
||||||
for tag in self.tags:
|
for tag in self.tags:
|
||||||
if tag.value is not None:
|
if tag.value is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user