mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New MOBI output: Start work on support for hierarchical books
This commit is contained in:
parent
b203704a6b
commit
7e29ea72ad
@ -646,7 +646,7 @@ class Tag(object): # {{{
|
||||
|
||||
INTERPRET_MAP = {
|
||||
'subchapter': {
|
||||
5 : ('Parent chapter index', 'parent_index')
|
||||
21 : ('Parent chapter index', 'parent_index')
|
||||
},
|
||||
|
||||
'article' : {
|
||||
@ -702,7 +702,8 @@ class Tag(object): # {{{
|
||||
self.desc, self.attr = td[tag_type]
|
||||
except:
|
||||
print ('Unknown tag value: %d'%tag_type)
|
||||
self.desc = '??Unknown (tag value: %d)'%tag_type
|
||||
self.desc = '??Unknown (tag value: %d type: %s)'%(
|
||||
tag_type, entry_type)
|
||||
self.attr = 'unknown'
|
||||
if '_offset' in self.attr:
|
||||
self.cncx_value = cncx[self.value]
|
||||
@ -750,7 +751,7 @@ class IndexEntry(object): # {{{
|
||||
try:
|
||||
self.entry_type = self.TYPES[entry_type]
|
||||
except KeyError:
|
||||
raise ValueError('Unknown Index Entry type: %s'%hex(entry_type))
|
||||
raise ValueError('Unknown Index Entry type: %s'%bin(entry_type))
|
||||
|
||||
if control_byte_count not in (1, 2):
|
||||
raise ValueError('Unknown control byte count: %d'%
|
||||
@ -1223,8 +1224,7 @@ class TBSIndexing(object): # {{{
|
||||
tbs_type = 0
|
||||
is_periodical = self.doc_type in (257, 258, 259)
|
||||
if len(byts):
|
||||
outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
|
||||
is_periodical else 3)
|
||||
outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
|
||||
byts = byts[consumed:]
|
||||
for k in extra:
|
||||
tbs_type |= k
|
||||
|
@ -109,6 +109,20 @@ class TAGX(object): # {{{
|
||||
list(map(self.add_tag, (11, 0)))
|
||||
return self.header(1) + bytes(self.byts)
|
||||
|
||||
|
||||
|
||||
class TAGX_BOOK(TAGX):

    '''
    TAGX variant used for the index of a book (as opposed to a periodical).

    BITMASKS maps a tag number to the single bit that represents it; the
    masks of all tags present on an entry are OR-ed together elsewhere to
    describe that entry.
    '''

    # Tag numbers that can appear in a hierarchical book index, in the order
    # in which they are written to the TAGX block. The position of a tag in
    # this tuple is also the position of its bit in BITMASKS.
    _BOOK_TAGS = (1, 2, 3, 4, 21, 22, 23)

    # Start from the masks of the base class and (re)define the ones for the
    # book tags so that each of them gets its own bit.
    BITMASKS = dict(TAGX.BITMASKS)
    BITMASKS.update({x:(1 << i) for i, x in enumerate(_BOOK_TAGS)})

    @property
    def hierarchical_book(self):
        '''
        TAGX block for the primary index header of a hierarchical book
        '''
        # The trailing 0 mirrors the other TAGX layouts in this file, which
        # all end their tag list with 0 (presumably the end marker - confirm
        # against add_tag).
        for tag in self._BOOK_TAGS + (0,):
            self.add_tag(tag)
        return self.header(1) + bytes(self.byts)
|
||||
|
||||
@property
|
||||
def flat_book(self):
|
||||
'''
|
||||
@ -117,6 +131,7 @@ class TAGX(object): # {{{
|
||||
list(map(self.add_tag, (1, 2, 3, 4, 0)))
|
||||
return self.header(1) + bytes(self.byts)
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# Index Entries {{{
|
||||
@ -187,6 +202,9 @@ class IndexEntry(object):
|
||||
ans |= TAGX.BITMASKS[tag]
|
||||
return ans
|
||||
|
||||
def attr_for_tag(self, tag):
    '''
    Return the name of the attribute of this entry that holds the value
    for the tag number `tag`, as recorded in RTAG_MAP. Subclasses can
    override this to use a different mapping.
    '''
    tag_to_attr = self.RTAG_MAP
    return tag_to_attr[tag]
|
||||
|
||||
@property
|
||||
def bytestring(self):
|
||||
buf = StringIO()
|
||||
@ -210,7 +228,7 @@ class IndexEntry(object):
|
||||
buf.write(bytes(bytearray([flags])))
|
||||
|
||||
for tag in self.tag_nums:
|
||||
attr = self.RTAG_MAP[tag]
|
||||
attr = self.attr_for_tag(tag)
|
||||
val = getattr(self, attr)
|
||||
if isinstance(val, int):
|
||||
val = [val]
|
||||
@ -226,6 +244,17 @@ class IndexEntry(object):
|
||||
ans = buf.getvalue()
|
||||
return ans
|
||||
|
||||
class BookIndexEntry(IndexEntry):

    '''
    Index entry for a book. Identical to IndexEntry except that the entry
    type is computed from the bitmasks of TAGX_BOOK instead of TAGX.
    '''

    @property
    def entry_type(self):
        '''
        Bitwise OR of the TAGX_BOOK bitmasks of every tag in self.tag_nums
        '''
        # BITMASKS is a class level table, so read it from the class (as
        # IndexEntry.entry_type does with TAGX.BITMASKS) rather than
        # building a throwaway TAGX_BOOK instance on every access.
        bitmasks = TAGX_BOOK.BITMASKS
        ans = 0
        for tag in self.tag_nums:
            ans |= bitmasks[tag]
        return ans
|
||||
|
||||
|
||||
class PeriodicalIndexEntry(IndexEntry):
|
||||
|
||||
def __init__(self, offset, label_offset, class_offset, depth):
|
||||
@ -461,7 +490,6 @@ class Indexer(object): # {{{
|
||||
if not desc: desc = _('No details available')
|
||||
node.author, node.description = aut, desc
|
||||
|
||||
|
||||
self.cncx = CNCX(oeb.toc, self.is_periodical)
|
||||
|
||||
if self.is_periodical:
|
||||
@ -529,7 +557,9 @@ class Indexer(object): # {{{
|
||||
tagx_block = TAGX().secondary
|
||||
else:
|
||||
tagx_block = (TAGX().periodical if self.is_periodical else
|
||||
TAGX().flat_book)
|
||||
(TAGX_BOOK().hierarchical_book if
|
||||
self.book_has_subchapters else
|
||||
TAGX_BOOK().flat_book))
|
||||
header_length = 192
|
||||
|
||||
# Ident 0 - 4
|
||||
@ -615,47 +645,98 @@ class Indexer(object): # {{{
|
||||
# }}}
|
||||
|
||||
def create_book_index(self): # {{{
    '''
    Build the list of index entries for a book from the OEB table of
    contents.

    The TOC is flattened to two levels: every top level TOC node becomes
    a chapter and every descendant of that node, however deeply nested,
    becomes a subchapter of it. Nodes whose target is not present in the
    serialized document are skipped with a warning, as are nodes that
    point at an offset that has already been used. Skipping a top level
    node also skips all of its descendants.

    Sets self.book_has_subchapters to True if at least one subchapter
    ends up in the result.

    :return: A list of BookIndexEntry objects: first all chapters, sorted
             by offset, then the subchapters grouped by parent chapter.
    '''
    self.book_has_subchapters = False
    seen, sub_seen = set(), set()
    id_offsets = self.serializer.id_offsets

    def locate(toc_node):
        # Return (offset, label) for a TOC node, or None (after logging a
        # warning) if the node cannot be resolved. This lookup is best
        # effort, so any ordinary error means "not found", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            return id_offsets[toc_node.href], self.cncx[toc_node.title]
        except Exception:
            self.log.warn('TOC item %s [%s] not found in document'%(
                toc_node.title, toc_node.href))
            return None

    # Flatten toc to contain only chapters and subchapters
    # Anything deeper than a subchapter is made into a subchapter
    chapters = []
    for node in self.oeb.toc:
        located = locate(node)
        if located is None:
            continue
        offset, label = located

        if offset in seen:
            continue
        seen.add(offset)

        subchapters = []
        chapters.append((offset, label, subchapters))

        for descendant in node.iterdescendants():
            located = locate(descendant)
            if located is None:
                continue
            offset, label = located

            # sub_seen is shared by all chapters: an offset can be used by
            # at most one subchapter in the whole book
            if offset in sub_seen:
                continue
            sub_seen.add(offset)
            subchapters.append((offset, label))

        subchapters.sort(key=lambda x:x[0])

    chapters.sort(key=lambda x:x[0])

    chapters = [(BookIndexEntry(x[0], x[1]), [
        BookIndexEntry(y[0], y[1]) for y in x[2]]) for x in chapters]

    def set_length(entries):
        # Every entry extends up to the start of the next entry in the
        # list; the last one extends to the end of the document body.
        # NOTE(review): this also applies to the last subchapter of each
        # chapter, which is therefore measured up to body_end_offset and
        # not up to the end of its chapter - confirm this is intended.
        for i, entry in enumerate(entries):
            try:
                next_offset = entries[i+1].offset
            except IndexError:
                next_offset = self.serializer.body_end_offset
            entry.length = next_offset - entry.offset

    def set_all_lengths():
        set_length([x[0] for x in chapters])
        for x in chapters:
            set_length(x[1])

    # Set chapter and subchapter lengths
    set_all_lengths()

    # Remove empty chapters
    chapters = [x for x in chapters if x[0].length > 0]

    # Remove invalid subchapters, i.e. empty ones and ones that start at
    # or after the end of their chapter (next_offset is provided by the
    # entry class, which is defined outside this function)
    chapters = [
        (chapter, [sc for sc in subchapters
            if sc.offset < chapter.next_offset and sc.length > 0])
        for chapter, subchapters in chapters]

    # Reset chapter and subchapter lengths in case any were removed
    set_all_lengths()

    # Set index and depth values. Chapters are numbered first, subchapters
    # continue the numbering after the last chapter.
    indices = []
    for i, x in enumerate(chapters):
        x[0].index = i
        indices.append(x[0])

    next_index = len(chapters)
    for chapter, subchapters in chapters:
        for sc in subchapters:
            sc.index = next_index
            next_index += 1
            sc.parent_index = chapter.index
            indices.append(sc)
            sc.depth = 1
            self.book_has_subchapters = True
        if subchapters:
            chapter.first_child_index = subchapters[0].index
            chapter.last_child_index = subchapters[-1].index

    return indices
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user