New MOBI output: Start work on support for hierarchical books

This commit is contained in:
Kovid Goyal 2011-08-04 20:03:54 -06:00
parent b203704a6b
commit 7e29ea72ad
2 changed files with 114 additions and 33 deletions

View File

@ -646,7 +646,7 @@ class Tag(object): # {{{
INTERPRET_MAP = {
'subchapter': {
5 : ('Parent chapter index', 'parent_index')
21 : ('Parent chapter index', 'parent_index')
},
'article' : {
@ -702,7 +702,8 @@ class Tag(object): # {{{
self.desc, self.attr = td[tag_type]
except:
print ('Unknown tag value: %d'%tag_type)
self.desc = '??Unknown (tag value: %d)'%tag_type
self.desc = '??Unknown (tag value: %d type: %s)'%(
tag_type, entry_type)
self.attr = 'unknown'
if '_offset' in self.attr:
self.cncx_value = cncx[self.value]
@ -750,7 +751,7 @@ class IndexEntry(object): # {{{
try:
self.entry_type = self.TYPES[entry_type]
except KeyError:
raise ValueError('Unknown Index Entry type: %s'%hex(entry_type))
raise ValueError('Unknown Index Entry type: %s'%bin(entry_type))
if control_byte_count not in (1, 2):
raise ValueError('Unknown control byte count: %d'%
@ -1223,8 +1224,7 @@ class TBSIndexing(object): # {{{
tbs_type = 0
is_periodical = self.doc_type in (257, 258, 259)
if len(byts):
outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
is_periodical else 3)
outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
byts = byts[consumed:]
for k in extra:
tbs_type |= k

View File

@ -109,6 +109,20 @@ class TAGX(object): # {{{
list(map(self.add_tag, (11, 0)))
return self.header(1) + bytes(self.byts)
class TAGX_BOOK(TAGX):
BITMASKS = dict(TAGX.BITMASKS)
BITMASKS.update({x:(1 << i) for i, x in enumerate([1, 2, 3, 4, 21, 22, 23])})
@property
def hierarchical_book(self):
'''
TAGX block for the primary index header of a hierarchical book
'''
list(map(self.add_tag, (1, 2, 3, 4, 21, 22, 23, 0)))
return self.header(1) + bytes(self.byts)
@property
def flat_book(self):
'''
@ -117,6 +131,7 @@ class TAGX(object): # {{{
list(map(self.add_tag, (1, 2, 3, 4, 0)))
return self.header(1) + bytes(self.byts)
# }}}
# Index Entries {{{
@ -187,6 +202,9 @@ class IndexEntry(object):
ans |= TAGX.BITMASKS[tag]
return ans
def attr_for_tag(self, tag):
return self.RTAG_MAP[tag]
@property
def bytestring(self):
buf = StringIO()
@ -210,7 +228,7 @@ class IndexEntry(object):
buf.write(bytes(bytearray([flags])))
for tag in self.tag_nums:
attr = self.RTAG_MAP[tag]
attr = self.attr_for_tag(tag)
val = getattr(self, attr)
if isinstance(val, int):
val = [val]
@ -226,6 +244,17 @@ class IndexEntry(object):
ans = buf.getvalue()
return ans
class BookIndexEntry(IndexEntry):
@property
def entry_type(self):
tagx = TAGX_BOOK()
ans = 0
for tag in self.tag_nums:
ans |= tagx.BITMASKS[tag]
return ans
class PeriodicalIndexEntry(IndexEntry):
def __init__(self, offset, label_offset, class_offset, depth):
@ -461,7 +490,6 @@ class Indexer(object): # {{{
if not desc: desc = _('No details available')
node.author, node.description = aut, desc
self.cncx = CNCX(oeb.toc, self.is_periodical)
if self.is_periodical:
@ -529,7 +557,9 @@ class Indexer(object): # {{{
tagx_block = TAGX().secondary
else:
tagx_block = (TAGX().periodical if self.is_periodical else
TAGX().flat_book)
(TAGX_BOOK().hierarchical_book if
self.book_has_subchapters else
TAGX_BOOK().flat_book))
header_length = 192
# Ident 0 - 4
@ -615,47 +645,98 @@ class Indexer(object): # {{{
# }}}
def create_book_index(self): # {{{
self.book_has_subchapters = False
indices = []
seen = set()
seen, sub_seen = set(), set()
id_offsets = self.serializer.id_offsets
for node in self.oeb.toc.iterdescendants():
# Flatten toc to contain only chapters and subchapters
# Anything deeper than a subchapter is made into a subchapter
chapters = []
for node in self.oeb.toc:
try:
offset = id_offsets[node.href]
label = self.cncx[node.title]
except:
self.log.warn('TOC item %s not found in document'%node.href)
self.log.warn('TOC item %s [%s] not found in document'%(
node.title, node.href))
continue
if offset in seen:
continue
seen.add(offset)
index = IndexEntry(offset, label)
indices.append(index)
indices.sort(key=lambda x:x.offset)
subchapters = []
chapters.append((offset, label, subchapters))
# Set lengths
for i, index in enumerate(indices):
try:
next_offset = indices[i+1].offset
except:
next_offset = self.serializer.body_end_offset
index.length = next_offset - index.offset
for descendant in node.iterdescendants():
try:
offset = id_offsets[descendant.href]
label = self.cncx[descendant.title]
except:
self.log.warn('TOC item %s [%s] not found in document'%(
descendant.title, descendant.href))
continue
# Remove empty nodes
indices = [i for i in indices if i.length > 0]
if offset in sub_seen:
continue
sub_seen.add(offset)
subchapters.append((offset, label))
# Set index values
for i, index in enumerate(indices):
index.index = i
subchapters.sort(key=lambda x:x[0])
# Set lengths again to close up any gaps left by filtering
for i, index in enumerate(indices):
try:
next_offset = indices[i+1].offset
except:
next_offset = self.serializer.body_end_offset
index.length = next_offset - index.offset
chapters.sort(key=lambda x:x[0])
chapters = [(BookIndexEntry(x[0], x[1]), [
BookIndexEntry(y[0], y[1]) for y in x[2]]) for x in chapters]
def set_length(indices):
for i, index in enumerate(indices):
try:
next_offset = indices[i+1].offset
except:
next_offset = self.serializer.body_end_offset
index.length = next_offset - index.offset
# Set chapter and subchapter lengths
set_length([x[0] for x in chapters])
for x in chapters:
set_length(x[1])
# Remove empty chapters
chapters = [x for x in chapters if x[0].length > 0]
# Remove invalid subchapters
for i, x in enumerate(list(chapters)):
chapter, subchapters = x
ok_subchapters = []
for sc in subchapters:
if sc.offset < chapter.next_offset and sc.length > 0:
ok_subchapters.append(sc)
chapters[i] = (chapter, ok_subchapters)
# Reset chapter and subchapter lengths in case any were removed
set_length([x[0] for x in chapters])
for x in chapters:
set_length(x[1])
# Set index and depth values
indices = []
for index, x in enumerate(chapters):
x[0].index = index
indices.append(x[0])
for chapter, subchapters in chapters:
for sc in subchapters:
index += 1
sc.index = index
sc.parent_index = chapter.index
indices.append(sc)
sc.depth = 1
self.book_has_subchapters = True
if subchapters:
chapter.first_child_index = subchapters[0].index
chapter.last_child_index = subchapters[-1].index
return indices