mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
New MOBI output: Start work on support for hierarchical books
This commit is contained in:
parent
b203704a6b
commit
7e29ea72ad
@ -646,7 +646,7 @@ class Tag(object): # {{{
|
|||||||
|
|
||||||
INTERPRET_MAP = {
|
INTERPRET_MAP = {
|
||||||
'subchapter': {
|
'subchapter': {
|
||||||
5 : ('Parent chapter index', 'parent_index')
|
21 : ('Parent chapter index', 'parent_index')
|
||||||
},
|
},
|
||||||
|
|
||||||
'article' : {
|
'article' : {
|
||||||
@ -702,7 +702,8 @@ class Tag(object): # {{{
|
|||||||
self.desc, self.attr = td[tag_type]
|
self.desc, self.attr = td[tag_type]
|
||||||
except:
|
except:
|
||||||
print ('Unknown tag value: %d'%tag_type)
|
print ('Unknown tag value: %d'%tag_type)
|
||||||
self.desc = '??Unknown (tag value: %d)'%tag_type
|
self.desc = '??Unknown (tag value: %d type: %s)'%(
|
||||||
|
tag_type, entry_type)
|
||||||
self.attr = 'unknown'
|
self.attr = 'unknown'
|
||||||
if '_offset' in self.attr:
|
if '_offset' in self.attr:
|
||||||
self.cncx_value = cncx[self.value]
|
self.cncx_value = cncx[self.value]
|
||||||
@ -750,7 +751,7 @@ class IndexEntry(object): # {{{
|
|||||||
try:
|
try:
|
||||||
self.entry_type = self.TYPES[entry_type]
|
self.entry_type = self.TYPES[entry_type]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise ValueError('Unknown Index Entry type: %s'%hex(entry_type))
|
raise ValueError('Unknown Index Entry type: %s'%bin(entry_type))
|
||||||
|
|
||||||
if control_byte_count not in (1, 2):
|
if control_byte_count not in (1, 2):
|
||||||
raise ValueError('Unknown control byte count: %d'%
|
raise ValueError('Unknown control byte count: %d'%
|
||||||
@ -1223,8 +1224,7 @@ class TBSIndexing(object): # {{{
|
|||||||
tbs_type = 0
|
tbs_type = 0
|
||||||
is_periodical = self.doc_type in (257, 258, 259)
|
is_periodical = self.doc_type in (257, 258, 259)
|
||||||
if len(byts):
|
if len(byts):
|
||||||
outermost_index, extra, consumed = decode_tbs(byts, flag_size=4 if
|
outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
|
||||||
is_periodical else 3)
|
|
||||||
byts = byts[consumed:]
|
byts = byts[consumed:]
|
||||||
for k in extra:
|
for k in extra:
|
||||||
tbs_type |= k
|
tbs_type |= k
|
||||||
|
@ -109,6 +109,20 @@ class TAGX(object): # {{{
|
|||||||
list(map(self.add_tag, (11, 0)))
|
list(map(self.add_tag, (11, 0)))
|
||||||
return self.header(1) + bytes(self.byts)
|
return self.header(1) + bytes(self.byts)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TAGX_BOOK(TAGX):
|
||||||
|
BITMASKS = dict(TAGX.BITMASKS)
|
||||||
|
BITMASKS.update({x:(1 << i) for i, x in enumerate([1, 2, 3, 4, 21, 22, 23])})
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hierarchical_book(self):
|
||||||
|
'''
|
||||||
|
TAGX block for the primary index header of a hierarchical book
|
||||||
|
'''
|
||||||
|
list(map(self.add_tag, (1, 2, 3, 4, 21, 22, 23, 0)))
|
||||||
|
return self.header(1) + bytes(self.byts)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def flat_book(self):
|
def flat_book(self):
|
||||||
'''
|
'''
|
||||||
@ -117,6 +131,7 @@ class TAGX(object): # {{{
|
|||||||
list(map(self.add_tag, (1, 2, 3, 4, 0)))
|
list(map(self.add_tag, (1, 2, 3, 4, 0)))
|
||||||
return self.header(1) + bytes(self.byts)
|
return self.header(1) + bytes(self.byts)
|
||||||
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
# Index Entries {{{
|
# Index Entries {{{
|
||||||
@ -187,6 +202,9 @@ class IndexEntry(object):
|
|||||||
ans |= TAGX.BITMASKS[tag]
|
ans |= TAGX.BITMASKS[tag]
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
def attr_for_tag(self, tag):
|
||||||
|
return self.RTAG_MAP[tag]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def bytestring(self):
|
def bytestring(self):
|
||||||
buf = StringIO()
|
buf = StringIO()
|
||||||
@ -210,7 +228,7 @@ class IndexEntry(object):
|
|||||||
buf.write(bytes(bytearray([flags])))
|
buf.write(bytes(bytearray([flags])))
|
||||||
|
|
||||||
for tag in self.tag_nums:
|
for tag in self.tag_nums:
|
||||||
attr = self.RTAG_MAP[tag]
|
attr = self.attr_for_tag(tag)
|
||||||
val = getattr(self, attr)
|
val = getattr(self, attr)
|
||||||
if isinstance(val, int):
|
if isinstance(val, int):
|
||||||
val = [val]
|
val = [val]
|
||||||
@ -226,6 +244,17 @@ class IndexEntry(object):
|
|||||||
ans = buf.getvalue()
|
ans = buf.getvalue()
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
class BookIndexEntry(IndexEntry):
|
||||||
|
|
||||||
|
@property
|
||||||
|
def entry_type(self):
|
||||||
|
tagx = TAGX_BOOK()
|
||||||
|
ans = 0
|
||||||
|
for tag in self.tag_nums:
|
||||||
|
ans |= tagx.BITMASKS[tag]
|
||||||
|
return ans
|
||||||
|
|
||||||
|
|
||||||
class PeriodicalIndexEntry(IndexEntry):
|
class PeriodicalIndexEntry(IndexEntry):
|
||||||
|
|
||||||
def __init__(self, offset, label_offset, class_offset, depth):
|
def __init__(self, offset, label_offset, class_offset, depth):
|
||||||
@ -461,7 +490,6 @@ class Indexer(object): # {{{
|
|||||||
if not desc: desc = _('No details available')
|
if not desc: desc = _('No details available')
|
||||||
node.author, node.description = aut, desc
|
node.author, node.description = aut, desc
|
||||||
|
|
||||||
|
|
||||||
self.cncx = CNCX(oeb.toc, self.is_periodical)
|
self.cncx = CNCX(oeb.toc, self.is_periodical)
|
||||||
|
|
||||||
if self.is_periodical:
|
if self.is_periodical:
|
||||||
@ -529,7 +557,9 @@ class Indexer(object): # {{{
|
|||||||
tagx_block = TAGX().secondary
|
tagx_block = TAGX().secondary
|
||||||
else:
|
else:
|
||||||
tagx_block = (TAGX().periodical if self.is_periodical else
|
tagx_block = (TAGX().periodical if self.is_periodical else
|
||||||
TAGX().flat_book)
|
(TAGX_BOOK().hierarchical_book if
|
||||||
|
self.book_has_subchapters else
|
||||||
|
TAGX_BOOK().flat_book))
|
||||||
header_length = 192
|
header_length = 192
|
||||||
|
|
||||||
# Ident 0 - 4
|
# Ident 0 - 4
|
||||||
@ -615,26 +645,52 @@ class Indexer(object): # {{{
|
|||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def create_book_index(self): # {{{
|
def create_book_index(self): # {{{
|
||||||
|
self.book_has_subchapters = False
|
||||||
indices = []
|
indices = []
|
||||||
seen = set()
|
seen, sub_seen = set(), set()
|
||||||
id_offsets = self.serializer.id_offsets
|
id_offsets = self.serializer.id_offsets
|
||||||
|
|
||||||
for node in self.oeb.toc.iterdescendants():
|
# Flatten toc to contain only chapters and subchapters
|
||||||
|
# Anything deeper than a subchapter is made into a subchapter
|
||||||
|
chapters = []
|
||||||
|
for node in self.oeb.toc:
|
||||||
try:
|
try:
|
||||||
offset = id_offsets[node.href]
|
offset = id_offsets[node.href]
|
||||||
label = self.cncx[node.title]
|
label = self.cncx[node.title]
|
||||||
except:
|
except:
|
||||||
self.log.warn('TOC item %s not found in document'%node.href)
|
self.log.warn('TOC item %s [%s] not found in document'%(
|
||||||
|
node.title, node.href))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if offset in seen:
|
if offset in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(offset)
|
seen.add(offset)
|
||||||
index = IndexEntry(offset, label)
|
|
||||||
indices.append(index)
|
|
||||||
|
|
||||||
indices.sort(key=lambda x:x.offset)
|
subchapters = []
|
||||||
|
chapters.append((offset, label, subchapters))
|
||||||
|
|
||||||
# Set lengths
|
for descendant in node.iterdescendants():
|
||||||
|
try:
|
||||||
|
offset = id_offsets[descendant.href]
|
||||||
|
label = self.cncx[descendant.title]
|
||||||
|
except:
|
||||||
|
self.log.warn('TOC item %s [%s] not found in document'%(
|
||||||
|
descendant.title, descendant.href))
|
||||||
|
continue
|
||||||
|
|
||||||
|
if offset in sub_seen:
|
||||||
|
continue
|
||||||
|
sub_seen.add(offset)
|
||||||
|
subchapters.append((offset, label))
|
||||||
|
|
||||||
|
subchapters.sort(key=lambda x:x[0])
|
||||||
|
|
||||||
|
chapters.sort(key=lambda x:x[0])
|
||||||
|
|
||||||
|
chapters = [(BookIndexEntry(x[0], x[1]), [
|
||||||
|
BookIndexEntry(y[0], y[1]) for y in x[2]]) for x in chapters]
|
||||||
|
|
||||||
|
def set_length(indices):
|
||||||
for i, index in enumerate(indices):
|
for i, index in enumerate(indices):
|
||||||
try:
|
try:
|
||||||
next_offset = indices[i+1].offset
|
next_offset = indices[i+1].offset
|
||||||
@ -642,20 +698,45 @@ class Indexer(object): # {{{
|
|||||||
next_offset = self.serializer.body_end_offset
|
next_offset = self.serializer.body_end_offset
|
||||||
index.length = next_offset - index.offset
|
index.length = next_offset - index.offset
|
||||||
|
|
||||||
# Remove empty nodes
|
# Set chapter and subchapter lengths
|
||||||
indices = [i for i in indices if i.length > 0]
|
set_length([x[0] for x in chapters])
|
||||||
|
for x in chapters:
|
||||||
|
set_length(x[1])
|
||||||
|
|
||||||
# Set index values
|
# Remove empty chapters
|
||||||
for i, index in enumerate(indices):
|
chapters = [x for x in chapters if x[0].length > 0]
|
||||||
index.index = i
|
|
||||||
|
|
||||||
# Set lengths again to close up any gaps left by filtering
|
# Remove invalid subchapters
|
||||||
for i, index in enumerate(indices):
|
for i, x in enumerate(list(chapters)):
|
||||||
try:
|
chapter, subchapters = x
|
||||||
next_offset = indices[i+1].offset
|
ok_subchapters = []
|
||||||
except:
|
for sc in subchapters:
|
||||||
next_offset = self.serializer.body_end_offset
|
if sc.offset < chapter.next_offset and sc.length > 0:
|
||||||
index.length = next_offset - index.offset
|
ok_subchapters.append(sc)
|
||||||
|
chapters[i] = (chapter, ok_subchapters)
|
||||||
|
|
||||||
|
# Reset chapter and subchapter lengths in case any were removed
|
||||||
|
set_length([x[0] for x in chapters])
|
||||||
|
for x in chapters:
|
||||||
|
set_length(x[1])
|
||||||
|
|
||||||
|
# Set index and depth values
|
||||||
|
indices = []
|
||||||
|
for index, x in enumerate(chapters):
|
||||||
|
x[0].index = index
|
||||||
|
indices.append(x[0])
|
||||||
|
|
||||||
|
for chapter, subchapters in chapters:
|
||||||
|
for sc in subchapters:
|
||||||
|
index += 1
|
||||||
|
sc.index = index
|
||||||
|
sc.parent_index = chapter.index
|
||||||
|
indices.append(sc)
|
||||||
|
sc.depth = 1
|
||||||
|
self.book_has_subchapters = True
|
||||||
|
if subchapters:
|
||||||
|
chapter.first_child_index = subchapters[0].index
|
||||||
|
chapter.last_child_index = subchapters[-1].index
|
||||||
|
|
||||||
return indices
|
return indices
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user