diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index b85d73f55c..67f20e691f 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -12,7 +12,7 @@ from collections import OrderedDict, defaultdict from calibre.utils.date import utc_tz from calibre.ebooks.mobi.langcodes import main_language, sub_language from calibre.ebooks.mobi.utils import (decode_hex_number, decint, - get_trailing_data, decode_fvwi) + get_trailing_data, decode_tbs) from calibre.utils.magick.draw import identify_data # PalmDB {{{ @@ -949,20 +949,22 @@ class TBSIndexing(object): # {{{ ans.append(('\t\tIndex Entry: %d (Parent index: %d, ' 'Depth: %d, Offset: %d, Size: %d) [%s]')%( x.index, x.parent_index, x.depth, x.offset, x.size, x.label)) - def bin3(num): + def bin4(num): ans = bin(num)[2:] - return '0'*(3-len(ans)) + ans + return bytes('0'*(4-len(ans)) + ans) + + def repr_extra(x): + return str({bin4(k):v for k, v in extra.iteritems()}) tbs_type = 0 if len(byts): - outer, consumed = decint(byts) + outermost_index, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - tbs_type = outer & 0b111 - ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type)) - ans.append('Outer Index entry: %d'%(outer >> 3)) - arg1, consumed = decint(byts) - byts = byts[consumed:] - ans.append('Unknown (vwi: always 0?): %d'%arg1) + for k in extra: + tbs_type |= k + ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type))) + ans.append('Outermost index: %d'%outermost_index) + ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) if self.doc_type in (257, 259): # Hierarchical periodical byts, a = self.interpret_periodical(tbs_type, byts, dat['geom'][0]) @@ -977,53 +979,21 @@ class TBSIndexing(object): # {{{ def interpret_periodical(self, tbs_type, byts, record_offset): ans = [] - def tbs_type_6(byts, psi=None, msg=None, fmsg='Unknown'): # {{{ - if psi is None: - # Assume parent section is 1 - psi = self.get_index(1) - if msg is None: - msg = 
('Article index at start of record or first article' - ' index, relative to parent section') - if byts: - # byts could be empty - arg, consumed = decint(byts) - byts = byts[consumed:] - flags = (arg & 0b1111) - ai = (arg >> 4) - ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai, - ai+psi.index)) - if flags == 1: - arg, consumed = decint(byts) - if arg == 0: - # EOF of record, otherwise ignore and hope someone else - # will deal with these bytes - byts = byts[consumed:] - ans.append('EOF (vwi: should be 0): %d'%arg) - elif flags in (4, 5): - num = byts[0] - byts = byts[1:] - ans.append('Number of article nodes in the record (byte): %d'%num) - if flags == 5: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('%s (vwi)): %d'%(fmsg, arg)) - elif flags == 0: - pass - else: - raise ValueError('Unknown flags: %d'%flags) - return byts - - # }}} - def read_section_transitions(byts, psi=None): # {{{ if psi is None: - # Assume parent section is 1 + # Assume previous section is 1 psi = self.get_index(1) while byts: - ai, flags, consumed = decode_fvwi(byts) + ai, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - if flags & 0b1000: + if extra.get(0b0010, None) is not None: + raise ValueError('Dont know how to interpret flag 0b0010' + ' while reading section transitions') + if extra.get(0b1000, None) is not None: + if len(extra) > 1: + raise ValueError('Dont know how to interpret flags' + ' %r while reading section transitions'%extra) nsi = self.get_index(psi.index+1) ans.append('Last article in this record of section %d' ' (relative to next section index [%d]): ' @@ -1036,113 +1006,57 @@ class TBSIndexing(object): # {{{ ' (relative to its parent section): ' '%d [%d absolute index]'%(psi.index, ai, ai+psi.index)) - if flags == 0: - ans.append('The section %d has only one article' - ' in this record'%psi.index) - continue + num = extra.get(0b0100, None) + if num is None: + msg = ('The section %d has at most one article' + ' in this record')%psi.index + 
else: + msg = ('Number of articles in this record of ' + 'section %d: %d')%(psi.index, num) + ans.append(msg) - if flags & 0b0100: - num = byts[0] - byts = byts[1:] - ans.append('Number of articles in this record of ' - 'section %d: %d'%(psi.index, num)) - - if flags & 0b0010: - raise ValueError( - 'Dont know how to interpret the 0b0010 flag') - - if flags & 0b0001: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('->Offset to start of next section (%d) from start' + offset = extra.get(0b0001, None) + if offset is not None: + if offset == 0: + ans.append('This record is spanned by the article:' + '%d'%(ai+psi.index)) + else: + ans.append('->Offset to start of next section (%d) from start' ' of record: %d [%d absolute offset]'%(psi.index+1, - arg, arg+record_offset)) + offset, offset+record_offset)) + return byts # }}} - if tbs_type == 3: # {{{ - arg2, consumed = decint(byts) + def read_starting_section(byts): # {{{ + si, extra, consumed = decode_tbs(byts) byts = byts[consumed:] - ans.append('Unknown (vwi: always 0?): %d'%arg2) - - arg3, consumed = decint(byts) - byts = byts[consumed:] - fsi = arg3 >> 4 - flags = arg3 & 0b1111 - ans.append('First section index (fvwi): %d'%fsi) - psi = self.get_index(fsi) - ans.append('Flags: %d'%flags) - if flags == 4: - ans.append('Number of articles in this section: %d'%byts[0]) - byts = byts[1:] - elif flags == 0: - pass - else: - raise ValueError('Unknown flags value: %d'%flags) - byts = read_section_transitions(byts, psi) - - # }}} - - elif tbs_type == 7: # {{{ - # This occurs for records that have no section nodes and - # whose parent section's index == 1 - ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2])) - byts = byts[2:] - arg, consumed = decint(byts) - byts = byts[consumed:] - ai = arg >> 4 - flags = arg & 0b1111 - ans.append('Article at start of record (fvwi): %d'%ai) - if flags == 4: - num = byts[0] - byts = byts[1:] - ans.append('Number of articles in record (byte): %d'%num) - elif flags == 0: - 
pass - elif flags == 1: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('EOF (vwi: should be 0): %d'%arg) - else: - raise ValueError('Unknown flags value: %d'%flags) + if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra: + raise ValueError('Dont know how to interpret flags %r' + ' when reading starting section'%extra) + si = self.get_index(si) + ans.append('The section at the start of this record is:' + ' %d'%si.index) + if 0b0100 in extra: + num = extra[0b0100] + ans.append('The number of articles from the section %d' + ' in this record: %d'%(si.index, num)) + elif 0b0001 in extra: + eof = extra[0b0001] + if eof != 0: + raise ValueError('Unknown eof value %s when reading' + ' starting section'%eof) + ans.append('This record is spanned by an article from' + ' the section: %d'%si.index) + return si, byts # }}} - elif tbs_type == 6: # {{{ - # This is used for records spanned by an article whose parent - # section's index == 1 or for the opening record if it contains the - # periodical start, section 1 start and at least one article. The - # two cases are distinguished by the flags on the article index - # vwi. - unk = byts[0] - byts = byts[1:] - ans.append('Unknown (byte: always 2?): %d'%unk) - byts = tbs_type_6(byts) - # }}} + if tbs_type & 0b0100: + # Starting section is the first section + ssi = self.get_index(1) + else: + ssi, byts = read_starting_section(byts) - elif tbs_type == 2: # {{{ - # This occurs for records with no section nodes and whose parent - # section's index != 1 (undefined (records before the first - # section) or > 1) - # This is also used for records that are spanned by an article - # whose parent section index > 1. In this case the flags of the - # vwi referring to the article at the start - # of the record are set to 1 instead of 4. 
- arg, consumed = decint(byts) - byts = byts[consumed:] - flags = (arg & 0b1111) - psi = (arg >> 4) - ans.append('Parent section index (fvwi): %d'%psi) - psi = self.get_index(psi) - ans.append('Flags: %d'%flags) - if flags == 1: - arg, consumed = decint(byts) - byts = byts[consumed:] - ans.append('Unknown (vwi?: always 0?): %d'%arg) - byts = tbs_type_6(byts, psi=psi) - elif flags == 0: - byts = tbs_type_6(byts, psi=psi) - else: - raise ValueError('Unknown flags: %d'%flags) - # }}} + byts = read_section_transitions(byts, ssi) return byts, ans diff --git a/src/calibre/ebooks/mobi/tbs_periodicals.rst b/src/calibre/ebooks/mobi/tbs_periodicals.rst index d770133625..2fa6ec90f3 100644 --- a/src/calibre/ebooks/mobi/tbs_periodicals.rst +++ b/src/calibre/ebooks/mobi/tbs_periodicals.rst @@ -3,6 +3,20 @@ Reverse engineering the trailing byte sequences for hierarchical periodicals In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. All the following information/inferences are from examining the output of kindlegen on a sample periodical. Given the general level of Amazon's incompetence, there are no guarantees that this information is the *best/most complete* way to do TBS indexing. +Sequence encoding: + +0b1000 : Continuation bit + +First sequences: +0b0010 : 80 +0b0011 : 80 80 +0b0110 : 80 2 +0b0111 : 80 2 80 + +Other sequences: +0b0101 : 4 1a +0b0001 : c b1 + Opening record ---------------- @@ -52,10 +66,60 @@ The text record that contains the opening node for the periodical (depth=0 node If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record. 
+ Starting record with two section transitions:: + + Record #1: Starts at: 0 Ends at: 4095 + Contains: 7 index entries (0 ends, 4 complete, 3 starts) + TBS bytes: 86 80 2 c0 b8 c4 3 + Complete: + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica] + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz] + Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 1014) [Max and the Magic Marker for iPad: Review] + Index Entry: 7 (Parent index: 2, Depth: 2, Offset: 1961, Size: 1077) [iPad 2 steers itself into home console gaming territory with Real Racing 2 HD] + Starts: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 35372) [j_x's Google reader] + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 10368) [Neowin.net] + Index Entry: 8 (Parent index: 2, Depth: 2, Offset: 3038, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] + Remaining bytes: b8 c4 3 + + Starting record with three section transitions:: + + Record #1: Starts at: 0 Ends at: 4095 + Contains: 10 index entries (0 ends, 7 complete, 3 starts) + TBS bytes: 86 80 2 c0 b8 c0 b8 c4 4 + Complete: + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 564, Size: 375) [Ars Technica] + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 939, Size: 316) [Neowin.net] + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 572, Size: 367) [Week in gaming: 3DS review, Crysis 2, George Hotz] + Index Entry: 6 (Parent index: 2, Depth: 2, Offset: 947, Size: 308) [Max and the Magic Marker for iPad: Review] + Index Entry: 7 (Parent index: 3, Depth: 2, Offset: 1263, Size: 760) [OSnews Asks on Interrupts: The Results] + Index Entry: 8 (Parent index: 3, Depth: 2, Offset: 
2023, Size: 693) [Apple Ditches SAMBA in Favour of Homegrown Replacement] + Index Entry: 9 (Parent index: 3, Depth: 2, Offset: 2716, Size: 747) [ITC: Apple's Mobile Products Do Not Violate Nokia Patents] + Starts: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 25320) [j_x's Google reader] + Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 1255, Size: 6829) [OSNews] + Index Entry: 10 (Parent index: 3, Depth: 2, Offset: 3463, Size: 666) [Transparent Monitor Embedded in Window Glass] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] + Remaining bytes: b8 c0 b8 c4 4 + + + + Records with no nodes ------------------------ +subtype = 010 + These records are spanned by a single article. They are of two types: 1. If the parent section index is 1, TBS type of 6, like this:: @@ -247,7 +311,7 @@ In such a record there is a transition from one section to the next. As such the Last article of ending section w.r.t. 
starting section offset (fvwi): 12 [15 absolute] Flags (always 8?): 8 Article index at start of record or first article index, relative to parent section (fvwi): 13 [16 absolute] - Number of article nodes in the record (byte): 4 + Number of article nodes in the record belonging of the last section (byte): 4 Ending record @@ -274,3 +338,26 @@ Logically, ending records must have at least one article ending, one section end If the record had only a single article end, the last two bytes would be replaced with: f0 +If the last record has multiple section transitions, it is of type 6 and looks like:: + + Record #9: Starts at: 32768 Ends at: 34953 + Contains: 9 index entries (3 ends, 6 complete, 0 starts) + TBS bytes: 86 80 2 1 d0 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0 + Ends: + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 34739) [j_x's Google reader] + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica] + Index Entry: 14 (Parent index: 1, Depth: 2, Offset: 31929, Size: 2108) [Trademarked keyword sales may soon be restricted in Europe] + Complete: + Index Entry: 2 (Parent index: 0, Depth: 1, Offset: 34037, Size: 316) [Neowin.net] + Index Entry: 3 (Parent index: 0, Depth: 1, Offset: 34353, Size: 282) [OSNews] + Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 34635, Size: 319) [Slashdot] + Index Entry: 15 (Parent index: 2, Depth: 2, Offset: 34045, Size: 308) [Max and the Magic Marker for iPad: Review] + Index Entry: 16 (Parent index: 3, Depth: 2, Offset: 34361, Size: 274) [OSnews Asks on Interrupts: The Results] + Index Entry: 17 (Parent index: 4, Depth: 2, Offset: 34643, Size: 311) [Leonard Nimoy Turns 80] + TBS Type: 110 (6) + Outer Index entry: 0 + Unknown (vwi: always 0?): 0 + Unknown (byte: always 2?): 2 + Article index at start of record or first article index, relative to parent section (fvwi): 13 [14 absolute] + Remaining bytes: 1 c8 1 d0 1 c8 1 d0 1 c8 1 d0 + diff --git a/src/calibre/ebooks/mobi/utils.py
def encode_fvwi(val, flags):
    '''
    Pack val and a four bit flags field into a single fvwi, the encoding used
    by the trailing byte sequences for indexing. The flags occupy the lowest
    four bits (any higher bits of flags are discarded) and val is stored above
    them. Returns the encoded bytestring.
    '''
    low_nibble = flags & 0b1111
    return encint((val << 4) | low_nibble)


def decode_fvwi(byts):
    '''
    Inverse of encode_fvwi. Returns the tuple (number, flags, consumed) where
    consumed is the number of bytes read from byts.
    '''
    raw, consumed = decint(bytes(byts))
    number = raw >> 4
    flags = raw & 0b1111
    return number, flags, consumed


def decode_tbs(byts):
    '''
    Read one entry of a trailing byte sequence for indexing. An entry is a
    fvwi followed by zero or more extra values, whose presence is signalled
    by the flag bits (the lowest four bits) of the fvwi:

        0b1000 : no extra data, recorded as True
        0b0010 : a vwi follows
        0b0100 : a single byte follows
        0b0001 : a vwi follows

    The extra values are read in the order listed above.

    Returns the fvwi number, a dictionary mapping each set flag bit to its
    associated data and the total number of bytes consumed.
    '''
    byts = bytes(byts)
    val, flags, pos = decode_fvwi(byts)
    extra = {}

    if flags & 0b1000:
        extra[0b1000] = True

    if flags & 0b0010:
        num, used = decint(byts[pos:])
        extra[0b0010] = num
        pos += used

    if flags & 0b0100:
        extra[0b0100] = ord(byts[pos])
        pos += 1

    if flags & 0b0001:
        num, used = decint(byts[pos:])
        extra[0b0001] = num
        pos += used

    return val, extra, pos


def encode_tbs(val, extra):
    '''
    Inverse of decode_tbs. The keys of extra are OR-ed together to form the
    flags of the fvwi that stores val; the values for the 0b0010, 0b0100 and
    0b0001 keys (when present) are appended after it, in that order, as a
    vwi, a single byte and a vwi respectively. Returns the encoded bytestring.
    '''
    flags = 0
    for bit in extra:
        flags |= bit

    encoded = encode_fvwi(val, flags)
    if 0b0010 in extra:
        encoded += encint(extra[0b0010])
    if 0b0100 in extra:
        single_byte = bytearray([extra[0b0100]])
        encoded += bytes(single_byte)
    if 0b0001 in extra:
        encoded += encint(extra[0b0001])
    return encoded


def utf8_text(text):
    '''
    Return text as a stripped, normalized, UTF-8 encoded bytestring. If text
    is empty, None or consists only of whitespace, the translated word
    'Unknown' is returned instead, so the result is never empty.
    '''
    stripped = text.strip() if text else None
    if not stripped:
        return _('Unknown').encode('utf-8')
    if not isinstance(stripped, unicode):
        # Bytestrings are assumed to be UTF-8; undecodable bytes are replaced
        stripped = stripped.decode('utf-8', 'replace')
    return normalize(stripped).encode('utf-8')


def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with just enough copies of pad appended to make its length a
    multiple of multiple. If the length is already aligned, raw is returned
    unchanged.
    '''
    remainder = len(raw) % multiple
    if remainder:
        return raw + pad * (multiple - remainder)
    return raw
class TBS(object): # {{{

    '''
    Compute the trailing byte sequence (TBS) of a single text record from the
    index nodes that start, end, complete or span within that record. The
    encoded result is available as self.bytestring.
    '''

    def __init__(self, data, is_periodical, first=False, all_sections=()):
        '''
        :param data: Mapping with the keys 'starts', 'ends' and 'completes'
            (lists of index nodes), 'spans' (an index node or None) and
            'offset' (offset of the start of the record). A false value
            means the record has no index nodes, in which case an empty
            trailing byte sequence is generated.
        :param is_periodical: If True a periodical TBS is generated,
            otherwise a book TBS.
        :param first: Passed on unchanged to periodical_tbs()/book_tbs(),
            neither of which currently uses it.
        :param all_sections: Iterable of all section nodes. It is only read,
            never modified.
        '''
        if not data:
            self.bytestring = encode_trailing_data(b'')
            return

        # Sections keyed by their index, ordered by offset. Used to find the
        # section an article belongs to from the article's parent_index.
        self.section_map = OrderedDict((i.index, i) for i in
                sorted(all_sections, key=lambda x: x.offset))

        if not is_periodical:
            self.book_tbs(data, first)
            return

        # The starting bytes.
        # The value is zero which I think indicates the periodical
        # index entry. The values for the various flags seem to be
        # unused. If the 0b0100 is present, it means that the record
        # deals with section 1 (or is the final record with section
        # transitions).
        self.type_010 = encode_tbs(0, {0b0010: 0})
        self.type_011 = encode_tbs(0, {0b0010: 0, 0b0001: 0})
        self.type_110 = encode_tbs(0, {0b0100: 2, 0b0010: 0})
        self.type_111 = encode_tbs(0, {0b0100: 2, 0b0010: 0, 0b0001: 0})

        # Group the nodes that start, end or complete in this record by
        # depth, each group sorted by offset.
        depth_map = defaultdict(list)
        for key in ('starts', 'ends', 'completes'):
            for node in data[key]:
                depth_map[node.depth].append(node)
        for nodes in depth_map.values():
            nodes.sort(key=lambda x: x.offset)
        self.periodical_tbs(data, first, depth_map)

    def periodical_tbs(self, data, first, depth_map):
        '''
        Generate the TBS for a record of a periodical and store it in
        self.bytestring.

        :param data: See __init__()
        :param first: Unused
        :param depth_map: defaultdict(list) mapping depth to the list of
            nodes of that depth that start, end or complete in this record,
            sorted by offset. Depth 1 nodes are treated as sections and
            depth 2 nodes as articles.
        '''
        parts = []

        has_section_start = (depth_map[1] and depth_map[1][0] in
                data['starts'])
        spanner = data['spans']

        # The node with the smallest offset in this record, ties being
        # broken in favor of the node with the smaller depth
        all_nodes = [n for nodes in depth_map.values() for n in nodes]
        first_node = None
        if all_nodes:
            first_node = min(all_nodes, key=lambda n: (n.offset, n.depth))

        parent_section_index = -1
        if depth_map[0]:
            # We have a terminal record
            typ = (self.type_110 if has_section_start else self.type_010)
            if first_node.depth > 0:
                parent_section_index = (first_node.index if first_node.depth
                        == 1 else first_node.parent_index)
        elif spanner is not None:
            # record is spanned by a single article
            parent_section_index = spanner.parent_index
            typ = (self.type_110 if parent_section_index == 1 else
                    self.type_010)
        elif not depth_map[1]:
            # has only article nodes, i.e. spanned by a section
            parent_section_index = depth_map[2][0].parent_index
            typ = (self.type_111 if parent_section_index == 1 else
                    self.type_010)
        else:
            # has section transitions
            if depth_map[2]:
                parent_section_index = depth_map[2][0].parent_index
            else:
                # No articles in this record, use the first section itself
                # (same rule as is used for terminal records above)
                parent_section_index = depth_map[1][0].index
            # NOTE(review): typ was previously left unbound in this branch
            # (UnboundLocalError). type_011 is the only start sequence
            # defined in __init__ that is not used anywhere else, so it is
            # presumably the one intended here -- confirm against kindlegen
            # output.
            typ = self.type_011

        parts.append(typ)

        if parent_section_index > 1:
            # Write starting section information
            if spanner is None:
                # NOTE(review): this counts the depth 1 (section) nodes in
                # the record even though it is named num_articles -- verify
                num_articles = len(depth_map[1])
                extra = {}
                if num_articles > 1:
                    extra = {0b0100: num_articles}
            else:
                extra = {0b0001: 0}
            parts.append(encode_tbs(parent_section_index, extra))

        if spanner is None:
            # Group the articles in this record by parent section. Every
            # section must appear only once, no matter how many of its
            # articles are in this record.
            by_section = OrderedDict()
            for article in depth_map[2]:
                by_section.setdefault(article.parent_index, []).append(
                        article)
            sections = sorted((self.section_map[idx] for idx in by_section),
                    key=lambda x: x.offset)

            for i, section in enumerate(sections):
                # All the articles in this record that belong to section
                articles = by_section[section.index]
                first_article = articles[0]
                last_article = articles[-1]
                num = len(articles)

                next_sec = sections[i+1] if i + 1 < len(sections) else None

                extra = {}
                if num > 1:
                    extra[0b0100] = num
                if i == 0 and next_sec is not None:
                    # Write offset to next section from start of record
                    # For some reason kindlegen only writes this offset
                    # for the first section transition. Imitate it.
                    extra[0b0001] = next_sec.offset - data['offset']

                parts.append(encode_tbs(first_article.index-section.index,
                    extra))

                if next_sec is not None:
                    parts.append(encode_tbs(
                        last_article.index-next_sec.index, {0b1000: 0}))
        else:
            parts.append(encode_tbs(spanner.index - parent_section_index,
                {0b0001: 0}))

        self.bytestring = encode_trailing_data(b''.join(parts))

    def book_tbs(self, data, first):
        '''
        Generate the TBS for a record of a book. Currently this is always
        the encoding of an empty trailing byte sequence.
        '''
        self.bytestring = encode_trailing_data(b'')

# }}}