diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index 28aacd6e07..b1276050e6 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -40,6 +40,7 @@ EXTH_CODES = {
'description': 103,
'identifier': 104,
'subject': 105,
+ 'pubdate': 106,
'date': 106,
'review': 107,
'contributor': 108,
@@ -219,8 +220,9 @@ class Serializer(object):
def serialize_body(self):
buffer = self.buffer
- buffer.write('
')
self.anchor_offset = buffer.tell()
+ buffer.write('')
+ self.anchor_offset_kindle = buffer.tell()
# CybookG3 'Start Reading' link
if 'text' in self.oeb.guide:
href = self.oeb.guide['text'].href
@@ -328,12 +330,16 @@ class MobiWriter(object):
self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
self._prefer_author_sort = prefer_author_sort
self._primary_index_record = None
- self._hasValideNCXEntries = False
+ self._conforming_periodical_toc = False
+ self._indexable = False
self._ctoc = ""
self._HTMLRecords = []
self._tbSequence = ""
+ self._MobiDoc = None
+ self._anchor_offset_kindle = 0
self._initialIndexRecordFound = False
-
+ self._firstSectionConcluded = False
+ self._currentSectionIndex = 0
@classmethod
def generate(cls, opts):
@@ -368,12 +374,13 @@ class MobiWriter(object):
def _generate_content(self):
self._map_image_names()
self._generate_text()
- #if INDEXING and not self.opts.no_mobi_index:
- if INDEXING and self._hasValidNCXEntries :
+
+ if INDEXING and self._indexable :
try:
self._generate_index()
except:
- self._oeb.log.exception('Failed to generate index')
+ self._oeb.log.exception('Failed to generate index')
+
self._generate_images()
def _map_image_names(self):
@@ -414,10 +421,10 @@ class MobiWriter(object):
text.seek(npos)
return data, overlap
- def _build_HTMLRecords_Data_List(self):
+ def _generate_flat_indexed_navpoints(self):
# Assemble a HTMLRecordData instance for each HTML record
# Return True if valid, False if invalid
- self._oeb.logger.info('Indexing navPoints ...')
+ self._oeb.logger.info('Indexing flat navPoints ...')
numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1
@@ -436,18 +443,18 @@ class MobiWriter(object):
length = 0
entries = list(toc.iter())[1:]
-
# Get offset, length per entry
- for i, child in enumerate(entries):
-
- '''
+ for (i, child) in enumerate(entries):
if not child.title or not child.title.strip():
child.title = "(none)"
- '''
+
+ if not child.title or not child.title.strip():
+ child.title = "(none)"
+
h = child.href
if h not in self._id_offsets:
- self._oeb.log.warning('Could not find TOC entry:', child.title)
- continue
+ self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title)
+ return False
offset = self._id_offsets[h]
length = None
@@ -463,13 +470,21 @@ class MobiWriter(object):
if length is None:
length = self._content_length - offset
- # Look a gap between nodes
- if (i) :
+ if self.opts.verbose > 3 :
+ self._oeb.logger.info("child %03d: %s" % (i, child))
+ self._oeb.logger.info(" title: %s" % child.title)
+ self._oeb.logger.info(" depth: %d" % child.depth())
+ self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))
+
+ # Look for a gap between chapter nodes. Don't evaluate periodical or section nodes
+ if (i and child.depth() == 1 and entries[i-1].depth() == 1) :
if offset != previousOffset + previousLength :
- self._oeb.log.warning("\tnodes %d and %d have a gap:" % (i-1, i))
- self._oeb.log.warning("\tnode %d offset: 0x%X \t node %d: offset: 0x%X length: 0x%X" % \
- (i, offset, i-1, previousOffset, previousLength) )
- self._oeb.log.warning('Failed to generate index')
+ self._oeb.log.warning("*** TOC discontinuity ***")
+ self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
+ (i-1, entries[i-1].title, previousOffset, previousLength) )
+ self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
+ (i, child.title, offset, previousOffset + previousLength) )
+ self._oeb.log.warning('_generate_flat_indexed_navpoints: Failed to generate index')
# Zero out self._HTMLRecords, return False
self._HTMLRecords = []
last_name = None
@@ -501,13 +516,178 @@ class MobiWriter(object):
self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
interimSpanRecord += 1
- if self.opts.verbose > 3 :self._oeb.logger.info("\tnode %03d %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
+ if self.opts.verbose > 3 :self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, interimSpanRecord, offset, length) )
else :
- if self.opts.verbose > 3 : self._oeb.logger.info("\tnode %03d %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
+ if self.opts.verbose > 3 : self._oeb.logger.info(" node %03d: %-15.15s... spans HTML records %03d - %03d \t offset: 0x%06X length: 0x%06X" % \
(myIndex, child.title if child.title.strip() > "" else "(missing)", myStartingRecord, myStartingRecord, offset, length) )
- ctoc_offset = self._ctoc_map[child]
+ last_name = "%04X" % myIndex
+ myIndex += 1
+
+ # Successfully parsed the entries
+ return True
+
+ def _generate_indexed_navpoints(self):
+ # Assemble a HTMLRecordData instance for each HTML record
+ # Return True if valid, False if invalid
+ self._oeb.logger.info('Indexing navPoints ...')
+
+ numberOfHTMLRecords = ( self._content_length // RECORD_SIZE ) + 1
+
+ # Create a list of HTMLRecordData class instances
+ x = numberOfHTMLRecords
+ while x:
+ self._HTMLRecords.append(HTMLRecordData())
+ x -= 1
+
+ toc = self._oeb.toc
+ myIndex = 0
+ myEndingRecord = 0
+ previousOffset = 0
+ previousLength = 0
+ offset = 0
+ length = 0
+ sectionChangesInRecordNumber = -1
+ sectionChangesInThisRecord = False
+ entries = list(toc.iter())[1:]
+
+ # Get offset, length per entry
+ for (firstSequentialNode, node) in enumerate(list(self._ctoc_map)) :
+ if node['klass'] != 'article' and node['klass'] != 'chapter' :
+ # Skip periodical and section entries
+ continue
+ else :
+ if self.opts.verbose > 3 :self._oeb.logger.info("\tFirst sequential node: %03d" % firstSequentialNode)
+ break
+
+ for i, child in enumerate(entries):
+ # Entries continue with a stream of section+articles, section+articles ...
+ h = child.href
+ if h not in self._id_offsets:
+ self._oeb.log.warning('Could not find TOC entry "%s", aborting indexing ...'% child.title)
+ return False
+ offset = self._id_offsets[h]
+
+ length = None
+
+ for sibling in entries[i+1:]:
+ h2 = sibling.href
+ if h2 in self._id_offsets:
+ offset2 = self._id_offsets[h2]
+ if offset2 > offset:
+ length = offset2 - offset
+ break
+
+ if length is None:
+ length = self._content_length - offset
+
+ if self.opts.verbose > 3 :
+ self._oeb.logger.info("child %03d: %s" % (i, child))
+ self._oeb.logger.info(" title: %s" % child.title)
+ self._oeb.logger.info(" depth: %d" % child.depth())
+ self._oeb.logger.info(" offset: 0x%06X \tlength: 0x%06X \tnext: 0x%06X" % (offset, length, offset + length))
+
+ # Look for a gap between nodes, articles/chapters only, as
+ # periodical and section lengths cover spans of articles
+ if (i>firstSequentialNode) and self._ctoc_map[i-1]['klass'] != 'section':
+ if offset != previousOffset + previousLength :
+ self._oeb.log.warning("*** TOC discontinuity: nodes are not sequential ***")
+ self._oeb.log.warning(" node %03d: '%s' offset: 0x%X length: 0x%X" % \
+ (i-1, entries[i-1].title, previousOffset, previousLength) )
+ self._oeb.log.warning(" node %03d: '%s' offset: 0x%X != 0x%06X" % \
+ (i, child.title, offset, previousOffset + previousLength) )
+ self._oeb.log.warning("\tnode data %03d: %s" % (i-1, self._ctoc_map[i-1]) )
+ self._oeb.log.warning("\tnode data %03d: %s" % (i, self._ctoc_map[i]) )
+ self._oeb.log.warning('_generate_indexed_navpoints: Failed to generate index')
+ # Zero out self._HTMLRecords, return False
+ self._HTMLRecords = []
+ last_name = None
+ return False
+
+ previousOffset = offset
+ previousLength = length
+
+ # Calculate the HTML record for this entry
+ thisRecord = offset // RECORD_SIZE
+
+ # Store the current continuingNodeParent and openingNodeParent
+ if self._ctoc_map[i]['klass'] == 'article':
+ if thisRecord > 0 :
+ if sectionChangesInThisRecord :
+ self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex - 1
+ else :
+ self._HTMLRecords[thisRecord].continuingNodeParent = self._currentSectionIndex
+
+ # periodical header?
+ if self._ctoc_map[i]['klass'] == 'periodical' :
+ # INCREMENT currentSectionNode count
+ # Commented out because structured docs don't count section changes in nodeCount
+ # compensation at 948 for flat periodicals
+ # self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
+ continue
+
+ # Is this node a new section?
+ if self._ctoc_map[i]['klass'] == 'section' :
+ # INCREMENT currentSectionNode count
+ # Commented out because structured docs don't count section changes in nodeCount
+ # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
+
+ # *** This should check currentSectionNumber, because content could start late
+ if thisRecord > 0:
+ sectionChangesInThisRecord = True
+ sectionChangesInRecordNumber = thisRecord
+ self._currentSectionIndex += 1
+ self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
+ # The following node opens the nextSection
+ self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
+ continue
+ else :
+ continue
+
+ # If no one has taken the openingNode slot, it must be us
+ if self._HTMLRecords[thisRecord].openingNode == -1 :
+ self._HTMLRecords[thisRecord].openingNode = myIndex
+ self._HTMLRecords[thisRecord].openingNodeParent = self._currentSectionIndex
+
+ # Bump the nextSection node count while we're in the same record
+ if sectionChangesInRecordNumber == thisRecord :
+ if self._ctoc_map[i]['klass'] == 'article' :
+ if self._HTMLRecords[thisRecord].nextSectionNodeCount == -1:
+ self._HTMLRecords[thisRecord].nextSectionNodeCount = 1
+ else:
+ self._HTMLRecords[thisRecord].nextSectionNodeCount += 1
+ else :
+ # Bump the currentSectionNodeCount one last time
+ self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
+
+ else :
+ # Reset the change record
+ sectionChangesInRecordNumber = -1
+ sectionChangesInThisRecord = False
+ if self._HTMLRecords[thisRecord].currentSectionNodeCount == -1:
+ self._HTMLRecords[thisRecord].currentSectionNodeCount = 1
+ else:
+ self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
+
+ # Fill in the spanning records
+ myEndingRecord = (offset + length) // RECORD_SIZE
+ if myEndingRecord > thisRecord :
+ sectionChangesInThisRecord = False
+ interimSpanRecord = thisRecord + 1
+ while interimSpanRecord <= myEndingRecord :
+ self._HTMLRecords[interimSpanRecord].continuingNode = myIndex
+
+ self._HTMLRecords[interimSpanRecord].continuingNodeParent = self._currentSectionIndex
+ self._HTMLRecords[interimSpanRecord].currentSectionNodeCount = 1
+ interimSpanRecord += 1
+
+ if self.opts.verbose > 3 :self._oeb.logger.info("\tnode %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
+ (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, interimSpanRecord, offset, length) )
+ else :
+ if self.opts.verbose > 3 : self._oeb.logger.info("\tnode %03d %-10.10s %-15.15s... spans HTML records %03d-%03d \t offset: 0x%06X length: 0x%06X" % \
+ (myIndex, self._ctoc_map[i]['klass'], child.title if child.title.strip() > "" else "(missing)", thisRecord, thisRecord, offset, length) )
+
last_name = "%04X" % myIndex
myIndex += 1
@@ -515,11 +695,9 @@ class MobiWriter(object):
return True
- def _build_TBS_Book(self, nrecords, lastrecord):
- if self.opts.verbose > 3 and False :
- self._oeb.logger.info("_build_TBS_Book: HTML record %d of %d" % (nrecords, lastrecord) )
- self._HTMLRecords[nrecords].dumpData(nrecords,self._oeb)
-
+ def _generate_tbs_book(self, nrecords, lastrecord):
+ if self.opts.verbose > 3 :self._oeb.logger.info("Assembling TBS for Book: HTML record %03d of %03d" % \
+ (nrecords, lastrecord) )
# Variables for trailing byte sequence
tbsType = 0x00
tbSequence = ""
@@ -578,11 +756,448 @@ class MobiWriter(object):
tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount)
tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
- if self.opts.verbose > 3 and False:
- self._oeb.logger.info("record %d: tbsType %d" % (nrecords, tbsType) )
+ self._tbSequence = tbSequence
+
+
+ def _generate_tbs_flat_periodical(self, nrecords, lastrecord):
+ # Flat periodicals <0x102> have a single section for all articles
+ # Structured periodicals <0x101 | 0x103> have one or more sections with articles
+ # The first section TBS sequence is different for Flat and Structured
+ # This function is called once per HTML record
+
+ # Variables for trailing byte sequence
+ tbsType = 0x00
+ tbSequence = ""
+
+ # Generate TBS for type 0x102 - mobi_feed - flat periodical
+ if self._initialIndexRecordFound == False :
+ # Is there any indexed content yet?
+ if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
+ # No indexing data - write vwi length of 1 only
+ tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
+
+ else :
+ # First indexed record: Type 6 with nodeCount only
+ self._initialIndexRecordFound = True
+ tbsType = 6
+ tbSequence = decint(tbsType, DECINT_FORWARD)
+ tbSequence += decint(0x00, DECINT_FORWARD)
+ # nodeCount = 0xDF + 0xFF + n(0x3F) - need to add 2 because we didn't count them earlier
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount + 2)
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD)
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ else :
+ # An HTML record with nextSectionNumber = -1 has no section change in this record
+ # Default for flat periodicals with only one section
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Flat Periodical: HTML record %03d of %03d, section %d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ # First section has different Type values
+ # Determine tbsType for HTMLRecords > 0
+ if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
+ # Ending record with singleton node
+ tbsType = 6
+
+ # Assemble the Type 6 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+
+ # Assemble arg3 - (article index +1) << 4 + flag: 1 = article spans this record
+ arg3 = self._HTMLRecords[nrecords].continuingNode
+ arg3 += 1
+ arg3 <<= 4
+ arg3 |= 0x0 #flags = 0
+ tbSequence += decint(arg3, DECINT_FORWARD) # arg3
+
+
+ # tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
+ # This is a span-only record
+ tbsType = 6
+ # Zero out the nodeCount with a pre-formed vwi
+ self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80
+
+ # Assemble the Type 6 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ # Assemble arg3 - article index << 4 + flag: 1 = article spans this record
+ arg3 = self._HTMLRecords[nrecords].continuingNode
+ # Add the index of the openingNodeParent to get the offset start
+ # We know that section 0 is at position 1, section 1 at index 2, etc.
+ arg3 += self._HTMLRecords[nrecords].continuingNodeParent + 1
+ arg3 <<= 4
+ arg3 |= 0x01
+ tbSequence += decint(arg3, DECINT_FORWARD) # arg3
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ else :
+ tbsType = 7
+ # Assemble the Type 7 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80
+ # Assemble arg4 - article index << 4 + flag: 1 = article spans this record
+ arg4 = self._HTMLRecords[nrecords].continuingNode
+ # Add the index of the openingNodeParent to get the offset start
+ # We know that section 0 is at position 1, section 1 at index 2, etc.
+ arg4 += self._HTMLRecords[nrecords].continuingNodeParent + 1
+ arg4 <<= 4
+ arg4 |= 0x04 # 4: multiple nodes
+ tbSequence += decint(arg4, DECINT_FORWARD) # arg4
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
self._tbSequence = tbSequence
+ def _generate_tbs_structured_periodical(self, nrecords, lastrecord):
+ # Structured periodicals <0x101 | 0x103> have one or more sections for all articles
+ # The first section TBS sequence is different for Flat and Structured
+ # This function is called once per HTML record
+
+ # Variables for trailing byte sequence
+ tbsType = 0x00
+ tbSequence = ""
+
+
+ # Generate TBS for type 0x101/0x103 - structured periodical
+ if self._initialIndexRecordFound == False :
+ # Is there any indexed content yet?
+ if self._HTMLRecords[nrecords].currentSectionNodeCount == -1 :
+ # No indexing data - write vwi length of 1 only
+ tbSequence = decint(len(tbSequence) + 1, DECINT_FORWARD)
+
+ else :
+ self._initialIndexRecordFound = True
+
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ # First record only
+ tbsType = 6
+ # Assemble the Type 6 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
+ arg3 = self._sectionCount # Jump over the section group
+ arg3 += 0 # First article index = 0
+ arg3 <<= 4
+ arg3 |= 0x04
+ tbSequence += decint(arg3, DECINT_FORWARD) # arg3
+
+ # Structured periodicals don't count periodical, section in nodeCount
+ #tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount - 2) # nodeCount
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+ else :
+ if self._firstSectionConcluded == False :
+ # Use type 6 & 7 until first section switch, then 2
+
+ if self._HTMLRecords[nrecords].nextSectionNumber == -1 :
+ # An HTML record with nextSectionNumber = -1 has no section change in this record
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ # First section has different Type values
+ # Determine tbsType for HTMLRecords > 0
+ if nrecords == lastrecord and self._HTMLRecords[nrecords].currentSectionNodeCount == 1 :
+ # Ending record with singleton node
+ tbsType = 6
+
+ # Assemble the Type 6 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
+ arg3 = self._sectionCount
+ arg3 += self._HTMLRecords[nrecords].continuingNode
+ arg3 <<= 4
+ arg3 |= 0x04
+ tbSequence += decint(arg3, DECINT_FORWARD) # arg3
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ elif self._HTMLRecords[nrecords].continuingNode > 0 and self._HTMLRecords[nrecords].openingNode == -1 :
+ # This is a span-only record
+ tbsType = 6
+ # Zero out the nodeCount with a pre-formed vwi
+ self._HTMLRecords[nrecords].currentSectionNodeCount = 0x80
+
+ # Assemble the Type 6 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ # Assemble arg3: (section jump + article index) << 4 + flag: 1 = article spans this record
+ arg3 = self._sectionCount
+ arg3 += self._HTMLRecords[nrecords].continuingNode
+ arg3 <<= 4
+ arg3 |= 0x01
+ tbSequence += decint(arg3, DECINT_FORWARD) # arg3
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ else :
+ tbsType = 7
+ # Assemble the Type 7 TBS
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += chr(2) # arg2 = 0x02
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg3 = 0x80
+ # Assemble arg4: (section jump + article index) << 4 + flag: 1 = article spans this record
+ arg4 = self._sectionCount
+ arg4 += self._HTMLRecords[nrecords].continuingNode
+ arg4 <<= 4
+ arg4 |= 0x04 # 4: multiple nodes
+ tbSequence += decint(arg4, DECINT_FORWARD) # arg4
+ tbSequence += chr(self._HTMLRecords[nrecords].currentSectionNodeCount) # nodeCount
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+
+ # Initial section switch from section 1
+ elif self._HTMLRecords[nrecords].nextSectionNumber > 0 :
+ tbsType = 3
+
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80
+
+ # Assemble arg3: Upper nybble: ending section index
+ # Lower nybble = flags for next section - 0 or 1
+ arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4
+ arg3Flags = 0 # 0: has nodes?
+ arg3 |= arg3Flags
+ tbSequence += decint(arg3, DECINT_FORWARD)
+
+ # Assemble arg4: Upper nybble: continuingNode << 4
+ # Lower nybble: flag: 0 = no starting nodes from previous section
+ # flag: 4 = starting nodes from previous section
+
+ sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
+ sectionDelta = self._sectionCount - sectionBase - 1
+ articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
+ arg4 = (sectionDelta + articleOffset) << 4
+
+ arg4Flags = 0
+ if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 :
+ arg4Flags = 4
+ else :
+ arg4Flags = 0
+ arg4 |= arg4Flags
+ tbSequence += decint(arg4, DECINT_FORWARD) # arg4
+
+ # Write optional 4a if previous section node count > 1
+ if arg4Flags == 4 : # arg4a
+ nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
+ nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
+ tbSequence += chr(nodeCountValue)
+
+ # Write article2: not completely understood
+ arg5 = sectionDelta + articleOffset
+ if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
+ arg5 -= 1
+ arg5 <<= 4
+ arg5Flags = 8
+ arg5 |= arg5Flags
+ tbSequence += decint(arg5, DECINT_FORWARD) # arg5
+
+ # Write first article of new section
+ #arg6 = self._sectionCount - 1 # We're now into the following section
+ #arg6 = self._HTMLRecords[nrecords].nextSectionNumber
+ arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
+ arg6 <<= 4
+ if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 :
+ arg6Flags = 4
+ else :
+ arg6Flags = 0
+ arg6 |= arg6Flags
+ tbSequence += decint(arg6, DECINT_FORWARD) # arg5
+
+ # Write optional 6a if next section node count > 1
+ if arg6Flags == 4 : # arg4a
+ nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
+ nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
+ tbSequence += chr(nodeCountValue)
+
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ self._firstSectionConcluded = True
+ else :
+ # After first section switch, use types 2 and 3
+ if self._HTMLRecords[nrecords].nextSectionNumber == -1 :
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, section %d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent ) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ tbsType = 2
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ arg2 = self._HTMLRecords[nrecords].continuingNodeParent + 1
+ arg2 <<= 4
+ # Add flag = 1 if there are multiple nodes in this record
+ arg2Flags = 0
+ if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 :
+ arg2Flags = 1
+ arg2 |= arg2Flags
+ tbSequence += decint(arg2, DECINT_FORWARD)
+
+ if arg2Flags :
+ # Add an extra vwi 0x00
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg2Flags = 0x80
+
+ # arg3 - offset of continuingNode from sectionParent
+ arg3 = self._sectionCount - self._HTMLRecords[nrecords].continuingNodeParent # Total guess
+ arg3 += self._HTMLRecords[nrecords].continuingNode
+ arg3 <<= 4
+ arg3Flags = 1
+ if self._HTMLRecords[nrecords].currentSectionNodeCount > 0 :
+ arg3Flags = 4
+ arg3 |= arg3Flags
+ tbSequence += decint(arg3, DECINT_FORWARD)
+
+ if arg3Flags == 4 :
+ nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
+ nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
+ tbSequence += chr(nodeCountValue)
+ else :
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ else :
+ # Section switch when section > 1
+ tbsType = 3
+
+ if self.opts.verbose > 2 :
+ self._oeb.logger.info("\nAssembling TBS for Structured Periodical: HTML record %03d of %03d, switching sections %d-%d" % \
+ (nrecords, lastrecord, self._HTMLRecords[nrecords].continuingNodeParent, self._HTMLRecords[nrecords].nextSectionNumber) )
+ self._HTMLRecords[nrecords].dumpData(nrecords, self._oeb)
+
+ tbSequence = decint(tbsType, DECINT_FORWARD) # Type
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg1 = 0x80
+ tbSequence += decint(0x00, DECINT_FORWARD) # arg2 = 0x80
+
+ # arg3: continuingNodeParent section
+ # Upper nybble: ending section index
+ # Lower nybble = flags for next section - 0 or 1
+ arg3 = (self._HTMLRecords[nrecords].continuingNodeParent + 1) << 4
+ arg3Flags = 0 # 0: has nodes?
+ arg3 |= arg3Flags
+ tbSequence += decint(arg3, DECINT_FORWARD)
+
+ # Assemble arg4: Upper nybble: continuingNode << 4
+ # Lower nybble: flag: 0 = no starting nodes from previous section
+ # flag: 4 = starting nodes from previous section
+ sectionBase = self._HTMLRecords[nrecords].continuingNodeParent
+ sectionDelta = self._sectionCount - sectionBase - 1
+ articleOffset = self._HTMLRecords[nrecords].continuingNode + 1
+ arg4 = (sectionDelta + articleOffset) << 4
+
+ arg4Flags = 0
+ if self._HTMLRecords[nrecords].currentSectionNodeCount > 1 :
+ arg4Flags = 4
+ else :
+ arg4Flags = 0
+ arg4 |= arg4Flags
+ tbSequence += decint(arg4, DECINT_FORWARD) # arg4
+
+ # Write optional 4a if previous section node count > 1
+ if arg4Flags == 4 : # arg4a
+ nodeCountValue = self._HTMLRecords[nrecords].currentSectionNodeCount
+ nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
+ tbSequence += chr(nodeCountValue)
+
+ # Write article2: not completely understood
+ arg5 = sectionDelta + articleOffset
+ if self._HTMLRecords[nrecords].currentSectionNodeCount < 2:
+ arg5 -= 1
+ arg5 <<= 4
+ arg5Flags = 8
+ arg5 |= arg5Flags
+ tbSequence += decint(arg5, DECINT_FORWARD) # arg5
+
+ # Write first article of new section
+ arg6 = sectionDelta + self._HTMLRecords[nrecords].nextSectionOpeningNode
+ arg6 <<= 4
+ if self._HTMLRecords[nrecords].nextSectionNodeCount > 1 :
+ arg6Flags = 4
+ else :
+ arg6Flags = 0
+ arg6 |= arg6Flags
+ tbSequence += decint(arg6, DECINT_FORWARD) # arg5
+
+ # Write optional 6a if next section node count > 1
+ if arg6Flags == 4 : # arg4a
+ nodeCountValue = self._HTMLRecords[nrecords].nextSectionNodeCount
+ nodeCountValue = 0x80 if nodeCountValue == 0 else nodeCountValue
+ tbSequence += chr(nodeCountValue)
+
+ tbSequence += decint(len(tbSequence) + 1, DECINT_FORWARD) # len
+
+ self._tbSequence = tbSequence
+
+ def _evaluate_periodical_toc(self):
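+ '''
+ Verify the TOC is a conforming periodical; return True or False.
+ Periodical (depth=4):
+ depth=3 klass "periodical", 1
+ depth=2 klass "section", 1 or more
+ depth=1 klass "article", multiple
+ Book (depth=2):
+ depth=1 multiple
+ Also requires a 'date' or 'timestamp' entry in the metadata.
+ '''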
+ toc = self._oeb.toc
+ nodes = list(toc.iter())[1:]
+ for (i, child) in enumerate(nodes) :
+ if self.opts.verbose > 3 :
+ self._oeb.logger.info(" : %-25.25s \tklass=%-15.15s \tdepth:%d playOrder=%03d" % \
+ (child.title, child.klass, child.depth(), child.play_order) )
+
+ if child.klass == "periodical" and child.depth() != 3 :
+ self._oeb.logger.info(' found at depth %d, nonconforming TOC' % \
+ child.depth() )
+ return False
+
+ if child.klass == "section" and child.depth() != 2 :
+ self._oeb.logger.info(' found at depth %d, nonconforming TOC' % \
+ child.depth() )
+ return False
+
+ if child.klass == "article" and child.depth() != 1 :
+ self._oeb.logger.info(' found at depth %d, nonconforming TOC' % \
+ child.depth() )
+ return False
+
+ # We also need to know that we have a pubdate or timestamp in the metadata, which the Kindle needs
+ if self._oeb.metadata['date'] == [] and self._oeb.metadata['timestamp'] == [] :
+ self._oeb.logger.info('metadata missing timestamp needed for periodical')
+ return False
+
+ self._oeb.logger.info('TOC structure and pubdate verified')
+ return True
+
def _generate_text(self):
self._oeb.logger.info('Serializing markup content...')
@@ -590,6 +1205,7 @@ class MobiWriter(object):
write_page_breaks_after_item=self.write_page_breaks_after_item)
breaks = serializer.breaks
text = serializer.text
+ self._anchor_offset_kindle = serializer.anchor_offset_kindle
self._id_offsets = serializer.id_offsets
self._content_length = len(text)
self._text_length = len(text)
@@ -597,47 +1213,76 @@ class MobiWriter(object):
buf = []
nrecords = 0
lastrecord = (self._content_length // RECORD_SIZE )
-
offset = 0
if self._compression != UNCOMPRESSED:
self._oeb.logger.info('Compressing markup content...')
data, overlap = self._read_text_record(text)
- # We need entries[] before calling self._build_HTMLRecords_Data_List()
+ # Evaluate toc for conformance
+ if self.opts.mobi_periodical :
+ self._oeb.logger.info('--mobi-periodical specified, evaluating TOC for periodical conformance ...')
+ self._conforming_periodical_toc = self._evaluate_periodical_toc()
+
+ # This routine decides whether to build flat or structured based on self._conforming_periodical_toc
self._ctoc = self._generate_ctoc()
# Build the HTMLRecords list so we can assemble the trailing bytes sequences in the following while loop
toc = self._oeb.toc
entries = list(toc.iter())[1:]
+
if len(entries) :
- self._hasValidNCXEntries = self._build_HTMLRecords_Data_List()
+ self._indexable = self._generate_indexed_navpoints()
else :
- self._hasValidNCXEntries = False
+ self._oeb.logger.info('No entries found in TOC ...')
+ self._indexable = False
+
+ if not self._indexable :
+ self._oeb.logger.info('Writing unindexed mobi ...')
while len(data) > 0:
if self._compression == PALMDOC:
data = compress_doc(data)
record = StringIO()
record.write(data)
+
+ # Marshall's utf-8 break code.
record.write(overlap)
record.write(pack('>B', len(overlap)))
nextra = 0
pbreak = 0
running = offset
+ while breaks and (breaks[0] - offset) < RECORD_SIZE:
+ # .pop returns item, removes it from list
+ pbreak = (breaks.pop(0) - running) >> 3
+ self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
+ encoded = decint(pbreak, DECINT_FORWARD)
+ record.write(encoded)
+ running += pbreak << 3
+ nextra += len(encoded)
+
+ lsize = 1
+ while True:
+ size = decint(nextra + lsize, DECINT_BACKWARD)
+ if len(size) == lsize:
+ break
+ lsize += 1
+
+ record.write(size)
+
# Write Trailing Byte Sequence
- if INDEXING and self._hasValidNCXEntries:
+ if INDEXING and self._indexable:
# Dispatch to different TBS generators based upon publication type
- booktype = 0x101 if self.opts.mobi_periodical else 0x002
+ booktype = self._MobiDoc.mobiType
if booktype == 0x002 :
- self._build_TBS_Book(nrecords, lastrecord)
- #elif booktype == flatPeriodical :
- # tbSequence = self._build_TBS_FlatPeriodicalTBS()
- #elif booktype == structuredPeriodical :
- # tbSequence = self._build_TBS_StructuredPeriodicalTBS()
+ self._generate_tbs_book(nrecords, lastrecord)
+ elif booktype == 0x102 :
+ self._generate_tbs_flat_periodical(nrecords, lastrecord)
+ elif booktype == 0x101 or booktype == 0x103 :
+ self._generate_tbs_structured_periodical(nrecords, lastrecord)
else :
- raise NotImplementedError('Indexing for periodicals not implemented')
+ raise NotImplementedError('Indexing for mobitype 0x%X not implemented' % booktype)
# Dump the current HTML Record Data / TBS
# GR diagnostics
@@ -651,27 +1296,6 @@ class MobiWriter(object):
# Write the sequence
record.write(self._tbSequence)
- else :
- # Marshall's original code
- while breaks and (breaks[0] - offset) < RECORD_SIZE:
- # .pop returns item, removes it from list
- pbreak = (breaks.pop(0) - running) >> 3
- self._oeb.logger.info('pbreak = 0x%X' % pbreak )
- encoded = decint(pbreak, DECINT_FORWARD)
- record.write(encoded)
- running += pbreak << 3
- nextra += len(encoded)
-
- lsize = 1
- while True:
- size = decint(nextra + lsize, DECINT_BACKWARD)
- if len(size) == lsize:
- break
- lsize += 1
-
- # Writing vwi length byte here
- record.write(size)
-
self._records.append(record.getvalue())
buf.append(self._records[-1])
nrecords += 1
@@ -686,262 +1310,6 @@ class MobiWriter(object):
nrecords += 1
self._text_nrecords = nrecords
- def _generate_indxt(self, ctoc):
-
- if self.opts.mobi_periodical:
- raise NotImplementedError('Indexing for periodicals not implemented')
- toc = self._oeb.toc
- indxt, indices, c = StringIO(), StringIO(), 0
-
- indices.write('IDXT')
- c = 0
- last_name = None
-
- def add_node(node, offset, length, count):
- if self.opts.verbose > 2:
- self._oeb.log.debug('Adding TOC node:', node.title, 'href:',
- node.href)
-
- pos = 0xc0 + indxt.tell()
- indices.write(pack('>H', pos))
- name = "%04X"%count
- indxt.write(chr(len(name)) + name)
- indxt.write(INDXT['chapter'])
- indxt.write(decint(offset, DECINT_FORWARD))
- indxt.write(decint(length, DECINT_FORWARD))
- indxt.write(decint(self._ctoc_map[node], DECINT_FORWARD))
- indxt.write(decint(0, DECINT_FORWARD))
-
-
- entries = list(toc.iter())[1:]
- for i, child in enumerate(entries):
- h = child.href
- if h not in self._id_offsets:
- self._oeb.log.warning('Could not find TOC entry:', child.title)
- continue
- offset = self._id_offsets[h]
- length = None
- for sibling in entries[i+1:]:
- h2 = sibling.href
- if h2 in self._id_offsets:
- offset2 = self._id_offsets[h2]
- if offset2 > offset:
- length = offset2 - offset
- break
- if length is None:
- length = self._content_length - offset
-
- add_node(child, offset, length, c)
- ctoc_offset = self._ctoc_map[child]
- last_name = "%04X"%c
- c += 1
-
- return align_block(indxt.getvalue()), c, \
- align_block(indices.getvalue()), last_name
-
-
- def _generate_index(self):
- self._oeb.log('Generating primary index...')
- self._primary_index_record = None
-
- indxt, indxt_count, indices, last_name = \
- self._generate_indxt(self._ctoc)
- if last_name is None:
- self._oeb.log.warn('Input document has no TOC. No index generated.')
- return
-
- # GR: indx0 => INDX0[0]
- # indx1 => INDX1[0]
- indx1 = StringIO()
- indx1.write('INDX'+pack('>I', 0xc0)) # header length
-
- # 0x8 - 0xb : Unknown
- indx1.write('\0'*4)
-
- # 0xc - 0xf : Header type
- indx1.write(pack('>I', 1))
-
- # 0x10 - 0x13 : Unknown
- indx1.write('\0'*4)
-
- # 0x14 - 0x17 : IDXT offset
- # 0x18 - 0x1b : IDXT count
- indx1.write(pack('>I', 0xc0+len(indxt)))
- indx1.write(pack('>I', indxt_count))
-
- # 0x1c - 0x23 : Unknown
- indx1.write('\xff'*8)
-
- # 0x24 - 0xbf
- indx1.write('\0'*156)
- indx1.write(indxt)
- indx1.write(indices)
- indx1 = indx1.getvalue()
-
- idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1)
- idxt0 = align_block(idxt0)
- indx0 = StringIO()
-
- tagx = TAGX['periodical' if self.opts.mobi_periodical else 'chapter']
- tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx)
- indx0_indices_pos = 0xc0 + len(tagx) + len(idxt0)
- indx0_indices = align_block('IDXT' + pack('>H', 0xc0 + len(tagx)))
- # Generate record header
- header = StringIO()
-
- header.write('INDX')
- header.write(pack('>I', 0xc0)) # header length
-
- # 0x08 - 0x0b : Unknown
- header.write('\0'*4)
-
- # 0x0c - 0x0f : Header type
- header.write(pack('>I', 0))
-
- # 0x10 - 0x13 : Generator ID
- header.write(pack('>I', 6))
-
- # 0x14 - 0x17 : IDXT offset
- header.write(pack('>I', indx0_indices_pos))
-
- # 0x18 - 0x1b : IDXT count
- header.write(pack('>I', 1))
-
- # 0x1c - 0x1f : Text encoding ?
- # header.write(pack('>I', 650001))
- # GR: This needs to be either 0xFDE9 or 0x4E4
- header.write(pack('>I', 0xFDE9))
-
- # 0x20 - 0x23 : Language code?
- header.write(iana2mobi(str(self._oeb.metadata.language[0])))
-
- # 0x24 - 0x27 : Number of TOC entries in INDX1
- header.write(pack('>I', indxt_count + 1))
-
- # 0x28 - 0x2b : ORDT Offset
- header.write('\0'*4)
-
- # 0x2c - 0x2f : LIGT offset
- header.write('\0'*4)
-
- # 0x30 - 0x33 : Number of LIGT entries
- header.write('\0'*4)
-
- # 0x34 - 0x37 : Unknown
- header.write(pack('>I', 1))
-
- # 0x38 - 0xb3 : Unknown (pad?)
- header.write('\0'*124)
-
- # 0xb4 - 0xb7 : TAGX offset
- header.write(pack('>I', 0xc0))
-
- # 0xb8 - 0xbf : Unknown
- header.write('\0'*8)
-
- header = header.getvalue()
-
- indx0.write(header)
- indx0.write(tagx)
- indx0.write(idxt0)
- indx0.write(indx0_indices)
- indx0 = indx0.getvalue()
-
- self._primary_index_record = len(self._records)
- self._records.extend([indx0, indx1, self._ctoc])
-
- # Turn this off for now
- if False:
- # Write secondary index records
- tagx = TAGX['secondary_'+\
- ('periodical' if self.opts.mobi_periodical else 'book')]
- tagx_len = 8 + len(tagx)
-
- indx0 = StringIO()
- indx0.write('INDX'+pack('>I', 0xc0)+'\0'*8)
- indx0.write(pack('>I', 0x02))
- indx0.write(pack('>I', 0xc0+tagx_len+4))
- indx0.write(pack('>I', 1))
- indx0.write(pack('>I', 65001))
- indx0.write('\xff'*4)
- indx0.write(pack('>I', 1))
- indx0.write('\0'*4)
- indx0.write('\0'*136)
- indx0.write(pack('>I', 0xc0))
- indx0.write('\0'*8)
- indx0.write('TAGX'+pack('>I', tagx_len)+tagx)
- if self.opts.mobi_periodical:
- raise NotImplementedError
- else:
- indx0.write('\0'*3 + '\x01' + 'IDXT' + '\0\xd4\0\0')
- indx1 = StringIO()
- indx1.write('INDX' + pack('>I', 0xc0) + '\0'*4)
- indx1.write(pack('>I', 1))
- extra = 0xf0 if self.opts.mobi_periodical else 4
- indx1.write('\0'*4 + pack('>I', 0xc0+extra))
- num = 4 if self.opts.mobi_periodical else 1
- indx1.write(pack('>I', num))
- indx1.write('\xff'*8)
- indx1.write('\0'*(0xc0-indx1.tell()))
- if self.opts.mobi_periodical:
- raise NotImplementedError
- else:
- indx1.write('\0\x01\x80\0')
- indx1.write('IDXT')
- if self.opts.mobi_periodical:
- raise NotImplementedError
- else:
- indx1.write('\0\xc0\0\0')
-
- indx0, indx1 = indx0.getvalue(), indx1.getvalue()
- self._records.extend((indx0, indx1))
- if self.opts.verbose > 3:
- from tempfile import mkdtemp
- import os
- t = mkdtemp()
- for i, n in enumerate(['sindx1', 'sindx0', 'ctoc', 'indx0', 'indx1']):
- open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
- self._oeb.log.debug('Index records dumped to', t)
-
-
-
-
- def _generate_ctoc(self):
- if self.opts.mobi_periodical:
- raise NotImplementedError('Indexing for periodicals not implemented')
- self._oeb.logger.info('Generating CTOC ...')
-
- toc = self._oeb.toc
- self._ctoc_map = {}
- self._ctoc_name_map = {}
- self._last_toc_entry = None
- ctoc = StringIO()
-
- def add_node(node, cls, title=None):
- t = node.title if title is None else title
-
- if t and t.strip():
- t = t.strip()
- if not isinstance(t, unicode):
- t = t.decode('utf-8', 'replace')
- t = t.encode('utf-8')
- self._last_toc_entry = t
- self._ctoc_map[node] = ctoc.tell()
- self._ctoc_name_map[node] = t
- ctoc.write(decint(len(t), DECINT_FORWARD)+t)
- else :
- t = "(none)".encode('utf-8')
- self._last_toc_entry = t
- self._ctoc_map[node] = ctoc.tell()
- self._ctoc_name_map[node] = t
- ctoc.write(decint(len(t), DECINT_FORWARD)+t)
-
- first = True
- for child in toc.iter():
- add_node(child, 'chapter')#, title='Title Page' if first else None)
- first = False
-
- return align_block(ctoc.getvalue())
def _generate_images(self):
self._oeb.logger.info('Serializing images...')
@@ -981,7 +1349,7 @@ class MobiWriter(object):
metadata = self._oeb.metadata
exth = self._build_exth()
last_content_record = len(self._records) - 1
- if INDEXING:
+ if INDEXING and self._indexable:
self._generate_end_records()
record0 = StringIO()
# The PalmDOC Header
@@ -1005,7 +1373,9 @@ class MobiWriter(object):
# 0xC - 0xF : Text encoding (65001 is utf-8)
# 0x10 - 0x13 : UID
# 0x14 - 0x17 : Generator version
- btype = 0x101 if self.opts.mobi_periodical else 2
+
+ btype = self._MobiDoc.mobiType
+
record0.write(pack('>IIIII',
0xe8, btype, 65001, uid, 6))
@@ -1014,12 +1384,13 @@ class MobiWriter(object):
# 0x20 - 0x23 : Secondary index record
- # Turned off as it seems unnecessary
- if True:
+ if btype < 0x100 :
record0.write(pack('>I', 0xffffffff))
- else:
+ elif btype > 0x100 and self._indexable :
record0.write(pack('>I', 0xffffffff if self._primary_index_record is
None else self._primary_index_record+3))
+ else :
+ record0.write(pack('>I', 0xffffffff))
# 0x24 - 0x3f : Unknown
record0.write('\xff' * 28)
@@ -1113,12 +1484,12 @@ class MobiWriter(object):
# 0xe0 - 0xe3 : Extra record data
# The '5' is a bitmask of extra record data at the end:
# - 0x1: (?)
- # - 0x2: GR
+ # - 0x2: GR
# - 0x4:
# Of course, the formats aren't quite the same.
- # GR: Use 2 for indexed files
- if INDEXING :
- record0.write(pack('>I', 2))
+ # GR: Use 7 for indexed files, 5 for unindexed
+ if INDEXING and self._indexable :
+ record0.write(pack('>I', 7))
else:
record0.write(pack('>I', 5))
@@ -1158,6 +1529,24 @@ class MobiWriter(object):
exth.write(pack('>II', code, len(data) + 8))
exth.write(data)
nrecs += 1
+ if term == 'rights' :
+ rights = unicode(oeb.metadata.rights[0])
+ exth.write(pack('>II', EXTH_CODES['rights'], len(rights) + 8))
+ exth.write(rights)
+
+        # Add a publication date entry, preferring 'date' over 'timestamp'.
+        # datestr starts out as None so that the check below is well
+        # defined when the metadata carries neither entry.
+        datestr = None
+        if oeb.metadata['date'] != [] :
+            datestr = str(oeb.metadata['date'][0])
+        elif oeb.metadata['timestamp'] != [] :
+            datestr = str(oeb.metadata['timestamp'][0])
+
+        if datestr is not None:
+            exth.write(pack('>II',EXTH_CODES['pubdate'], len(datestr) + 8))
+            exth.write(datestr)
+            nrecs += 1
+        else:
+            raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
+
if oeb.metadata.cover:
id = unicode(oeb.metadata.cover[0])
item = oeb.manifest.ids[id]
@@ -1171,13 +1560,6 @@ class MobiWriter(object):
exth.write(pack('>III', 0xca, 0x0c, index - 1))
nrecs += 1
- # Not sure what these are, but not needed for indexing
- if False :
- # Write unknown EXTH records as 0s
- for code, size in [(204,4), (205,4), (206,4), (207,4), (300,40)]:
- exth.write(pack('>II', code, 8+size)+'\0'*size)
- nrecs += 1
-
exth = exth.getvalue()
trail = len(exth) % 4
pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
@@ -1216,8 +1598,825 @@ class MobiWriter(object):
for record in self._records:
self._write(record)
+    def _generate_index(self):
+        '''
+        Build the index records and append them to self._records.
+
+        The primary INDX0 and INDX1 records are followed by the CTOC record
+        (self._ctoc), and self._primary_index_record is set to the position
+        of the primary INDX0. When self._MobiDoc.mobiType is greater than
+        0x100 a secondary INDX0/INDX1 pair is appended as well, and with
+        opts.verbose > 3 the appended records are dumped to a temporary
+        directory.
+
+        Nothing is appended, and self._primary_index_record stays None,
+        when _generate_indxt() returns no last entry name.
+        '''
+        self._oeb.log('Generating primary index ...')
+        self._primary_index_record = None
+
+        # Build the NCXEntries and INDX
+        indxt, indxt_count, indices, last_name = \
+                self._generate_indxt(self._ctoc)
+
+        if last_name is None:
+            self._oeb.log.warn('Input document has no TOC. No index generated.')
+            return
+
+        # Assemble the INDX0[0] and INDX1[0] output streams
+        indx1 = StringIO()
+        indx1.write('INDX'+pack('>I', 0xc0)) # header length
+
+        # 0x8 - 0xb : Unknown
+        indx1.write('\0'*4)
+
+        # 0xc - 0xf : Header type
+        indx1.write(pack('>I', 1))
+
+        # 0x10 - 0x13 : Unknown
+        indx1.write('\0'*4)
+
+        # 0x14 - 0x17 : IDXT offset
+        # 0x18 - 0x1b : IDXT count
+        indx1.write(pack('>I', 0xc0+len(indxt)))
+        indx1.write(pack('>I', indxt_count + 1))
+
+        # 0x1c - 0x23 : Unknown
+        indx1.write('\xff'*8)
+
+        # 0x24 - 0xbf
+        indx1.write('\0'*156)
+        indx1.write(indxt)
+        indx1.write(indices)
+        indx1 = indx1.getvalue()
+
+        idxt0 = chr(len(last_name)) + last_name + pack('>H', indxt_count + 1)
+        idxt0 = align_block(idxt0)
+        indx0 = StringIO()
+
+        if self._MobiDoc.mobiType == 0x002 :
+            tagx = TAGX['chapter']
+        else :
+            tagx = TAGX['periodical']
+
+        tagx = align_block('TAGX' + pack('>I', 8 + len(tagx)) + tagx)
+        indx0_indices_pos = 0xc0 + len(tagx) + len(idxt0)
+        indx0_indices = align_block('IDXT' + pack('>H', 0xc0 + len(tagx)))
+        # Generate record header
+        header = StringIO()
+
+        header.write('INDX')
+        header.write(pack('>I', 0xc0)) # header length
+
+        # 0x08 - 0x0b : Unknown
+        header.write('\0'*4)
+
+        # 0x0c - 0x0f : Header type
+        header.write(pack('>I', 0))
+
+        # 0x10 - 0x13 : Generator ID
+        header.write(pack('>I', 6))
+
+        # 0x14 - 0x17 : IDXT offset
+        header.write(pack('>I', indx0_indices_pos))
+
+        # 0x18 - 0x1b : IDXT count
+        header.write(pack('>I', 1))
+
+        # 0x1c - 0x1f : Text encoding ?
+        # GR: This needs to be either 0xFDE9 or 0x4E4
+        header.write(pack('>I', 0xFDE9))
+
+        # 0x20 - 0x23 : Language code?
+        header.write(iana2mobi(str(self._oeb.metadata.language[0])))
+
+        # 0x24 - 0x27 : Number of TOC entries in INDX1
+        header.write(pack('>I', indxt_count + 1))
+
+        # 0x28 - 0x2b : ORDT Offset
+        header.write('\0'*4)
+
+        # 0x2c - 0x2f : LIGT offset
+        header.write('\0'*4)
+
+        # 0x30 - 0x33 : Number of LIGT entries
+        header.write('\0'*4)
+
+        # 0x34 - 0x37 : Unknown
+        header.write(pack('>I', 1))
+
+        # 0x38 - 0xb3 : Unknown (pad?)
+        header.write('\0'*124)
+
+        # 0xb4 - 0xb7 : TAGX offset
+        header.write(pack('>I', 0xc0))
+
+        # 0xb8 - 0xbf : Unknown
+        header.write('\0'*8)
+
+        header = header.getvalue()
+
+        indx0.write(header)
+        indx0.write(tagx)
+        indx0.write(idxt0)
+        indx0.write(indx0_indices)
+        indx0 = indx0.getvalue()
+
+        # Everything from this position on is appended by this method
+        self._primary_index_record = len(self._records)
+        self._records.extend([indx0, indx1, self._ctoc])
+
+        # Indexing for author/description fields in summary section
+        # Test for indexed periodical - only one that needs secondary index
+        if self._MobiDoc.mobiType > 0x100 :
+            # Write secondary index records
+            tagx = TAGX['secondary_'+'periodical']
+            tagx_len = 8 + len(tagx)
+
+            # generate secondary INDX0
+            indx0 = StringIO()
+            indx0.write('INDX'+pack('>I', 0xc0)+'\0'*8)            # header + 8x00
+            indx0.write(pack('>I', 0x06))                          # generator ID
+            indx0.write(pack('>I', 0xe8))                          # IDXT offset
+            indx0.write(pack('>I', 1))                             # IDXT entries
+            indx0.write(pack('>I', 65001))                         # encoding
+            indx0.write('\xff'*4)                                  # language
+            indx0.write(pack('>I', 4))                             # IDXT Entries in INDX1
+            indx0.write('\0'*4)                                    # ORDT Offset
+            indx0.write('\0'*136)                                  # everything up to TAGX offset
+            indx0.write(pack('>I', 0xc0))                          # TAGX offset
+            indx0.write('\0'*8)                                    # unknowns
+            indx0.write('TAGX'+pack('>I', tagx_len)+tagx)          # TAGX
+            indx0.write('\x0D'+'mastheadImage' + '\x00\x04')       # mastheadImage
+            indx0.write('IDXT'+'\x00\xd8\x00\x00')                 # offset plus pad
+
+            # generate secondary INDX1
+            indx1 = StringIO()
+            indx1.write('INDX' + pack('>I', 0xc0) + '\0'*4)        # header + 4x00
+            indx1.write(pack('>I', 1))                             # blockType 1
+            indx1.write(pack('>I', 0x00))                          # unknown
+            indx1.write('\x00\x00\x00\xF0')                        # IDXT offset
+            indx1.write(pack('>I', 4))                             # num of IDXT entries
+            indx1.write('\xff'*8)                                  # encoding, language
+            indx1.write('\0'*(0xc0-indx1.tell()))                  # 00 to IDXT Entries @ 0xC0
+            indx1.write('\0\x01\x80')                              # 1 - null
+            indx1.write('\x06'+'author' + '\x02\x80\x80\xc7')      # author
+            indx1.write('\x0B'+'description' + '\x02\x80\x80\xc6') # description
+            indx1.write('\x0D'+'mastheadImage' + '\x02\x85\x80\xc5') # mastheadImage
+            indx1.write('IDXT'+'\x00\xc0\x00\xc3\x00\xce\x00\xde') # IDXT header
+
+            # Write INDX0 and INDX1 to the stream
+            indx0, indx1 = indx0.getvalue(), indx1.getvalue()
+            self._records.extend((indx0, indx1))
+            if self.opts.verbose > 3:
+                from tempfile import mkdtemp
+                import os
+                t = mkdtemp()
+                # Pair each name with the record appended under it, in the
+                # order the records were appended above, so that every dump
+                # file holds the record its name says it holds.
+                names = ['indx0', 'indx1', 'ctoc', 'sindx0', 'sindx1']
+                appended = self._records[self._primary_index_record:]
+                for n, record in zip(names, appended):
+                    # Close each dump file explicitly once it is written
+                    dump = open(os.path.join(t, n+'.bin'), 'wb')
+                    try:
+                        dump.write(record)
+                    finally:
+                        dump.close()
+                self._oeb.log.debug('Index records dumped to', t)
+
+    def _clean_text_value(self, text):
+        '''
+        Return text stripped of surrounding whitespace and encoded as
+        utf-8. Empty or whitespace-only input yields the utf-8 encoded
+        placeholder "(none)".
+        '''
+        stripped = text.strip() if text else text
+        if not stripped:
+            return "(none)".encode('utf-8')
+
+        if not isinstance(stripped, unicode):
+            # Byte strings are decoded as utf-8, replacing undecodable bytes
+            stripped = stripped.decode('utf-8', 'replace')
+        return stripped.encode('utf-8')
+
+    def _add_flat_ctoc_node(self, node, ctoc, title=None):
+        '''
+        Write the title string of node to the ctoc stream and append its
+        {'klass', 'titleOffset'} dictionary to self._ctoc_map.
+
+        node.title is used unless title is given. A klass of 'article' is
+        recorded as 'chapter'; any other klass is recorded unchanged.
+        self._chapterCount is incremented for every node added.
+        '''
+        label = self._clean_text_value(node.title if title is None else title)
+        self._last_toc_entry = label
+
+        # article = chapter
+        klass = 'chapter' if node.klass == 'article' else node.klass
+
+        # Note where the title starts before it is written
+        entry = {'klass': klass, 'titleOffset': ctoc.tell()}
+        ctoc.write(decint(len(label), DECINT_FORWARD)+label)
+        self._chapterCount += 1
+
+        self._ctoc_map.append(entry)
+
+
+    def _add_structured_ctoc_node(self, node, ctoc, title=None):
+        '''
+        Write the CTOC strings for one 'chapter', 'periodical', 'section'
+        or 'article' node and append their offsets to self._ctoc_map.
+
+        Nodes whose klass is None are ignored. Every other node gets a
+        title string. Periodicals, sections and articles also get a class
+        string, reused from the first earlier node of the same klass when
+        there is one. Articles additionally record a description and an
+        author offset, each None when the node has no such value. The
+        counter matching the klass is incremented.
+
+        Raises NotImplementedError for any other klass.
+        '''
+        klass = node.klass
+        if klass is None :
+            return
+
+        def put(value):
+            # Write a length-prefixed string, return the offset it starts at
+            start = ctoc.tell()
+            ctoc.write(decint(len(value), DECINT_FORWARD)+value)
+            return start
+
+        def class_offset():
+            # Use the pre-existing class string of this klass if any,
+            # otherwise write a new one
+            for earlier in self._ctoc_map:
+                if earlier['klass'] == klass:
+                    return earlier['classOffset']
+            return put(klass)
+
+        def optional_offset(value):
+            # Offset of the cleaned string, or None when there is no value
+            if value :
+                return put(self._clean_text_value(value))
+            return None
+
+        t = self._clean_text_value(node.title if title is None else title)
+        self._last_toc_entry = t
+
+        # Reject unknown classes before anything is written
+        if klass not in ('chapter', 'periodical', 'section', 'article') :
+            raise NotImplementedError( \
+                'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
+                (node.title, node.klass, node.play_order))
+
+        entry = {'klass': klass, 'titleOffset': put(t)}
+
+        if klass == 'chapter' :
+            self._chapterCount += 1
+        else :
+            entry['classOffset'] = class_offset()
+            if klass == 'periodical' :
+                self._periodicalCount += 1
+            elif klass == 'section' :
+                self._sectionCount += 1
+            else :
+                # article: description first, then author
+                entry['descriptionOffset'] = optional_offset(node.description)
+                entry['authorOffset'] = optional_offset(node.author)
+                self._articleCount += 1
+
+        # append this node's name_map to map
+        self._ctoc_map.append(entry)
+
+    def _generate_ctoc(self):
+        '''
+        Generate the compiled TOC strings and choose the MobiDocument type.
+
+        With a conforming periodical TOC every node is handed to
+        _add_structured_ctoc_node(). Otherwise only 'article' and 'chapter'
+        nodes at depth 1 are handed to _add_flat_ctoc_node() (a node with
+        no klass is first given klass 'chapter'), and the TOC's node list
+        is replaced by the nodes that were kept.
+
+        The mobiType is 0x002 unless opts.mobi_periodical is set and
+        periodical, section or article nodes were counted; then it is
+        0x102 for exactly one section and 0x103 for more than one.
+
+        Sets self._ctoc_map, self._last_toc_entry, the four node counters
+        and self._MobiDoc. Returns the aligned CTOC record. Raises
+        NotImplementedError when no mobiType matches the counts.
+        '''
+        toc = self._oeb.toc
+        ctoc = StringIO()
+        self._ctoc_map = []     # per node dictionary of {class/title/desc/author} offsets
+        self._last_toc_entry = None
+
+        # Track the individual node types
+        self._periodicalCount = 0
+        self._sectionCount = 0
+        self._articleCount = 0
+        self._chapterCount = 0
+
+        if self._conforming_periodical_toc :
+            self._oeb.logger.info('Generating structured CTOC ...')
+            for child in toc.iter():
+                if self.opts.verbose > 2 :
+                    self._oeb.logger.info(" %s" % child)
+                self._add_structured_ctoc_node(child, ctoc)
+        else :
+            self._oeb.logger.info('Generating flat CTOC ...')
+            kept = []
+            for (position, child) in enumerate(toc.iter()):
+                # no class defaults to 'chapter'
+                if child.klass is None :
+                    child.klass = 'chapter'
+
+                # Only chapters or articles at depth==1 are added
+                if child.klass not in ('article', 'chapter') or child.depth() != 1 :
+                    if self.opts.verbose > 2 :
+                        self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
+                            (child.klass, child.depth(),position))
+                    continue
+
+                if self.opts.verbose > 2 :
+                    self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
+                        (child.klass, child.depth(), child) )
+                self._add_flat_ctoc_node(child, ctoc)
+                kept.append(child)
+
+            # Update the TOC with our edited version
+            self._oeb.toc.nodes = kept
+
+        # Instantiate a MobiDocument(mobitype)
+        structured_nodes = self._periodicalCount or self._sectionCount or self._articleCount
+        if not structured_nodes or not self.opts.mobi_periodical :
+            mobiType = 0x002
+        elif self._periodicalCount and self._sectionCount == 1 :
+            mobiType = 0x102
+        elif self._periodicalCount and self._sectionCount > 1 :
+            mobiType = 0x103 # Could also be 0x101 - need cli switch
+        else :
+            raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
+
+        self._MobiDoc = MobiDocument(mobiType)
+
+        if self.opts.verbose > 2 :
+            if mobiType == 0x102 :
+                structType = 'flat periodical'
+            elif mobiType > 0x100 :
+                structType = 'structured periodical'
+            else :
+                structType = 'book'
+            self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
+            if mobiType > 0x100 :
+                self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
+                    (self._periodicalCount, self._sectionCount, self._articleCount) )
+            else :
+                self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
+
+        return align_block(ctoc.getvalue())
+
+    def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
+        '''
+        Append an INDXT['periodical'] entry to indxt and save its position
+        (0xc0 plus the current indxt offset) in indices.
+
+        index selects the self._ctoc_map entry supplying the title and
+        class offsets; count is formatted as the four-hex-digit entry name.
+        '''
+        def vwi(value):
+            # Write one forward-encoded variable width integer
+            indxt.write(decint(value, DECINT_FORWARD))
+
+        indices.write(pack('>H', 0xc0 + indxt.tell()))  # offset for IDXTIndices
+        name = "%04X"%count
+        indxt.write(chr(len(name)) + name)              # length-prefixed name
+        indxt.write(INDXT['periodical'])                # entryType
+        indxt.write(chr(1))                             # subType 1
+        vwi(offset)
+        vwi(length)
+        vwi(self._ctoc_map[index]['titleOffset'])       # title offset in CNCX
+        vwi(0)                                          # unknown byte
+        vwi(self._ctoc_map[index]['classOffset'])       # class offset in CNCX
+        vwi(firstSection)                               # first section in periodical
+        vwi(lastSection)                                # last section in periodical
+        vwi(0)                                          # 0x80
+
+    def _write_section_node(self, indxt, indices, myCtocMapIndex, index, offset, length, count, firstArticle, lastArticle, parentIndex) :
+        '''
+        Append an INDXT['section'] entry to indxt and save its position
+        (0xc0 plus the current indxt offset) in indices.
+
+        myCtocMapIndex selects the self._ctoc_map entry supplying the title
+        and class offsets; count is formatted as the four-hex-digit entry
+        name. The index argument is accepted but not used.
+        '''
+        def vwi(value):
+            # Write one forward-encoded variable width integer
+            indxt.write(decint(value, DECINT_FORWARD))
+
+        indices.write(pack('>H', 0xc0 + indxt.tell()))      # offset for IDXTIndices
+        name = "%04X"%count
+        indxt.write(chr(len(name)) + name)                  # length-prefixed name
+        indxt.write(INDXT['section'])                       # entryType
+        indxt.write(chr(0))                                 # subType 0
+        vwi(offset)
+        vwi(length)
+        vwi(self._ctoc_map[myCtocMapIndex]['titleOffset'])  # title offset in CNCX
+        vwi(1)                                              # unknown byte
+        vwi(self._ctoc_map[myCtocMapIndex]['classOffset'])  # class offset in CNCX
+        vwi(parentIndex)                                    # index of periodicalParent
+        vwi(firstArticle)                                   # first article in section
+        vwi(lastArticle)                                    # last article in section
+
+    def _write_article_node(self, indxt, indices, index, offset, length, count, parentIndex) :
+        '''
+        Append an INDXT['article'] entry to indxt and save its position
+        (0xc0 plus the current indxt offset) in indices.
+
+        index selects the self._ctoc_map entry supplying the title, class,
+        description and author offsets. Flag values are added to the
+        written offset when the article has an author and/or a description;
+        which values depends on whether offset is below 0x4000.
+        '''
+        def vwi(value):
+            # Write one forward-encoded variable width integer
+            indxt.write(decint(value, DECINT_FORWARD))
+
+        indices.write(pack('>H', 0xc0 + indxt.tell()))  # offset for IDXTIndices
+        name = "%04X"%count
+        indxt.write(chr(len(name)) + name)              # length-prefixed name
+        indxt.write(INDXT['article'])                   # entryType
+
+        has_author = bool(self._ctoc_map[index]['authorOffset'])
+        has_description = bool(self._ctoc_map[index]['descriptionOffset'])
+
+        # Both flags are chosen from the unflagged offset
+        below_limit = offset < 0x4000
+        flagged = offset
+        if has_author :
+            flagged += 0x00010000 if below_limit else 0x00800000
+        if has_description :
+            flagged += 0x00008000 if below_limit else 0x00400000
+
+        # If we didn't set any flags, write an extra zero in the stream
+        # Seems unnecessary, but matching Mobigen
+        if flagged == offset :
+            indxt.write(chr(0))
+
+        vwi(flagged)                                    # offset plus flags
+        vwi(length)
+        vwi(self._ctoc_map[index]['titleOffset'])       # title offset in CNCX
+        vwi(2)                                          # unknown byte
+        vwi(self._ctoc_map[index]['classOffset'])       # class offset in CNCX
+        vwi(parentIndex)                                # index of periodicalParent
+
+        # Optionally write the description and author fields, in that order
+        description_offset = self._ctoc_map[index]['descriptionOffset']
+        if description_offset :
+            vwi(description_offset)
+
+        author_offset = self._ctoc_map[index]['authorOffset']
+        if author_offset :
+            vwi(author_offset)
+
+    def _write_chapter_node(self, indxt, indices, index, offset, length, count):
+        '''
+        Write an INDX1 NCXEntry of entryType INDXT['chapter'] to indxt and
+        save its position (0xc0 plus the current indxt offset) in indices.
+
+        index selects the self._ctoc_map entry supplying the title offset;
+        count is formatted as the four-hex-digit entry name.
+        '''
+        pos = 0xc0 + indxt.tell()
+        indices.write(pack('>H', pos))                      # Save the offset for IDXTIndices
+        name = "%04X"%count
+        indxt.write(chr(len(name)) + name)                  # Write the name
+        indxt.write(INDXT['chapter'])                       # entryType
+        indxt.write(decint(offset, DECINT_FORWARD))         # offset
+        indxt.write(decint(length, DECINT_FORWARD))         # length
+        indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
+        indxt.write(decint(0, DECINT_FORWARD))              # unknown byte
+
+    def _compute_offset_length(self, i, node, entries) :
+        '''
+        Return (offset, length) for node.
+
+        offset is the entry for node.href in self._id_offsets. length runs
+        to the first entry after position i in entries whose href has a
+        larger known offset, or to self._content_length when there is none.
+        Returns (-1, -1), after logging a warning, when node.href has no
+        known offset.
+        '''
+        known = self._id_offsets
+        if node.href not in known:
+            self._oeb.log.warning('Could not find TOC entry:', node.title)
+            return -1, -1
+
+        start = known[node.href]
+        # Calculate length based on next entry's offset
+        for later in entries[i+1:]:
+            later_href = later.href
+            if later_href in known and known[later_href] > start:
+                return start, known[later_href] - start
+
+        # Nothing follows: the node runs to the end of the content
+        return start, self._content_length - start
+
+    def _establish_document_structure(self) :
+        '''
+        Attach a document structure to self._MobiDoc, chosen from the klass
+        of the first entry in self._ctoc_map.
+
+        A first klass of 'chapter', or no first entry at all, attaches a
+        MobiBook and returns 'book'. A first klass of 'periodical' attaches
+        a MobiPeriodical whose startAddress is self._anchor_offset_kindle
+        and returns 'periodical'.
+
+        Raises NotImplementedError for any other klass.
+        '''
+        try :
+            klass = self._ctoc_map[0]['klass']
+        except (IndexError, KeyError) :
+            # No usable first entry: handled like a book. Only the two
+            # lookup failures are caught so that unrelated errors (and
+            # KeyboardInterrupt) are not swallowed.
+            klass = None
+
+        if klass is None or klass == 'chapter' :
+            if self.opts.verbose > 2 :
+                self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
+            self._MobiDoc.documentStructure = MobiBook()
+            return 'book'
+
+        if klass == 'periodical' :
+            if self.opts.verbose > 2 :
+                self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
+            self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
+            self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
+            return klass
+
+        raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
+
+    def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
+        '''
+        Walk the nodes below child and return (sectionIndices,
+        sectionParents).
+
+        sectionIndices receives the current section number for every
+        'periodical', 'section' and 'article' node; the number is bumped
+        each time a 'section' node is met. sectionParents receives the
+        object returned by myPeriodical.addSectionParent() for every
+        'section' node. Nodes of any other klass add nothing.
+        '''
+        sectionIndices = []
+        sectionParents = []
+        very_verbose = self.opts.verbose > 3
+
+        # The first node yielded by child.iter() is skipped
+        for (j, section) in enumerate(list(child.iter())[1:]):
+            klass = section.klass
+
+            if klass == 'section' :
+                # Add sections, save in list with original sequence number
+                sectionParents.append(myPeriodical.addSectionParent(myDoc, j))
+                # Bump the section #
+                currentSection += 1
+            elif klass not in ('periodical', 'article') :
+                if very_verbose :
+                    self._oeb.logger.info( " Unrecognized class %s in structured document" % section.klass)
+                continue
+
+            # Write our index to the list
+            sectionIndices.append(currentSection)
+
+            if not very_verbose :
+                continue
+            if klass == 'periodical' :
+                self._oeb.logger.info("Periodical: %15.15s \tkls:%s \tdpt:%d ply:%03d" % \
+                    (section.title, section.klass, section.depth(), section.play_order) )
+            elif klass == 'section' :
+                self._oeb.logger.info(" Section: %15.15s \tkls:%s \tdpt:%d ply:%03d \tindex:%d" % \
+                    (section.title, section.klass, section.depth(), section.play_order,j) )
+
+        return sectionIndices, sectionParents
+
+
def _generate_section_article_indices(self, i, section, entries, sectionIndices, sectionParents):
    """Create a MobiArticle for each node below ``section`` and attach it.

    ``i`` is the section's position in ``entries``. Every node yielded by
    ``section.iter()`` except the first is treated as an article. Its
    offset and length are recomputed with ``_compute_offset_length``, and
    its ctoc map index is ``i + j + 1`` where ``j`` is its position among
    those nodes.
    """
    for ordinal, article in enumerate(list(section.iter())[1:]):
        start, size = self._compute_offset_length(i, article, entries)
        if self.opts.verbose > 2:
            self._oeb.logger.info(
                "article %02d: offset = 0x%06X length = 0x%06X" % (ordinal, start, size))

        # The section number recorded for the entry just before this
        # section is used as the index of the owning section parent.
        owner = sectionParents[sectionIndices[i - 1]]
        owner.addArticle(MobiArticle(owner, start, size, i + ordinal + 1))
+
+
def _add_book_chapters(self, myDoc, indxt, indices):
    """Write one chapter node per chapter of the book held by ``myDoc``.

    Each chapter is passed to ``_write_chapter_node`` with its ctoc map
    index, start address, length and zero-based position.

    Returns ``(last_name, c)`` where ``c`` is the position of the last
    chapter written and ``last_name`` is ``c`` as four upper-case hex
    digits.

    Raises ValueError when the book holds no chapters. Previously this
    case failed with an accidental UnboundLocalError because the loop
    variable was never bound.
    """
    book = myDoc.documentStructure
    if self.opts.verbose > 3:
        self._oeb.logger.info("Writing %d chapters for mobitype 0x%03X" %
                              (book.chapterCount(), myDoc.mobiType))

    chapters = list(book.chapters)
    if not chapters:
        raise ValueError('_add_book_chapters: book contains no chapters to index')

    for c, chapter in enumerate(chapters):
        self._write_chapter_node(indxt, indices, chapter.myCtocMapIndex,
                                 chapter.startAddress, chapter.length, c)

    return "%04X" % c, c
+
def _add_periodical_flat_articles(self, myDoc, indxt, indices):
    """Write the nodes for a periodical that has exactly one section.

    Nodes are written in this order: the periodical itself, its first
    section parent, then every article of that section. The running
    counter ``c`` starts at 0 for the periodical and grows by one per
    node written.

    Returns ``(last_name, c)`` where ``c`` is the counter of the last node
    written and ``last_name`` is ``c`` as four upper-case hex digits.
    """
    periodical = myDoc.documentStructure
    section = periodical.sectionParents[0]
    if self.opts.verbose > 3:
        self._oeb.logger.info("Writing %d articles for mobitype 0x%03X" %
                              (len(section.articles), myDoc.mobiType))

    # Singleton periodical: index 0, counter 0.
    c = 0
    self._write_periodical_node(indxt, indices, 0,
                                periodical.startAddress, periodical.length, c,
                                periodical.firstSectionIndex, periodical.lastSectionIndex)

    # Singleton section: index 1, counter 1.
    c += 1
    self._write_section_node(indxt, indices, section.myCtocMapIndex, 1,
                             section.startAddress, section.sectionLength, c,
                             section.firstArticleIndex, section.lastArticleIndex,
                             section.parentIndex)

    # Articles follow the section node.
    for article in list(section.articles):
        c += 1
        self._write_article_node(indxt, indices, article.myCtocMapIndex,
                                 article.startAddress, article.articleLength, c,
                                 article.sectionParentIndex)

    return "%04X" % c, c
+
def _add_periodical_structured_articles(self, myDoc, indxt, indices):
    """Write the NCX entries for a periodical with several sections.

    Entries are written in three passes: the periodical node, then one
    section node for every section number from ``firstSectionIndex`` to
    ``lastSectionIndex`` inclusive, then the article nodes of each of
    those sections in the same order. Section numbers are 1-based, so
    section ``n`` is ``sectionParents[n - 1]``. The running counter ``c``
    starts at 0 for the periodical and grows by one per node written.

    Returns ``(last_name, c)`` where ``c`` is the counter of the last node
    written and ``last_name`` is ``c`` as four upper-case hex digits. The
    name is computed once at the end, so it is defined even when the
    section range is empty.
    """
    if self.opts.verbose > 2:
        self._oeb.logger.info("Writing NCXEntries for mobiType 0x%03X" % myDoc.mobiType)

    periodical = myDoc.documentStructure
    firstSection = periodical.firstSectionIndex
    lastSection = periodical.lastSectionIndex

    # Opening periodical entry.
    c = 0
    self._write_periodical_node(indxt, indices, 0,
                                periodical.startAddress, periodical.length, c,
                                firstSection, lastSection)

    # One entry per section.
    sectionNumber = firstSection
    while sectionNumber <= lastSection:
        section = periodical.sectionParents[sectionNumber - 1]
        c += 1
        self._write_section_node(indxt, indices, section.myCtocMapIndex, sectionNumber,
                                 section.startAddress, section.sectionLength, c,
                                 section.firstArticleIndex, section.lastArticleIndex,
                                 section.parentIndex)
        sectionNumber += 1

    # One entry per article, grouped by section.
    sectionNumber = firstSection
    while sectionNumber <= lastSection:
        section = periodical.sectionParents[sectionNumber - 1]
        for i, article in enumerate(section.articles):
            if self.opts.verbose > 3:
                self._oeb.logger.info("Adding section:article %d:%02d" %
                                      (section.myIndex, i))
            c += 1
            self._write_article_node(indxt, indices, article.myCtocMapIndex,
                                     article.startAddress, article.articleLength, c,
                                     article.sectionParentIndex)
        sectionNumber += 1

    return "%04X" % c, c
+
def _generate_indxt(self, ctoc):
    """Build the document structure from the TOC and serialise its index.

    Steps:

    1. ``_establish_document_structure`` attaches a book or periodical
       container to ``self._MobiDoc``.
    2. For a periodical, section numbers and section parents are derived
       from the first node yielded by ``toc.iter()``.
    3. Every remaining TOC node with a non-blank title becomes either a
       ``MobiChapter`` (klass 'chapter', or 'article' when
       ``opts.mobi_periodical`` is off) or, for klass 'section' with
       ``opts.mobi_periodical`` on, a group of articles.
    4. The nodes are written by the helper matching ``myDoc.mobiType``.

    Assumption carried over from the original author: ``child.depth()``
    reflects TOC nesting. A flat book has depth 2 (chapters at depth 1).
    A Mobigen-prepped periodical has depth 4 (periodical 3, section 2,
    article 1).

    ``ctoc`` is accepted but not used by this method.

    Returns ``(indxt, c, indices, last_name)`` where ``indxt`` and
    ``indices`` are the two buffers passed through ``align_block``, and
    ``c`` / ``last_name`` come from the node-writing helper.
    """
    sectionIndices = []
    sectionParents = []
    currentSection = 0  # Starting section number
    toc = self._oeb.toc
    indxt, indices = StringIO(), StringIO()
    indices.write('IDXT')

    # 'book' or 'periodical'; anything else raises inside the helper.
    documentType = self._establish_document_structure()
    myDoc = self._MobiDoc

    # Only the first node of the TOC walk is inspected here.
    for child in list(toc.iter())[0:1]:
        if documentType == "periodical":
            myPeriodical = myDoc.documentStructure
            if self.opts.verbose > 3:
                self._oeb.logger.info("\nDocument: %s \tkls:%s \tdpt:%d ply:%03d" %
                                      (child.title, child.klass, child.depth(), child.play_order))
            sectionIndices, sectionParents = \
                self._generate_section_indices(child, currentSection, myPeriodical, myDoc)

        elif documentType == "book":
            myBook = myDoc.documentStructure
            if self.opts.verbose > 3:
                self._oeb.logger.info("\nBook: %-19.19s \tkls:%s \tdpt:%d ply:%03d" %
                                      (child.title, child.klass, child.depth(), child.play_order))

        else:
            if self.opts.verbose > 3:
                self._oeb.logger.info("unknown document type %12.12s \tdepth:%d" %
                                      (child.title, child.depth()))

    entries = list(toc.iter())[1:]
    for i, child in enumerate(entries):
        if not child.title or not child.title.strip():
            continue

        offset, length = self._compute_offset_length(i, child, entries)

        if child.klass == 'chapter' or \
                (not self.opts.mobi_periodical and child.klass == 'article'):
            # NOTE(review): myBook is only bound for documentType 'book';
            # a chapter-like entry inside a periodical fails here - confirm
            # whether that combination can occur.
            myBook.addChapter(MobiChapter(myDoc.getNextNode(), offset, length, i))

            if self.opts.verbose > 3:
                try:
                    self._oeb.logger.info(
                        " Chapter: %-14.14s \tcls:%s \tdpt:%d ply:%03d \toff:0x%X \t:len0x%X" %
                        (child.title, child.klass, child.depth(), child.play_order, offset, length))
                except Exception:
                    # Presumably guards against titles that cannot be
                    # formatted or logged; report the chapter without it.
                    self._oeb.logger.info(
                        " Chapter: %-14.14s \tclass:%s \tdepth:%d playOrder:%03d \toff:0x%X \t:len0x%X" %
                        ("(bad string)", child.klass, child.depth(), child.play_order, offset, length))

        elif child.klass == 'section' and self.opts.mobi_periodical:
            if self.opts.verbose > 3:
                self._oeb.logger.info("\n Section: %-15.15s \tkls:%s \tdpt:%d ply:%03d" %
                                      (child.title, child.klass, child.depth(), child.play_order))
            self._generate_section_article_indices(i, child, entries, sectionIndices, sectionParents)

    if self.opts.verbose > 3:
        self._oeb.logger.info("")

    mobiType = myDoc.mobiType
    if self.opts.verbose > 3:
        self._MobiDoc.dumpInfo()

    if mobiType == 0x02:
        last_name, c = self._add_book_chapters(myDoc, indxt, indices)
    elif mobiType == 0x102 and myDoc.documentStructure.sectionCount() == 1:
        last_name, c = self._add_periodical_flat_articles(myDoc, indxt, indices)
    else:
        last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)

    return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
class HTMLRecordData(object):
+ """ A data structure containing indexing/navigation data for an HTML record """
def __init__(self):
self._continuingNode = -1
self._continuingNodeParent = -1
@@ -1287,3 +2486,344 @@ class HTMLRecordData(object):
oeb.logger.info( " nextSectionOpeningNode: %03d" % self.nextSectionOpeningNode )
oeb.logger.info( " nextSectionNodeCount: %03d" % self.nextSectionNodeCount )
class MobiDocument(object):
    """ Hierarchical description of a Mobi document """

    # Class-level default for the node counter. __init__ gives every
    # instance its own copy, so this is only a fallback for instances
    # created without running __init__.
    _nextNode = -1

    def __init__(self, mobitype):
        self._mobitype = mobitype
        self._documentStructure = None   # Assigned in _generate_indxt
        # Counter used to hand out index values as new nodes are created.
        self._nextNode = -1

    def getMobiType(self):
        return self._mobitype

    def setMobiType(self, value):
        self._mobitype = value

    mobiType = property(getMobiType, setMobiType, None, None)

    def getDocumentStructure(self):
        return self._documentStructure

    def setDocumentStructure(self, value):
        self._documentStructure = value

    documentStructure = property(getDocumentStructure, setDocumentStructure, None, None)

    def getNextNode(self):
        """Return the next unused node index: 0 on the first call, then 1, 2, ..."""
        self._nextNode += 1
        return self._nextNode

    def dumpInfo(self):
        """Ask the attached document structure to print itself.

        Does nothing when no structure has been attached yet, instead of
        failing with an AttributeError on None.
        """
        if self._documentStructure is None:
            return
        self._documentStructure.dumpInfo()
+
class MobiBook(object):
    """ A container for a flat chapter-to-chapter Mobi book """

    def __init__(self):
        self._chapters = []

    def chapterCount(self):
        """Return the number of chapters added so far."""
        return len(self._chapters)

    def getChapters(self):
        return self._chapters

    def setChapters(self, value):
        self._chapters = value

    chapters = property(getChapters, setChapters, None, None)

    def addChapter(self, value):
        """Append one chapter object to the book."""
        self._chapters.append(value)

    def dumpInfo(self):
        """Print a summary of the book and of every chapter to stdout.

        Each print call takes a single pre-formatted string, which behaves
        the same as a Python 2 print statement and as the Python 3 print
        function.
        """
        print("%20s:" % ("Book"))
        print("%20s: %d" % ("Number of chapters", len(self._chapters)))
        for (count, chapter) in enumerate(self._chapters):
            print("%20s: %d" % ("myCtocMapIndex", chapter.myCtocMapIndex))
            print("%20s: %d" % ("Chapter", count))
            print("%20s: 0x%X" % ("startAddress", chapter.startAddress))
            print("%20s: 0x%X" % ("length", chapter.length))
            print("")
+
class MobiChapter(object):
    """ A container for Mobi chapters """

    def __init__(self, myIndex, startAddress, length, ctoc_map_index):
        # Store the four values unchanged; they are exposed through the
        # properties defined below.
        self._myIndex = myIndex
        self._myCtocMapIndex = ctoc_map_index
        self._startAddress = startAddress
        self._length = length

    # myIndex is read-only: no setter is provided.
    def getMyIndex(self):
        return self._myIndex

    myIndex = property(fget=getMyIndex)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex

    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value

    myCtocMapIndex = property(fget=getMyCtocMapIndex, fset=setMyCtocMapIndex)

    def getStartAddress(self):
        return self._startAddress

    def setStartAddress(self, value):
        self._startAddress = value

    startAddress = property(fget=getStartAddress, fset=setStartAddress)

    def getLength(self):
        return self._length

    def setLength(self, value):
        self._length = value

    length = property(fget=getLength, fset=setLength)
+
class MobiPeriodical(object):
    """ A container for a structured periodical """

    def __init__(self, myIndex):
        self._myIndex = myIndex
        self._sectionParents = []
        # 0xFFFFFFFF is the initial placeholder for values not yet assigned.
        self._startAddress = 0xFFFFFFFF
        self._length = 0xFFFFFFFF
        self._firstSectionIndex = 0xFFFFFFFF
        self._lastSectionIndex = 0xFFFFFFFF
        self._myCtocMapIndex = 0     # Always first entry

    def getMyIndex(self):
        return self._myIndex

    def setMyIndex(self, value):
        self._myIndex = value

    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getSectionParents(self):
        return self._sectionParents

    def setSectionParents(self, value):
        self._sectionParents = value

    sectionParents = property(getSectionParents, setSectionParents, None, None)

    def sectionCount(self):
        """Return the number of section parents added so far."""
        return len(self._sectionParents)

    def getStartAddress(self):
        return self._startAddress

    def setStartAddress(self, value):
        self._startAddress = value

    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getLength(self):
        return self._length

    def setLength(self, value):
        self._length = value

    length = property(getLength, setLength, None, None)

    def getFirstSectionIndex(self):
        return self._firstSectionIndex

    def setFirstSectionIndex(self, value):
        self._firstSectionIndex = value

    firstSectionIndex = property(getFirstSectionIndex, setFirstSectionIndex, None, None)

    def getLastSectionIndex(self):
        return self._lastSectionIndex

    def setLastSectionIndex(self, value):
        self._lastSectionIndex = value

    lastSectionIndex = property(getLastSectionIndex, setLastSectionIndex, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex

    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value

    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def addSectionParent(self, myIndex, ctoc_map_index):
        """Create a MobiSection, register it with this periodical and return it.

        ``myIndex`` is handed unchanged to the ``MobiSection`` constructor.
        NOTE(review): despite its name this looks like the owning
        MobiDocument rather than an index - confirm against callers.
        ``ctoc_map_index`` is stored on the new section as its
        ``myCtocMapIndex``.
        """
        # Create a new section parent
        newSection = MobiSection(myIndex)
        # Assign our index to the section
        newSection.parentIndex = self._myIndex
        # Assign section number (zero-based position in the list)
        newSection.sectionIndex = len(self._sectionParents)
        # Assign ctoc_map_index
        newSection.myCtocMapIndex = ctoc_map_index
        # Add it to the list
        self._sectionParents.append(newSection)
        return newSection

    def dumpInfo(self):
        """Print the periodical, its sections and their articles to stdout.

        Each print call takes a single pre-formatted string, which behaves
        the same as a Python 2 print statement and as the Python 3 print
        function.
        """
        print("%20s:" % ("Periodical"))
        print("%20s: 0x%X" % ("myIndex", self.myIndex))
        print("%20s: 0x%X" % ("startAddress", self.startAddress))
        print("%20s: 0x%X" % ("length", self.length))
        print("%20s: 0x%X" % ("myCtocMapIndex", self.myCtocMapIndex))
        print("%20s: 0x%X" % ("firstSectionIndex", self.firstSectionIndex))
        print("%20s: 0x%X" % ("lastSectionIndex", self.lastSectionIndex))
        print("%20s: %d" % ("Number of Sections", len(self._sectionParents)))
        for (count, section) in enumerate(self._sectionParents):
            print("\t%20s: %d" % ("Section", count))
            print("\t%20s: 0x%X" % ("startAddress", section.startAddress))
            print("\t%20s: 0x%X" % ("length", section.sectionLength))
            print("\t%20s: 0x%X" % ("parentIndex", section.parentIndex))
            print("\t%20s: 0x%X" % ("myIndex", section.myIndex))
            print("\t%20s: 0x%X" % ("firstArticleIndex", section.firstArticleIndex))
            print("\t%20s: 0x%X" % ("lastArticleIndex", section.lastArticleIndex))
            print("\t%20s: 0x%X" % ("articles", len(section.articles)))
            print("\t%20s: 0x%X" % ("myCtocMapIndex", section.myCtocMapIndex))
            print("")
            for (artCount, article) in enumerate(section.articles):
                print("\t\t%20s: %d" % ("Article", artCount))
                print("\t\t%20s: 0x%X" % ("startAddress", article.startAddress))
                print("\t\t%20s: 0x%X" % ("length", article.articleLength))
                print("\t\t%20s: 0x%X" % ("sectionIndex", article.sectionParentIndex))
                print("\t\t%20s: 0x%X" % ("myIndex", article.myIndex))
                print("\t\t%20s: 0x%X" % ("myCtocMapIndex", article.myCtocMapIndex))
                print("")
+
class MobiSection(object):
    """ A container for periodical sections """

    def __init__(self, myMobiDoc):
        self._myMobiDoc = myMobiDoc
        # The section takes its node index from the owning document.
        self._myIndex = myMobiDoc.getNextNode()
        self._parentIndex = 0xFFFFFFFF
        self._firstArticleIndex = 0x00
        self._lastArticleIndex = 0x00
        self._startAddress = 0xFFFFFFFF
        self._sectionLength = 0xFFFFFFFF
        self._articles = []
        self._myCtocMapIndex = -1

    def getMyMobiDoc(self):
        return self._myMobiDoc

    def setMyMobiDoc(self, value):
        self._myMobiDoc = value

    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex

    def setMyIndex(self, value):
        self._myIndex = value

    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getParentIndex(self):
        return self._parentIndex

    def setParentIndex(self, value):
        self._parentIndex = value

    # The property used to be spelled 'parenIndex', so assigning to
    # section.parentIndex created an unrelated instance attribute and
    # getParentIndex() kept returning the initial placeholder. Both
    # spellings now refer to the same stored value.
    parentIndex = property(getParentIndex, setParentIndex, None, None)
    parenIndex = parentIndex

    def getFirstArticleIndex(self):
        return self._firstArticleIndex

    def setFirstArticleIndex(self, value):
        self._firstArticleIndex = value

    firstArticleIndex = property(getFirstArticleIndex, setFirstArticleIndex, None, None)

    def getLastArticleIndex(self):
        return self._lastArticleIndex

    def setLastArticleIndex(self, value):
        self._lastArticleIndex = value

    lastArticleIndex = property(getLastArticleIndex, setLastArticleIndex, None, None)

    def getStartAddress(self):
        return self._startAddress

    def setStartAddress(self, value):
        self._startAddress = value

    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getSectionLength(self):
        return self._sectionLength

    def setSectionLength(self, value):
        self._sectionLength = value

    sectionLength = property(getSectionLength, setSectionLength, None, None)

    def getArticles(self):
        return self._articles

    def setArticles(self, value):
        self._articles = value

    articles = property(getArticles, setArticles, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex

    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value

    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)

    def addArticle(self, article):
        """Append ``article`` and update the section and periodical totals.

        The periodical is the ``documentStructure`` of the owning document.
        Its length and section indices, and this section's start address,
        length and first/last article indices, are all adjusted here.
        """
        self._articles.append(article)
        periodical = self.myMobiDoc.documentStructure
        isFirstArticle = len(self.articles) == 1

        # Adjust the Periodical parameters
        if self.myIndex == 1 and isFirstArticle:
            # First article of the section with index 1: initialise the
            # periodical values. The length also covers the gap between
            # the periodical's start and the article's start.
            periodical.firstSectionIndex = self.myIndex
            periodical.lastSectionIndex = self.myIndex
            periodical.length = article.articleLength + \
                (article.startAddress - periodical.startAddress)
        else:
            periodical.length += article.articleLength

        # Always set the highest section index to myIndex
        periodical.lastSectionIndex = self.myIndex

        # Adjust the Section parameters
        if isFirstArticle:
            self.firstArticleIndex = article.myIndex
            if len(periodical.sectionParents) == 1:
                # Only one section so far: it starts where the periodical
                # starts and absorbs the gap before the article.
                self.startAddress = periodical.startAddress
                self.sectionLength = article.articleLength + \
                    (article.startAddress - periodical.startAddress)
            else:
                self.startAddress = article.startAddress
                self.sectionLength = article.articleLength
        else:
            # Later articles only extend the section.
            self.sectionLength += article.articleLength

        self.lastArticleIndex = article.myIndex
+
class MobiArticle(object):
    """ A container for periodical articles """

    def __init__(self, sectionParent, startAddress, length, ctocMapIndex):
        self._mySectionParent = sectionParent
        self._myMobiDoc = sectionParent.myMobiDoc
        # The article takes its node index from the section's document.
        self._myIndex = sectionParent.myMobiDoc.getNextNode()
        self._myCtocMapIndex = ctocMapIndex
        self._sectionParentIndex = sectionParent.myIndex
        self._startAddress = startAddress
        self._articleLength = length

    def getMySectionParent(self):
        return self._mySectionParent

    def setMySectionParent(self, value):
        self._mySectionParent = value

    mySectionParent = property(getMySectionParent, setMySectionParent, None, None)

    def getMyMobiDoc(self):
        return self._myMobiDoc

    def setMyMobiDoc(self, value):
        self._myMobiDoc = value

    myMobiDoc = property(getMyMobiDoc, setMyMobiDoc, None, None)

    def getMyIndex(self):
        return self._myIndex

    def setMyIndex(self, value):
        # Store into the attribute the getter reads. The setter used to
        # write to _sectionIndex, so assigning myIndex had no visible effect.
        self._myIndex = value

    myIndex = property(getMyIndex, setMyIndex, None, None)

    def getSectionParentIndex(self):
        return self._sectionParentIndex

    def setSectionParentIndex(self, value):
        self._sectionParentIndex = value

    sectionParentIndex = property(getSectionParentIndex, setSectionParentIndex, None, None)

    def getStartAddress(self):
        return self._startAddress

    def setStartAddress(self, value):
        self._startAddress = value

    startAddress = property(getStartAddress, setStartAddress, None, None)

    def getArticleLength(self):
        return self._articleLength

    def setArticleLength(self, value):
        self._articleLength = value

    articleLength = property(getArticleLength, setArticleLength, None, None)

    def getMyCtocMapIndex(self):
        return self._myCtocMapIndex

    def setMyCtocMapIndex(self, value):
        self._myCtocMapIndex = value

    myCtocMapIndex = property(getMyCtocMapIndex, setMyCtocMapIndex, None, None)
+