mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
59d9e15580
commit
dbefbfbd86
@ -1642,6 +1642,61 @@ class MobiWriter(object):
|
|||||||
for record in self._records:
|
for record in self._records:
|
||||||
self._write(record)
|
self._write(record)
|
||||||
|
|
||||||
|
def _clean_text_value(self, text):
|
||||||
|
if text is not None and text.strip() :
|
||||||
|
text = text.strip()
|
||||||
|
if not isinstance(text, unicode):
|
||||||
|
text = text.decode('utf-8', 'replace')
|
||||||
|
text = normalize(text).encode('utf-8')
|
||||||
|
else :
|
||||||
|
text = "(none)".encode('utf-8')
|
||||||
|
return text
|
||||||
|
|
||||||
|
def _compute_offset_length(self, i, node, entries) :
|
||||||
|
h = node.href
|
||||||
|
if h not in self._id_offsets:
|
||||||
|
self._oeb.log.warning('Could not find TOC entry:', node.title)
|
||||||
|
return -1, -1
|
||||||
|
|
||||||
|
offset = self._id_offsets[h]
|
||||||
|
length = None
|
||||||
|
# Calculate length based on next entry's offset
|
||||||
|
for sibling in entries[i+1:]:
|
||||||
|
h2 = sibling.href
|
||||||
|
if h2 in self._id_offsets:
|
||||||
|
offset2 = self._id_offsets[h2]
|
||||||
|
if offset2 > offset:
|
||||||
|
length = offset2 - offset
|
||||||
|
break
|
||||||
|
if length is None:
|
||||||
|
length = self._content_length - offset
|
||||||
|
return offset, length
|
||||||
|
|
||||||
|
def _establish_document_structure(self) :
|
||||||
|
documentType = None
|
||||||
|
try :
|
||||||
|
klass = self._ctoc_map[0]['klass']
|
||||||
|
except :
|
||||||
|
klass = None
|
||||||
|
|
||||||
|
if klass == 'chapter' or klass == None :
|
||||||
|
documentType = 'book'
|
||||||
|
if self.opts.verbose > 2 :
|
||||||
|
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
|
||||||
|
self._MobiDoc.documentStructure = MobiBook()
|
||||||
|
|
||||||
|
elif klass == 'periodical' :
|
||||||
|
documentType = klass
|
||||||
|
if self.opts.verbose > 2 :
|
||||||
|
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
|
||||||
|
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
|
||||||
|
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
|
||||||
|
else :
|
||||||
|
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
|
||||||
|
return documentType
|
||||||
|
|
||||||
|
# Index {{{
|
||||||
|
|
||||||
def _generate_index(self):
|
def _generate_index(self):
|
||||||
self._oeb.log('Generating INDX ...')
|
self._oeb.log('Generating INDX ...')
|
||||||
self._primary_index_record = None
|
self._primary_index_record = None
|
||||||
@ -1815,276 +1870,7 @@ class MobiWriter(object):
|
|||||||
open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
|
open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
|
||||||
self._oeb.log.debug('Index records dumped to', t)
|
self._oeb.log.debug('Index records dumped to', t)
|
||||||
|
|
||||||
def _clean_text_value(self, text):
|
# Index nodes {{{
|
||||||
if text is not None and text.strip() :
|
|
||||||
text = text.strip()
|
|
||||||
if not isinstance(text, unicode):
|
|
||||||
text = text.decode('utf-8', 'replace')
|
|
||||||
text = normalize(text).encode('utf-8')
|
|
||||||
else :
|
|
||||||
text = "(none)".encode('utf-8')
|
|
||||||
return text
|
|
||||||
|
|
||||||
def _add_to_ctoc(self, ctoc_str, record_offset):
|
|
||||||
# Write vwilen + string to ctoc
|
|
||||||
# Return offset
|
|
||||||
# Is there enough room for this string in the current ctoc record?
|
|
||||||
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
|
|
||||||
# flush this ctoc, start a new one
|
|
||||||
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
|
|
||||||
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
|
|
||||||
# pad with 00
|
|
||||||
pad = 0xfbf8 - self._ctoc.tell()
|
|
||||||
# print "padding %d bytes of 00" % pad
|
|
||||||
self._ctoc.write('\0' * (pad))
|
|
||||||
self._ctoc_records.append(self._ctoc.getvalue())
|
|
||||||
self._ctoc.truncate(0)
|
|
||||||
self._ctoc_offset += 0x10000
|
|
||||||
record_offset = self._ctoc_offset
|
|
||||||
|
|
||||||
offset = self._ctoc.tell() + record_offset
|
|
||||||
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
|
|
||||||
return offset
|
|
||||||
|
|
||||||
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
|
||||||
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
|
||||||
t = node.title if title is None else title
|
|
||||||
t = self._clean_text_value(t)
|
|
||||||
self._last_toc_entry = t
|
|
||||||
|
|
||||||
# Create an empty dictionary for this node
|
|
||||||
ctoc_name_map = {}
|
|
||||||
|
|
||||||
# article = chapter
|
|
||||||
if node.klass == 'article' :
|
|
||||||
ctoc_name_map['klass'] = 'chapter'
|
|
||||||
else :
|
|
||||||
ctoc_name_map['klass'] = node.klass
|
|
||||||
|
|
||||||
# Add title offset to name map
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
|
||||||
self._chapterCount += 1
|
|
||||||
|
|
||||||
# append this node's name_map to map
|
|
||||||
self._ctoc_map.append(ctoc_name_map)
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
|
||||||
# Process 'periodical', 'section' and 'article'
|
|
||||||
|
|
||||||
# Fetch the offset referencing the current ctoc_record
|
|
||||||
if node.klass is None :
|
|
||||||
return
|
|
||||||
t = node.title if title is None else title
|
|
||||||
t = self._clean_text_value(t)
|
|
||||||
self._last_toc_entry = t
|
|
||||||
|
|
||||||
# Create an empty dictionary for this node
|
|
||||||
ctoc_name_map = {}
|
|
||||||
|
|
||||||
# Add the klass of this node
|
|
||||||
ctoc_name_map['klass'] = node.klass
|
|
||||||
|
|
||||||
if node.klass == 'chapter':
|
|
||||||
# Add title offset to name map
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
|
||||||
self._chapterCount += 1
|
|
||||||
|
|
||||||
elif node.klass == 'periodical' :
|
|
||||||
# Add title offset
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
|
||||||
|
|
||||||
# Look for existing class entry 'periodical' in _ctoc_map
|
|
||||||
for entry in self._ctoc_map:
|
|
||||||
if entry['klass'] == 'periodical':
|
|
||||||
# Use the pre-existing instance
|
|
||||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
|
||||||
break
|
|
||||||
else :
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# class names should always be in CNCX 0 - no offset
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
|
||||||
|
|
||||||
self._periodicalCount += 1
|
|
||||||
|
|
||||||
elif node.klass == 'section' :
|
|
||||||
# Add title offset
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
|
||||||
|
|
||||||
# Look for existing class entry 'section' in _ctoc_map
|
|
||||||
for entry in self._ctoc_map:
|
|
||||||
if entry['klass'] == 'section':
|
|
||||||
# Use the pre-existing instance
|
|
||||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
|
||||||
break
|
|
||||||
else :
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# class names should always be in CNCX 0 - no offset
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
|
||||||
|
|
||||||
self._sectionCount += 1
|
|
||||||
|
|
||||||
elif node.klass == 'article' :
|
|
||||||
# Add title offset/title
|
|
||||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
|
||||||
|
|
||||||
# Look for existing class entry 'article' in _ctoc_map
|
|
||||||
for entry in self._ctoc_map:
|
|
||||||
if entry['klass'] == 'article':
|
|
||||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
|
||||||
break
|
|
||||||
else :
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# class names should always be in CNCX 0 - no offset
|
|
||||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
|
||||||
|
|
||||||
# Add description offset/description
|
|
||||||
if node.description :
|
|
||||||
d = self._clean_text_value(node.description)
|
|
||||||
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
|
||||||
else :
|
|
||||||
ctoc_name_map['descriptionOffset'] = None
|
|
||||||
|
|
||||||
# Add author offset/attribution
|
|
||||||
if node.author :
|
|
||||||
a = self._clean_text_value(node.author)
|
|
||||||
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
|
||||||
else :
|
|
||||||
ctoc_name_map['authorOffset'] = None
|
|
||||||
|
|
||||||
self._articleCount += 1
|
|
||||||
|
|
||||||
else :
|
|
||||||
raise NotImplementedError( \
|
|
||||||
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
|
|
||||||
(node.title, node.klass, node.play_order))
|
|
||||||
|
|
||||||
# append this node's name_map to map
|
|
||||||
self._ctoc_map.append(ctoc_name_map)
|
|
||||||
|
|
||||||
def _generate_ctoc(self):
|
|
||||||
# Generate the compiled TOC strings
|
|
||||||
# Each node has 1-4 CTOC entries:
|
|
||||||
# Periodical (0xDF)
|
|
||||||
# title, class
|
|
||||||
# Section (0xFF)
|
|
||||||
# title, class
|
|
||||||
# Article (0x3F)
|
|
||||||
# title, class, description, author
|
|
||||||
# Chapter (0x0F)
|
|
||||||
# title, class
|
|
||||||
# nb: Chapters don't actually have @class, so we synthesize it
|
|
||||||
# in reader._toc_from_navpoint
|
|
||||||
|
|
||||||
toc = self._oeb.toc
|
|
||||||
reduced_toc = []
|
|
||||||
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
|
|
||||||
self._last_toc_entry = None
|
|
||||||
#ctoc = StringIO()
|
|
||||||
self._ctoc = StringIO()
|
|
||||||
|
|
||||||
# Track the individual node types
|
|
||||||
self._periodicalCount = 0
|
|
||||||
self._sectionCount = 0
|
|
||||||
self._articleCount = 0
|
|
||||||
self._chapterCount = 0
|
|
||||||
|
|
||||||
#first = True
|
|
||||||
|
|
||||||
if self._conforming_periodical_toc :
|
|
||||||
self._oeb.logger.info('Generating structured CTOC ...')
|
|
||||||
for (child) in toc.iter():
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
self._oeb.logger.info(" %s" % child)
|
|
||||||
self._add_structured_ctoc_node(child, self._ctoc)
|
|
||||||
#first = False
|
|
||||||
|
|
||||||
else :
|
|
||||||
self._oeb.logger.info('Generating flat CTOC ...')
|
|
||||||
previousOffset = -1
|
|
||||||
currentOffset = 0
|
|
||||||
for (i, child) in enumerate(toc.iterdescendants()):
|
|
||||||
# Only add chapters or articles at depth==1
|
|
||||||
# no class defaults to 'chapter'
|
|
||||||
if child.klass is None : child.klass = 'chapter'
|
|
||||||
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
|
|
||||||
(child.klass, child.depth(), child) )
|
|
||||||
|
|
||||||
# Test to see if this child's offset is the same as the previous child's
|
|
||||||
# offset, skip it
|
|
||||||
h = child.href
|
|
||||||
|
|
||||||
if h is None:
|
|
||||||
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
|
|
||||||
child.title)
|
|
||||||
continue
|
|
||||||
if h not in self._id_offsets:
|
|
||||||
self._oeb.logger.warn(' Ignoring missing TOC entry:',
|
|
||||||
unicode(child))
|
|
||||||
continue
|
|
||||||
|
|
||||||
currentOffset = self._id_offsets[h]
|
|
||||||
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
|
|
||||||
|
|
||||||
if currentOffset != previousOffset :
|
|
||||||
self._add_flat_ctoc_node(child, self._ctoc)
|
|
||||||
reduced_toc.append(child)
|
|
||||||
previousOffset = currentOffset
|
|
||||||
else :
|
|
||||||
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
|
|
||||||
|
|
||||||
else :
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
|
|
||||||
(child.klass, child.depth(),i))
|
|
||||||
|
|
||||||
# Update the TOC with our edited version
|
|
||||||
self._oeb.toc.nodes = reduced_toc
|
|
||||||
|
|
||||||
# Instantiate a MobiDocument(mobitype)
|
|
||||||
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
|
|
||||||
not self.opts.mobi_periodical :
|
|
||||||
mobiType = 0x002
|
|
||||||
elif self._periodicalCount:
|
|
||||||
pt = None
|
|
||||||
if self._oeb.metadata.publication_type:
|
|
||||||
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
|
|
||||||
if len(x) > 1:
|
|
||||||
pt = x[1]
|
|
||||||
mobiType = {'newspaper':0x101}.get(pt, 0x103)
|
|
||||||
else :
|
|
||||||
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
|
|
||||||
|
|
||||||
self._MobiDoc = MobiDocument(mobiType)
|
|
||||||
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
structType = 'book'
|
|
||||||
if mobiType > 0x100 :
|
|
||||||
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
|
|
||||||
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
|
|
||||||
if mobiType > 0x100 :
|
|
||||||
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
|
|
||||||
(self._periodicalCount, self._sectionCount, self._articleCount) )
|
|
||||||
else :
|
|
||||||
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
|
|
||||||
|
|
||||||
# Apparently the CTOC must end with a null byte
|
|
||||||
self._ctoc.write('\0')
|
|
||||||
|
|
||||||
ctoc = self._ctoc.getvalue()
|
|
||||||
rec_count = len(self._ctoc_records)
|
|
||||||
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
|
|
||||||
(rec_count + 1, 'records, last record' if rec_count else 'record,',
|
|
||||||
len(ctoc)/655) )
|
|
||||||
|
|
||||||
return align_block(ctoc)
|
|
||||||
|
|
||||||
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
|
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
|
||||||
pos = 0xc0 + indxt.tell()
|
pos = 0xc0 + indxt.tell()
|
||||||
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
||||||
@ -2176,48 +1962,8 @@ class MobiWriter(object):
|
|||||||
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
|
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
|
||||||
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
|
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
|
||||||
|
|
||||||
def _compute_offset_length(self, i, node, entries) :
|
# }}}
|
||||||
h = node.href
|
|
||||||
if h not in self._id_offsets:
|
|
||||||
self._oeb.log.warning('Could not find TOC entry:', node.title)
|
|
||||||
return -1, -1
|
|
||||||
|
|
||||||
offset = self._id_offsets[h]
|
|
||||||
length = None
|
|
||||||
# Calculate length based on next entry's offset
|
|
||||||
for sibling in entries[i+1:]:
|
|
||||||
h2 = sibling.href
|
|
||||||
if h2 in self._id_offsets:
|
|
||||||
offset2 = self._id_offsets[h2]
|
|
||||||
if offset2 > offset:
|
|
||||||
length = offset2 - offset
|
|
||||||
break
|
|
||||||
if length is None:
|
|
||||||
length = self._content_length - offset
|
|
||||||
return offset, length
|
|
||||||
|
|
||||||
def _establish_document_structure(self) :
|
|
||||||
documentType = None
|
|
||||||
try :
|
|
||||||
klass = self._ctoc_map[0]['klass']
|
|
||||||
except :
|
|
||||||
klass = None
|
|
||||||
|
|
||||||
if klass == 'chapter' or klass == None :
|
|
||||||
documentType = 'book'
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
|
|
||||||
self._MobiDoc.documentStructure = MobiBook()
|
|
||||||
|
|
||||||
elif klass == 'periodical' :
|
|
||||||
documentType = klass
|
|
||||||
if self.opts.verbose > 2 :
|
|
||||||
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
|
|
||||||
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
|
|
||||||
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
|
|
||||||
else :
|
|
||||||
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
|
|
||||||
return documentType
|
|
||||||
|
|
||||||
def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
|
def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
|
||||||
sectionTitles = list(child.iter())[1:]
|
sectionTitles = list(child.iter())[1:]
|
||||||
@ -2495,6 +2241,270 @@ class MobiWriter(object):
|
|||||||
last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)
|
last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)
|
||||||
|
|
||||||
return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
|
return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# CTOC {{{
|
||||||
|
def _add_to_ctoc(self, ctoc_str, record_offset):
    """Append a length-prefixed string to the current CTOC record.

    The string is written to ``self._ctoc`` as ``decint(len)`` followed by
    the string itself.  Returns the offset at which it was written, i.e. the
    position inside the record plus ``record_offset``.

    If fewer than ``2 + len(ctoc_str)`` bytes remain before the 0xfbf8 mark,
    the current record is first padded with NUL bytes up to that mark, saved
    to ``self._ctoc_records`` and truncated; ``self._ctoc_offset`` advances
    by 0x10000 and replaces ``record_offset`` for this write.
    """
    record = self._ctoc
    # The 2 extra bytes are presumably reserved for the length prefix.
    room = 0xfbf8 - record.tell()
    if room < 2 + len(ctoc_str):
        # Not enough room: close out this record and start the next one.
        record.write('\0' * (room))
        self._ctoc_records.append(record.getvalue())
        record.truncate(0)
        self._ctoc_offset += 0x10000
        record_offset = self._ctoc_offset

    offset = record.tell() + record_offset
    record.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
    return offset
|
||||||
|
|
||||||
|
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
||||||
|
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
||||||
|
t = node.title if title is None else title
|
||||||
|
t = self._clean_text_value(t)
|
||||||
|
self._last_toc_entry = t
|
||||||
|
|
||||||
|
# Create an empty dictionary for this node
|
||||||
|
ctoc_name_map = {}
|
||||||
|
|
||||||
|
# article = chapter
|
||||||
|
if node.klass == 'article' :
|
||||||
|
ctoc_name_map['klass'] = 'chapter'
|
||||||
|
else :
|
||||||
|
ctoc_name_map['klass'] = node.klass
|
||||||
|
|
||||||
|
# Add title offset to name map
|
||||||
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
self._chapterCount += 1
|
||||||
|
|
||||||
|
# append this node's name_map to map
|
||||||
|
self._ctoc_map.append(ctoc_name_map)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
||||||
|
# Process 'periodical', 'section' and 'article'
|
||||||
|
|
||||||
|
# Fetch the offset referencing the current ctoc_record
|
||||||
|
if node.klass is None :
|
||||||
|
return
|
||||||
|
t = node.title if title is None else title
|
||||||
|
t = self._clean_text_value(t)
|
||||||
|
self._last_toc_entry = t
|
||||||
|
|
||||||
|
# Create an empty dictionary for this node
|
||||||
|
ctoc_name_map = {}
|
||||||
|
|
||||||
|
# Add the klass of this node
|
||||||
|
ctoc_name_map['klass'] = node.klass
|
||||||
|
|
||||||
|
if node.klass == 'chapter':
|
||||||
|
# Add title offset to name map
|
||||||
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
self._chapterCount += 1
|
||||||
|
|
||||||
|
elif node.klass == 'periodical' :
|
||||||
|
# Add title offset
|
||||||
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
|
# Look for existing class entry 'periodical' in _ctoc_map
|
||||||
|
for entry in self._ctoc_map:
|
||||||
|
if entry['klass'] == 'periodical':
|
||||||
|
# Use the pre-existing instance
|
||||||
|
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||||
|
break
|
||||||
|
else :
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# class names should always be in CNCX 0 - no offset
|
||||||
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
|
self._periodicalCount += 1
|
||||||
|
|
||||||
|
elif node.klass == 'section' :
|
||||||
|
# Add title offset
|
||||||
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
|
# Look for existing class entry 'section' in _ctoc_map
|
||||||
|
for entry in self._ctoc_map:
|
||||||
|
if entry['klass'] == 'section':
|
||||||
|
# Use the pre-existing instance
|
||||||
|
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||||
|
break
|
||||||
|
else :
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# class names should always be in CNCX 0 - no offset
|
||||||
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
|
self._sectionCount += 1
|
||||||
|
|
||||||
|
elif node.klass == 'article' :
|
||||||
|
# Add title offset/title
|
||||||
|
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||||
|
|
||||||
|
# Look for existing class entry 'article' in _ctoc_map
|
||||||
|
for entry in self._ctoc_map:
|
||||||
|
if entry['klass'] == 'article':
|
||||||
|
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||||
|
break
|
||||||
|
else :
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# class names should always be in CNCX 0 - no offset
|
||||||
|
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||||
|
|
||||||
|
# Add description offset/description
|
||||||
|
if node.description :
|
||||||
|
d = self._clean_text_value(node.description)
|
||||||
|
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
||||||
|
else :
|
||||||
|
ctoc_name_map['descriptionOffset'] = None
|
||||||
|
|
||||||
|
# Add author offset/attribution
|
||||||
|
if node.author :
|
||||||
|
a = self._clean_text_value(node.author)
|
||||||
|
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
||||||
|
else :
|
||||||
|
ctoc_name_map['authorOffset'] = None
|
||||||
|
|
||||||
|
self._articleCount += 1
|
||||||
|
|
||||||
|
else :
|
||||||
|
raise NotImplementedError( \
|
||||||
|
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
|
||||||
|
(node.title, node.klass, node.play_order))
|
||||||
|
|
||||||
|
# append this node's name_map to map
|
||||||
|
self._ctoc_map.append(ctoc_name_map)
|
||||||
|
|
||||||
|
def _generate_ctoc(self):
    """Generate the compiled TOC strings and instantiate ``self._MobiDoc``.

    Each node has 1-4 CTOC entries:
        Periodical (0xDF)
            title, class
        Section (0xFF)
            title, class
        Article (0x3F)
            title, class, description, author
        Chapter (0x0F)
            title, class
    nb: Chapters don't actually have @class, so we synthesize it
    in reader._toc_from_navpoint

    When ``self._conforming_periodical_toc`` is set, every TOC node goes
    through ``_add_structured_ctoc_node``.  Otherwise only depth-1
    chapter/article nodes with a known, non-repeated offset are added via
    ``_add_flat_ctoc_node`` and the OEB TOC is replaced by that reduced list.

    Returns the final (last) CTOC record passed through ``align_block``.
    Raises ``NotImplementedError`` when periodical output is requested and
    sections/articles were found without a periodical node.
    """
    toc = self._oeb.toc
    log = self._oeb.logger
    reduced_toc = []
    self._ctoc_map = []     # per node dictionary of {class/title/desc/author} offsets
    self._last_toc_entry = None
    self._ctoc = StringIO()

    # Track the individual node types
    self._periodicalCount = 0
    self._sectionCount = 0
    self._articleCount = 0
    self._chapterCount = 0

    if self._conforming_periodical_toc:
        log.info('Generating structured CTOC ...')
        for child in toc.iter():
            if self.opts.verbose > 2:
                log.info(" %s" % child)
            self._add_structured_ctoc_node(child, self._ctoc)

    else:
        log.info('Generating flat CTOC ...')
        previousOffset = -1
        for position, child in enumerate(toc.iterdescendants()):
            # no class defaults to 'chapter'
            if child.klass is None:
                child.klass = 'chapter'

            # Only add chapters or articles at depth==1
            wanted = child.klass in ('article', 'chapter') and child.depth() == 1
            if not wanted:
                if self.opts.verbose > 2:
                    log.info("skipping class: %s depth %d at position %d" %
                             (child.klass, child.depth(), position))
                continue

            if self.opts.verbose > 2:
                log.info("adding (klass:%s depth:%d) %s to flat ctoc" %
                         (child.klass, child.depth(), child))

            href = child.href
            if href is None:
                log.warn(' Ignoring TOC entry with no href:',
                         child.title)
                continue
            if href not in self._id_offsets:
                log.warn(' Ignoring missing TOC entry:',
                         unicode(child))
                continue

            # An entry landing on the same offset as the previously kept
            # one is redundant and is dropped.
            currentOffset = self._id_offsets[href]
            if currentOffset == previousOffset:
                log.warn(" Ignoring redundant href: %s in '%s'" % (href, child.title))
                continue

            self._add_flat_ctoc_node(child, self._ctoc)
            reduced_toc.append(child)
            previousOffset = currentOffset

        # Update the TOC with our edited version
        self._oeb.toc.nodes = reduced_toc

    # Instantiate a MobiDocument(mobitype)
    found_periodical_nodes = (self._periodicalCount or self._sectionCount
                              or self._articleCount)
    if not found_periodical_nodes or not self.opts.mobi_periodical:
        mobiType = 0x002
    elif self._periodicalCount:
        pt = None
        if self._oeb.metadata.publication_type:
            # Only the part after the first ':' of the publication type
            # is compared below.
            parts = unicode(self._oeb.metadata.publication_type[0]).split(':')
            if len(parts) > 1:
                pt = parts[1]
        mobiType = 0x101 if pt == 'newspaper' else 0x103
    else:
        raise NotImplementedError('_generate_ctoc: Unrecognized document structured')

    self._MobiDoc = MobiDocument(mobiType)

    if self.opts.verbose > 2:
        is_periodical = mobiType > 0x100
        if not is_periodical:
            structType = 'book'
        elif mobiType == 0x102:
            structType = 'flat periodical'
        else:
            structType = 'structured periodical'
        log.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
        if is_periodical:
            log.info("periodicalCount: %d  sectionCount: %d  articleCount: %d"%
                     (self._periodicalCount, self._sectionCount, self._articleCount) )
        else:
            log.info("chapterCount: %d" % self._chapterCount)

    # Apparently the CTOC must end with a null byte
    self._ctoc.write('\0')

    ctoc = self._ctoc.getvalue()
    rec_count = len(self._ctoc_records)
    record_label = 'records, last record' if rec_count else 'record,'
    log.info(" CNCX utilization: %d %s %.0f%% full" %
             (rec_count + 1, record_label, len(ctoc)/655) )

    return align_block(ctoc)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
class HTMLRecordData(object):
|
class HTMLRecordData(object):
|
||||||
""" A data structure containing indexing/navigation data for an HTML record """
|
""" A data structure containing indexing/navigation data for an HTML record """
|
||||||
|
Loading…
x
Reference in New Issue
Block a user