mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
...
This commit is contained in:
parent
59d9e15580
commit
dbefbfbd86
@ -1642,6 +1642,61 @@ class MobiWriter(object):
|
||||
for record in self._records:
|
||||
self._write(record)
|
||||
|
||||
def _clean_text_value(self, text):
|
||||
if text is not None and text.strip() :
|
||||
text = text.strip()
|
||||
if not isinstance(text, unicode):
|
||||
text = text.decode('utf-8', 'replace')
|
||||
text = normalize(text).encode('utf-8')
|
||||
else :
|
||||
text = "(none)".encode('utf-8')
|
||||
return text
|
||||
|
||||
def _compute_offset_length(self, i, node, entries) :
|
||||
h = node.href
|
||||
if h not in self._id_offsets:
|
||||
self._oeb.log.warning('Could not find TOC entry:', node.title)
|
||||
return -1, -1
|
||||
|
||||
offset = self._id_offsets[h]
|
||||
length = None
|
||||
# Calculate length based on next entry's offset
|
||||
for sibling in entries[i+1:]:
|
||||
h2 = sibling.href
|
||||
if h2 in self._id_offsets:
|
||||
offset2 = self._id_offsets[h2]
|
||||
if offset2 > offset:
|
||||
length = offset2 - offset
|
||||
break
|
||||
if length is None:
|
||||
length = self._content_length - offset
|
||||
return offset, length
|
||||
|
||||
def _establish_document_structure(self) :
|
||||
documentType = None
|
||||
try :
|
||||
klass = self._ctoc_map[0]['klass']
|
||||
except :
|
||||
klass = None
|
||||
|
||||
if klass == 'chapter' or klass == None :
|
||||
documentType = 'book'
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
|
||||
self._MobiDoc.documentStructure = MobiBook()
|
||||
|
||||
elif klass == 'periodical' :
|
||||
documentType = klass
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
|
||||
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
|
||||
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
|
||||
else :
|
||||
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
|
||||
return documentType
|
||||
|
||||
# Index {{{
|
||||
|
||||
def _generate_index(self):
|
||||
self._oeb.log('Generating INDX ...')
|
||||
self._primary_index_record = None
|
||||
@ -1815,276 +1870,7 @@ class MobiWriter(object):
|
||||
open(os.path.join(t, n+'.bin'), 'wb').write(self._records[-(i+1)])
|
||||
self._oeb.log.debug('Index records dumped to', t)
|
||||
|
||||
def _clean_text_value(self, text):
|
||||
if text is not None and text.strip() :
|
||||
text = text.strip()
|
||||
if not isinstance(text, unicode):
|
||||
text = text.decode('utf-8', 'replace')
|
||||
text = normalize(text).encode('utf-8')
|
||||
else :
|
||||
text = "(none)".encode('utf-8')
|
||||
return text
|
||||
|
||||
def _add_to_ctoc(self, ctoc_str, record_offset):
|
||||
# Write vwilen + string to ctoc
|
||||
# Return offset
|
||||
# Is there enough room for this string in the current ctoc record?
|
||||
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
|
||||
# flush this ctoc, start a new one
|
||||
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
|
||||
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
|
||||
# pad with 00
|
||||
pad = 0xfbf8 - self._ctoc.tell()
|
||||
# print "padding %d bytes of 00" % pad
|
||||
self._ctoc.write('\0' * (pad))
|
||||
self._ctoc_records.append(self._ctoc.getvalue())
|
||||
self._ctoc.truncate(0)
|
||||
self._ctoc_offset += 0x10000
|
||||
record_offset = self._ctoc_offset
|
||||
|
||||
offset = self._ctoc.tell() + record_offset
|
||||
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
|
||||
return offset
|
||||
|
||||
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
||||
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
||||
t = node.title if title is None else title
|
||||
t = self._clean_text_value(t)
|
||||
self._last_toc_entry = t
|
||||
|
||||
# Create an empty dictionary for this node
|
||||
ctoc_name_map = {}
|
||||
|
||||
# article = chapter
|
||||
if node.klass == 'article' :
|
||||
ctoc_name_map['klass'] = 'chapter'
|
||||
else :
|
||||
ctoc_name_map['klass'] = node.klass
|
||||
|
||||
# Add title offset to name map
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
self._chapterCount += 1
|
||||
|
||||
# append this node's name_map to map
|
||||
self._ctoc_map.append(ctoc_name_map)
|
||||
|
||||
return
|
||||
|
||||
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
||||
# Process 'periodical', 'section' and 'article'
|
||||
|
||||
# Fetch the offset referencing the current ctoc_record
|
||||
if node.klass is None :
|
||||
return
|
||||
t = node.title if title is None else title
|
||||
t = self._clean_text_value(t)
|
||||
self._last_toc_entry = t
|
||||
|
||||
# Create an empty dictionary for this node
|
||||
ctoc_name_map = {}
|
||||
|
||||
# Add the klass of this node
|
||||
ctoc_name_map['klass'] = node.klass
|
||||
|
||||
if node.klass == 'chapter':
|
||||
# Add title offset to name map
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
self._chapterCount += 1
|
||||
|
||||
elif node.klass == 'periodical' :
|
||||
# Add title offset
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'periodical' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'periodical':
|
||||
# Use the pre-existing instance
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
self._periodicalCount += 1
|
||||
|
||||
elif node.klass == 'section' :
|
||||
# Add title offset
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'section' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'section':
|
||||
# Use the pre-existing instance
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
self._sectionCount += 1
|
||||
|
||||
elif node.klass == 'article' :
|
||||
# Add title offset/title
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'article' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'article':
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
# Add description offset/description
|
||||
if node.description :
|
||||
d = self._clean_text_value(node.description)
|
||||
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
||||
else :
|
||||
ctoc_name_map['descriptionOffset'] = None
|
||||
|
||||
# Add author offset/attribution
|
||||
if node.author :
|
||||
a = self._clean_text_value(node.author)
|
||||
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
||||
else :
|
||||
ctoc_name_map['authorOffset'] = None
|
||||
|
||||
self._articleCount += 1
|
||||
|
||||
else :
|
||||
raise NotImplementedError( \
|
||||
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
|
||||
(node.title, node.klass, node.play_order))
|
||||
|
||||
# append this node's name_map to map
|
||||
self._ctoc_map.append(ctoc_name_map)
|
||||
|
||||
def _generate_ctoc(self):
|
||||
# Generate the compiled TOC strings
|
||||
# Each node has 1-4 CTOC entries:
|
||||
# Periodical (0xDF)
|
||||
# title, class
|
||||
# Section (0xFF)
|
||||
# title, class
|
||||
# Article (0x3F)
|
||||
# title, class, description, author
|
||||
# Chapter (0x0F)
|
||||
# title, class
|
||||
# nb: Chapters don't actually have @class, so we synthesize it
|
||||
# in reader._toc_from_navpoint
|
||||
|
||||
toc = self._oeb.toc
|
||||
reduced_toc = []
|
||||
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
|
||||
self._last_toc_entry = None
|
||||
#ctoc = StringIO()
|
||||
self._ctoc = StringIO()
|
||||
|
||||
# Track the individual node types
|
||||
self._periodicalCount = 0
|
||||
self._sectionCount = 0
|
||||
self._articleCount = 0
|
||||
self._chapterCount = 0
|
||||
|
||||
#first = True
|
||||
|
||||
if self._conforming_periodical_toc :
|
||||
self._oeb.logger.info('Generating structured CTOC ...')
|
||||
for (child) in toc.iter():
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info(" %s" % child)
|
||||
self._add_structured_ctoc_node(child, self._ctoc)
|
||||
#first = False
|
||||
|
||||
else :
|
||||
self._oeb.logger.info('Generating flat CTOC ...')
|
||||
previousOffset = -1
|
||||
currentOffset = 0
|
||||
for (i, child) in enumerate(toc.iterdescendants()):
|
||||
# Only add chapters or articles at depth==1
|
||||
# no class defaults to 'chapter'
|
||||
if child.klass is None : child.klass = 'chapter'
|
||||
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
|
||||
(child.klass, child.depth(), child) )
|
||||
|
||||
# Test to see if this child's offset is the same as the previous child's
|
||||
# offset, skip it
|
||||
h = child.href
|
||||
|
||||
if h is None:
|
||||
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
|
||||
child.title)
|
||||
continue
|
||||
if h not in self._id_offsets:
|
||||
self._oeb.logger.warn(' Ignoring missing TOC entry:',
|
||||
unicode(child))
|
||||
continue
|
||||
|
||||
currentOffset = self._id_offsets[h]
|
||||
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
|
||||
|
||||
if currentOffset != previousOffset :
|
||||
self._add_flat_ctoc_node(child, self._ctoc)
|
||||
reduced_toc.append(child)
|
||||
previousOffset = currentOffset
|
||||
else :
|
||||
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
|
||||
|
||||
else :
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
|
||||
(child.klass, child.depth(),i))
|
||||
|
||||
# Update the TOC with our edited version
|
||||
self._oeb.toc.nodes = reduced_toc
|
||||
|
||||
# Instantiate a MobiDocument(mobitype)
|
||||
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
|
||||
not self.opts.mobi_periodical :
|
||||
mobiType = 0x002
|
||||
elif self._periodicalCount:
|
||||
pt = None
|
||||
if self._oeb.metadata.publication_type:
|
||||
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
|
||||
if len(x) > 1:
|
||||
pt = x[1]
|
||||
mobiType = {'newspaper':0x101}.get(pt, 0x103)
|
||||
else :
|
||||
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
|
||||
|
||||
self._MobiDoc = MobiDocument(mobiType)
|
||||
|
||||
if self.opts.verbose > 2 :
|
||||
structType = 'book'
|
||||
if mobiType > 0x100 :
|
||||
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
|
||||
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
|
||||
if mobiType > 0x100 :
|
||||
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
|
||||
(self._periodicalCount, self._sectionCount, self._articleCount) )
|
||||
else :
|
||||
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
|
||||
|
||||
# Apparently the CTOC must end with a null byte
|
||||
self._ctoc.write('\0')
|
||||
|
||||
ctoc = self._ctoc.getvalue()
|
||||
rec_count = len(self._ctoc_records)
|
||||
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
|
||||
(rec_count + 1, 'records, last record' if rec_count else 'record,',
|
||||
len(ctoc)/655) )
|
||||
|
||||
return align_block(ctoc)
|
||||
|
||||
# Index nodes {{{
|
||||
def _write_periodical_node(self, indxt, indices, index, offset, length, count, firstSection, lastSection) :
|
||||
pos = 0xc0 + indxt.tell()
|
||||
indices.write(pack('>H', pos)) # Save the offset for IDXTIndices
|
||||
@ -2176,48 +1962,8 @@ class MobiWriter(object):
|
||||
indxt.write(decint(self._ctoc_map[index]['titleOffset'], DECINT_FORWARD)) # vwi title offset in CNCX
|
||||
indxt.write(decint(0, DECINT_FORWARD)) # unknown byte
|
||||
|
||||
def _compute_offset_length(self, i, node, entries) :
|
||||
h = node.href
|
||||
if h not in self._id_offsets:
|
||||
self._oeb.log.warning('Could not find TOC entry:', node.title)
|
||||
return -1, -1
|
||||
# }}}
|
||||
|
||||
offset = self._id_offsets[h]
|
||||
length = None
|
||||
# Calculate length based on next entry's offset
|
||||
for sibling in entries[i+1:]:
|
||||
h2 = sibling.href
|
||||
if h2 in self._id_offsets:
|
||||
offset2 = self._id_offsets[h2]
|
||||
if offset2 > offset:
|
||||
length = offset2 - offset
|
||||
break
|
||||
if length is None:
|
||||
length = self._content_length - offset
|
||||
return offset, length
|
||||
|
||||
def _establish_document_structure(self) :
|
||||
documentType = None
|
||||
try :
|
||||
klass = self._ctoc_map[0]['klass']
|
||||
except :
|
||||
klass = None
|
||||
|
||||
if klass == 'chapter' or klass == None :
|
||||
documentType = 'book'
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("Adding a MobiBook to self._MobiDoc")
|
||||
self._MobiDoc.documentStructure = MobiBook()
|
||||
|
||||
elif klass == 'periodical' :
|
||||
documentType = klass
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("Adding a MobiPeriodical to self._MobiDoc")
|
||||
self._MobiDoc.documentStructure = MobiPeriodical(self._MobiDoc.getNextNode())
|
||||
self._MobiDoc.documentStructure.startAddress = self._anchor_offset_kindle
|
||||
else :
|
||||
raise NotImplementedError('_establish_document_structure: unrecognized klass: %s' % klass)
|
||||
return documentType
|
||||
|
||||
def _generate_section_indices(self, child, currentSection, myPeriodical, myDoc ) :
|
||||
sectionTitles = list(child.iter())[1:]
|
||||
@ -2495,6 +2241,270 @@ class MobiWriter(object):
|
||||
last_name, c = self._add_periodical_structured_articles(myDoc, indxt, indices)
|
||||
|
||||
return align_block(indxt.getvalue()), c, align_block(indices.getvalue()), last_name
|
||||
# }}}
|
||||
|
||||
# CTOC {{{
|
||||
def _add_to_ctoc(self, ctoc_str, record_offset):
|
||||
# Write vwilen + string to ctoc
|
||||
# Return offset
|
||||
# Is there enough room for this string in the current ctoc record?
|
||||
if 0xfbf8 - self._ctoc.tell() < 2 + len(ctoc_str):
|
||||
# flush this ctoc, start a new one
|
||||
# print "closing ctoc_record at 0x%X" % self._ctoc.tell()
|
||||
# print "starting new ctoc with '%-50.50s ...'" % ctoc_str
|
||||
# pad with 00
|
||||
pad = 0xfbf8 - self._ctoc.tell()
|
||||
# print "padding %d bytes of 00" % pad
|
||||
self._ctoc.write('\0' * (pad))
|
||||
self._ctoc_records.append(self._ctoc.getvalue())
|
||||
self._ctoc.truncate(0)
|
||||
self._ctoc_offset += 0x10000
|
||||
record_offset = self._ctoc_offset
|
||||
|
||||
offset = self._ctoc.tell() + record_offset
|
||||
self._ctoc.write(decint(len(ctoc_str), DECINT_FORWARD) + ctoc_str)
|
||||
return offset
|
||||
|
||||
def _add_flat_ctoc_node(self, node, ctoc, title=None):
|
||||
# Process 'chapter' or 'article' nodes only, force either to 'chapter'
|
||||
t = node.title if title is None else title
|
||||
t = self._clean_text_value(t)
|
||||
self._last_toc_entry = t
|
||||
|
||||
# Create an empty dictionary for this node
|
||||
ctoc_name_map = {}
|
||||
|
||||
# article = chapter
|
||||
if node.klass == 'article' :
|
||||
ctoc_name_map['klass'] = 'chapter'
|
||||
else :
|
||||
ctoc_name_map['klass'] = node.klass
|
||||
|
||||
# Add title offset to name map
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
self._chapterCount += 1
|
||||
|
||||
# append this node's name_map to map
|
||||
self._ctoc_map.append(ctoc_name_map)
|
||||
|
||||
return
|
||||
|
||||
def _add_structured_ctoc_node(self, node, ctoc, title=None):
|
||||
# Process 'periodical', 'section' and 'article'
|
||||
|
||||
# Fetch the offset referencing the current ctoc_record
|
||||
if node.klass is None :
|
||||
return
|
||||
t = node.title if title is None else title
|
||||
t = self._clean_text_value(t)
|
||||
self._last_toc_entry = t
|
||||
|
||||
# Create an empty dictionary for this node
|
||||
ctoc_name_map = {}
|
||||
|
||||
# Add the klass of this node
|
||||
ctoc_name_map['klass'] = node.klass
|
||||
|
||||
if node.klass == 'chapter':
|
||||
# Add title offset to name map
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
self._chapterCount += 1
|
||||
|
||||
elif node.klass == 'periodical' :
|
||||
# Add title offset
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'periodical' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'periodical':
|
||||
# Use the pre-existing instance
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
self._periodicalCount += 1
|
||||
|
||||
elif node.klass == 'section' :
|
||||
# Add title offset
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'section' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'section':
|
||||
# Use the pre-existing instance
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
self._sectionCount += 1
|
||||
|
||||
elif node.klass == 'article' :
|
||||
# Add title offset/title
|
||||
ctoc_name_map['titleOffset'] = self._add_to_ctoc(t, self._ctoc_offset)
|
||||
|
||||
# Look for existing class entry 'article' in _ctoc_map
|
||||
for entry in self._ctoc_map:
|
||||
if entry['klass'] == 'article':
|
||||
ctoc_name_map['classOffset'] = entry['classOffset']
|
||||
break
|
||||
else :
|
||||
continue
|
||||
else:
|
||||
# class names should always be in CNCX 0 - no offset
|
||||
ctoc_name_map['classOffset'] = self._add_to_ctoc(node.klass, 0)
|
||||
|
||||
# Add description offset/description
|
||||
if node.description :
|
||||
d = self._clean_text_value(node.description)
|
||||
ctoc_name_map['descriptionOffset'] = self._add_to_ctoc(d, self._ctoc_offset)
|
||||
else :
|
||||
ctoc_name_map['descriptionOffset'] = None
|
||||
|
||||
# Add author offset/attribution
|
||||
if node.author :
|
||||
a = self._clean_text_value(node.author)
|
||||
ctoc_name_map['authorOffset'] = self._add_to_ctoc(a, self._ctoc_offset)
|
||||
else :
|
||||
ctoc_name_map['authorOffset'] = None
|
||||
|
||||
self._articleCount += 1
|
||||
|
||||
else :
|
||||
raise NotImplementedError( \
|
||||
'writer._generate_ctoc.add_node: title: %s has unrecognized klass: %s, playOrder: %d' % \
|
||||
(node.title, node.klass, node.play_order))
|
||||
|
||||
# append this node's name_map to map
|
||||
self._ctoc_map.append(ctoc_name_map)
|
||||
|
||||
def _generate_ctoc(self):
|
||||
# Generate the compiled TOC strings
|
||||
# Each node has 1-4 CTOC entries:
|
||||
# Periodical (0xDF)
|
||||
# title, class
|
||||
# Section (0xFF)
|
||||
# title, class
|
||||
# Article (0x3F)
|
||||
# title, class, description, author
|
||||
# Chapter (0x0F)
|
||||
# title, class
|
||||
# nb: Chapters don't actually have @class, so we synthesize it
|
||||
# in reader._toc_from_navpoint
|
||||
|
||||
toc = self._oeb.toc
|
||||
reduced_toc = []
|
||||
self._ctoc_map = [] # per node dictionary of {class/title/desc/author} offsets
|
||||
self._last_toc_entry = None
|
||||
#ctoc = StringIO()
|
||||
self._ctoc = StringIO()
|
||||
|
||||
# Track the individual node types
|
||||
self._periodicalCount = 0
|
||||
self._sectionCount = 0
|
||||
self._articleCount = 0
|
||||
self._chapterCount = 0
|
||||
|
||||
#first = True
|
||||
|
||||
if self._conforming_periodical_toc :
|
||||
self._oeb.logger.info('Generating structured CTOC ...')
|
||||
for (child) in toc.iter():
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info(" %s" % child)
|
||||
self._add_structured_ctoc_node(child, self._ctoc)
|
||||
#first = False
|
||||
|
||||
else :
|
||||
self._oeb.logger.info('Generating flat CTOC ...')
|
||||
previousOffset = -1
|
||||
currentOffset = 0
|
||||
for (i, child) in enumerate(toc.iterdescendants()):
|
||||
# Only add chapters or articles at depth==1
|
||||
# no class defaults to 'chapter'
|
||||
if child.klass is None : child.klass = 'chapter'
|
||||
if (child.klass == 'article' or child.klass == 'chapter') and child.depth() == 1 :
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("adding (klass:%s depth:%d) %s to flat ctoc" % \
|
||||
(child.klass, child.depth(), child) )
|
||||
|
||||
# Test to see if this child's offset is the same as the previous child's
|
||||
# offset, skip it
|
||||
h = child.href
|
||||
|
||||
if h is None:
|
||||
self._oeb.logger.warn(' Ignoring TOC entry with no href:',
|
||||
child.title)
|
||||
continue
|
||||
if h not in self._id_offsets:
|
||||
self._oeb.logger.warn(' Ignoring missing TOC entry:',
|
||||
unicode(child))
|
||||
continue
|
||||
|
||||
currentOffset = self._id_offsets[h]
|
||||
# print "_generate_ctoc: child offset: 0x%X" % currentOffset
|
||||
|
||||
if currentOffset != previousOffset :
|
||||
self._add_flat_ctoc_node(child, self._ctoc)
|
||||
reduced_toc.append(child)
|
||||
previousOffset = currentOffset
|
||||
else :
|
||||
self._oeb.logger.warn(" Ignoring redundant href: %s in '%s'" % (h, child.title))
|
||||
|
||||
else :
|
||||
if self.opts.verbose > 2 :
|
||||
self._oeb.logger.info("skipping class: %s depth %d at position %d" % \
|
||||
(child.klass, child.depth(),i))
|
||||
|
||||
# Update the TOC with our edited version
|
||||
self._oeb.toc.nodes = reduced_toc
|
||||
|
||||
# Instantiate a MobiDocument(mobitype)
|
||||
if (not self._periodicalCount and not self._sectionCount and not self._articleCount) or \
|
||||
not self.opts.mobi_periodical :
|
||||
mobiType = 0x002
|
||||
elif self._periodicalCount:
|
||||
pt = None
|
||||
if self._oeb.metadata.publication_type:
|
||||
x = unicode(self._oeb.metadata.publication_type[0]).split(':')
|
||||
if len(x) > 1:
|
||||
pt = x[1]
|
||||
mobiType = {'newspaper':0x101}.get(pt, 0x103)
|
||||
else :
|
||||
raise NotImplementedError('_generate_ctoc: Unrecognized document structured')
|
||||
|
||||
self._MobiDoc = MobiDocument(mobiType)
|
||||
|
||||
if self.opts.verbose > 2 :
|
||||
structType = 'book'
|
||||
if mobiType > 0x100 :
|
||||
structType = 'flat periodical' if mobiType == 0x102 else 'structured periodical'
|
||||
self._oeb.logger.info("Instantiating a %s MobiDocument of type 0x%X" % (structType, mobiType ) )
|
||||
if mobiType > 0x100 :
|
||||
self._oeb.logger.info("periodicalCount: %d sectionCount: %d articleCount: %d"% \
|
||||
(self._periodicalCount, self._sectionCount, self._articleCount) )
|
||||
else :
|
||||
self._oeb.logger.info("chapterCount: %d" % self._chapterCount)
|
||||
|
||||
# Apparently the CTOC must end with a null byte
|
||||
self._ctoc.write('\0')
|
||||
|
||||
ctoc = self._ctoc.getvalue()
|
||||
rec_count = len(self._ctoc_records)
|
||||
self._oeb.logger.info(" CNCX utilization: %d %s %.0f%% full" % \
|
||||
(rec_count + 1, 'records, last record' if rec_count else 'record,',
|
||||
len(ctoc)/655) )
|
||||
|
||||
return align_block(ctoc)
|
||||
|
||||
# }}}
|
||||
|
||||
class HTMLRecordData(object):
|
||||
""" A data structure containing indexing/navigation data for an HTML record """
|
||||
|
Loading…
x
Reference in New Issue
Block a user