MOBI Output: Strip inline nav sections from periodicals

This commit is contained in:
Kovid Goyal 2009-07-18 17:34:35 -06:00
parent 50d6e86de8
commit d1aabbaa1f
2 changed files with 109 additions and 47 deletions

View File

@ -67,31 +67,62 @@ class MOBIOutput(OutputFormatPlugin):
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href)
def dump_toc(self, toc):
    """Diagnostic helper: write the TOC tree to the log.

    Emits the title and href of ``toc`` itself, then walks three levels:
    every entry of ``toc.nodes`` (logged as a periodical), every child of
    a periodical (logged as a section), and every child of a section
    (logged as an article). Article titles are logged through ``repr``.
    """
    def outline():
        # Lazily produce one log line at a time so that attribute access
        # and logging stay interleaved, line by line.
        yield "\n >>> TOC contents <<<"
        yield " toc.title: %s" % toc.title
        yield " toc.href: %s" % toc.href
        for issue in toc.nodes:
            yield "\tperiodical title: %s" % issue.title
            yield "\t href: %s" % issue.href
            for sect in issue:
                yield "\t\tsection title: %s" % sect.title
                yield "\t\tfirst article: %s" % sect.href
                for entry in sect:
                    yield "\t\t\tarticle title: %s" % repr(entry.title)
                    yield "\t\t\t href: %s" % entry.href

    for text in outline():
        self.log(text)
def dump_manifest(self):
    """Diagnostic helper: log each href in ``self.oeb.manifest.hrefs``.

    A banner line is written first, followed by one tab-indented line
    per href, in iteration order.
    """
    emit = self.log
    emit("\n >>> Manifest entries <<<")
    known_hrefs = self.oeb.manifest.hrefs
    for entry in known_hrefs:
        emit("\t%s" % entry)
def periodicalize_toc(self):
from calibre.ebooks.oeb.base import TOC
toc = self.oeb.toc
if toc and toc[0].klass != 'periodical':
start_href = self.oeb.spine[0].href
one, two = self.oeb.spine[0], self.oeb.spine[1]
self.log('Converting TOC for MOBI periodical indexing...')
articles = {}
if toc.depth() < 3:
# single section periodical
self.oeb.manifest.remove(one)
self.oeb.manifest.remove(two)
sections = [TOC(klass='section', title=_('All articles'),
href=start_href)]
href=self.oeb.spine[0].href)]
for x in toc:
sections[0].nodes.append(x)
else:
# multi-section periodical
sections = list(toc)
for x in sections:
for i,x in enumerate(sections):
x.klass = 'section'
articles = list(x)
if articles:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
x.href = articles[0].href
for sec in sections:
articles[id(sec)] = []
for a in list(sec):
a.klass = 'article'
articles[id(sec)].append(a)
sec.nodes.remove(a)
root = TOC(klass='periodical', href=start_href,
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
title=unicode(self.oeb.metadata.title[0]))
for s in sections:
if articles[id(s)]:
for a in articles[id(s)]:
@ -103,6 +134,13 @@ class MOBIOutput(OutputFormatPlugin):
toc.nodes.append(root)
# Fix up the periodical href to point to first section href
toc.nodes[0].href = toc.nodes[0].nodes[0].href
# GR diagnostics
#self.dump_toc(toc)
#self.dump_manifest()
def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb

View File

@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc
INDEXING = True
FCIS_FLIS = True
WRITE_PBREAKS = True
# TODO:
# - Optionally rasterize tables
@ -190,15 +191,10 @@ class Serializer(object):
if hrefs[path].media_type not in OEB_DOCS:
continue
if ref.type == 'other.start' :
# Kindle-specific 'Start Reading' directive
buffer.write('<reference title="Startup Page" ')
buffer.write('type="start" ')
self.serialize_href(ref.href)
# Space required or won't work, I kid you not
buffer.write(' />')
else:
buffer.write('<reference type="')
if ref.type.startswith('other.') :
self.serialize_text(ref.type.replace('other.',''), quot=True)
else :
self.serialize_text(ref.type, quot=True)
buffer.write('" ')
if ref.title is not None:
@ -208,6 +204,7 @@ class Serializer(object):
self.serialize_href(ref.href)
# Space required or won't work, I kid you not
buffer.write(' />')
buffer.write('</guide>')
def serialize_href(self, href, base=None):
@ -651,7 +648,9 @@ class MobiWriter(object):
# Commented out because structured docs don't count section changes in nodeCount
# self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
'''
# *** This should check currentSectionNumber, because content could start late
GR's tweaked code for b14
if thisRecord > 0:
# If next article falls into a later record, bump thisRecord
thisRecordPrime = thisRecord
@ -667,6 +666,19 @@ class MobiWriter(object):
continue
else :
continue
'''
# *** This should check currentSectionNumber, because content could start late
if thisRecord > 0:
sectionChangesInThisRecord = True
sectionChangesInRecordNumber = thisRecord
self._currentSectionIndex += 1
self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
# The following node opens the nextSection
self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
continue
else :
continue
# If no one has taken the openingNode slot, it must be us
# This could happen before detecting a section change
@ -1267,12 +1279,12 @@ class MobiWriter(object):
record.write(data)
# Marshall's utf-8 break code.
if WRITE_PBREAKS :
record.write(overlap)
record.write(pack('>B', len(overlap)))
nextra = 0
pbreak = 0
running = offset
while breaks and (breaks[0] - offset) < RECORD_SIZE:
# .pop returns item, removes it from list
pbreak = (breaks.pop(0) - running) >> 3
@ -1282,14 +1294,12 @@ class MobiWriter(object):
record.write(encoded)
running += pbreak << 3
nextra += len(encoded)
lsize = 1
while True:
size = decint(nextra + lsize, DECINT_BACKWARD)
if len(size) == lsize:
break
lsize += 1
record.write(size)
# Write Trailing Byte Sequence
@ -1370,8 +1380,13 @@ class MobiWriter(object):
metadata = self._oeb.metadata
exth = self._build_exth()
last_content_record = len(self._records) - 1
'''
if INDEXING and self._indexable:
self._generate_end_records()
'''
self._generate_end_records()
record0 = StringIO()
# The PalmDOC Header
record0.write(pack('>HHIHHHH', self._compression, 0,
@ -1468,7 +1483,7 @@ class MobiWriter(object):
record0.write('\0\0\0\x01')
# 0xb8 - 0xbb : FCIS record number
if FCIS_FLIS and self._indexable:
if FCIS_FLIS :
# Write these if FCIS/FLIS turned on
# 0xb8 - 0xbb : FCIS record number
record0.write(pack('>I', self._fcis_number))
@ -1501,16 +1516,25 @@ class MobiWriter(object):
record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))
# 0xe0 - 0xe3 : Extra record data
# The '5' is a bitmask of extra record data at the end:
# Extra record data flags:
# - 0x1: <extra multibyte bytes><size> (?)
# - 0x2: <TBS indexing description of this HTML record><size> GR
# - 0x4: <uncrossable breaks><size>
# Of course, the formats aren't quite the same.
# GR: Use 7 for indexed files, 5 for unindexed
# Setting bit 2 (0x4) disables <guide><reference type="start"> functionality
'''
if INDEXING and self._indexable :
record0.write(pack('>I', 7))
else:
record0.write(pack('>I', 5))
'''
trailingDataFlags = 1
if self._indexable :
trailingDataFlags |= 2
if WRITE_PBREAKS :
trailingDataFlags |= 4
record0.write(pack('>I', trailingDataFlags))
# 0xe4 - 0xe7 : Primary index record
record0.write(pack('>I', 0xffffffff if self._primary_index_record is