MOBI Output: Strip inline nav sections from periodicals

This commit is contained in:
Kovid Goyal 2009-07-18 17:34:35 -06:00
parent 50d6e86de8
commit d1aabbaa1f
2 changed files with 109 additions and 47 deletions

View File

@ -67,31 +67,62 @@ class MOBIOutput(OutputFormatPlugin):
self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href) self.oeb.guide.add('masthead', 'Masthead Image', href)
def dump_toc(self, toc):
    """Diagnostic helper: log the contents of a periodical TOC tree.

    Emits, through ``self.log``, the title and href of ``toc`` itself,
    then of every entry in ``toc.nodes``, then of each section found by
    iterating such an entry, and finally of each article found by
    iterating a section. Article titles are logged via ``repr``.

    :param toc: TOC root; must expose ``title``, ``href`` and ``nodes``,
                and its descendants must be iterable with ``title`` and
                ``href`` attributes.
    """
    emit = self.log
    emit("\n >>> TOC contents <<<")
    emit(" toc.title: %s" % toc.title)
    emit(" toc.href: %s" % toc.href)
    for issue in toc.nodes:
        emit("\tperiodical title: %s" % issue.title)
        emit("\t href: %s" % issue.href)
        for sect in issue:
            emit("\t\tsection title: %s" % sect.title)
            # A section's href is reported as its "first article".
            emit("\t\tfirst article: %s" % sect.href)
            for entry in sect:
                emit("\t\t\tarticle title: %s" % repr(entry.title))
                emit("\t\t\t href: %s" % entry.href)
def dump_manifest(self):
    """Diagnostic helper: log every href in the OEB manifest.

    Writes a banner line followed by one tab-prefixed line per entry
    obtained by iterating ``self.oeb.manifest.hrefs``, all through
    ``self.log``.
    """
    emit = self.log
    emit("\n >>> Manifest entries <<<")
    for entry_href in self.oeb.manifest.hrefs:
        emit("\t%s" % entry_href)
def periodicalize_toc(self): def periodicalize_toc(self):
from calibre.ebooks.oeb.base import TOC from calibre.ebooks.oeb.base import TOC
toc = self.oeb.toc toc = self.oeb.toc
if toc and toc[0].klass != 'periodical': if toc and toc[0].klass != 'periodical':
start_href = self.oeb.spine[0].href one, two = self.oeb.spine[0], self.oeb.spine[1]
self.log('Converting TOC for MOBI periodical indexing...') self.log('Converting TOC for MOBI periodical indexing...')
articles = {} articles = {}
if toc.depth() < 3: if toc.depth() < 3:
# single section periodical
self.oeb.manifest.remove(one)
self.oeb.manifest.remove(two)
sections = [TOC(klass='section', title=_('All articles'), sections = [TOC(klass='section', title=_('All articles'),
href=start_href)] href=self.oeb.spine[0].href)]
for x in toc: for x in toc:
sections[0].nodes.append(x) sections[0].nodes.append(x)
else: else:
# multi-section periodical
sections = list(toc) sections = list(toc)
for x in sections: for i,x in enumerate(sections):
x.klass = 'section' x.klass = 'section'
articles = list(x)
if articles:
self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
x.href = articles[0].href
for sec in sections: for sec in sections:
articles[id(sec)] = [] articles[id(sec)] = []
for a in list(sec): for a in list(sec):
a.klass = 'article' a.klass = 'article'
articles[id(sec)].append(a) articles[id(sec)].append(a)
sec.nodes.remove(a) sec.nodes.remove(a)
root = TOC(klass='periodical', href=start_href,
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
title=unicode(self.oeb.metadata.title[0])) title=unicode(self.oeb.metadata.title[0]))
for s in sections: for s in sections:
if articles[id(s)]: if articles[id(s)]:
for a in articles[id(s)]: for a in articles[id(s)]:
@ -103,6 +134,13 @@ class MOBIOutput(OutputFormatPlugin):
toc.nodes.append(root) toc.nodes.append(root)
# Fix up the periodical href to point to first section href
toc.nodes[0].href = toc.nodes[0].nodes[0].href
# GR diagnostics
#self.dump_toc(toc)
#self.dump_manifest()
def convert(self, oeb, output_path, input_plugin, opts, log): def convert(self, oeb, output_path, input_plugin, opts, log):
self.log, self.opts, self.oeb = log, opts, oeb self.log, self.opts, self.oeb = log, opts, oeb

View File

@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc
INDEXING = True INDEXING = True
FCIS_FLIS = True FCIS_FLIS = True
WRITE_PBREAKS = True
# TODO: # TODO:
# - Optionally rasterize tables # - Optionally rasterize tables
@ -190,24 +191,20 @@ class Serializer(object):
if hrefs[path].media_type not in OEB_DOCS: if hrefs[path].media_type not in OEB_DOCS:
continue continue
if ref.type == 'other.start' : buffer.write('<reference type="')
# Kindle-specific 'Start Reading' directive if ref.type.startswith('other.') :
buffer.write('<reference title="Startup Page" ') self.serialize_text(ref.type.replace('other.',''), quot=True)
buffer.write('type="start" ') else :
self.serialize_href(ref.href)
# Space required or won't work, I kid you not
buffer.write(' />')
else:
buffer.write('<reference type="')
self.serialize_text(ref.type, quot=True) self.serialize_text(ref.type, quot=True)
buffer.write('" ')
if ref.title is not None:
buffer.write('title="')
self.serialize_text(ref.title, quot=True)
buffer.write('" ') buffer.write('" ')
if ref.title is not None: self.serialize_href(ref.href)
buffer.write('title="') # Space required or won't work, I kid you not
self.serialize_text(ref.title, quot=True) buffer.write(' />')
buffer.write('" ')
self.serialize_href(ref.href)
# Space required or won't work, I kid you not
buffer.write(' />')
buffer.write('</guide>') buffer.write('</guide>')
def serialize_href(self, href, base=None): def serialize_href(self, href, base=None):
@ -651,7 +648,9 @@ class MobiWriter(object):
# Commented out because structured docs don't count section changes in nodeCount # Commented out because structured docs don't count section changes in nodeCount
# self._HTMLRecords[thisRecord].currentSectionNodeCount += 1 # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
'''
# *** This should check currentSectionNumber, because content could start late # *** This should check currentSectionNumber, because content could start late
GR's tweaked code for b14
if thisRecord > 0: if thisRecord > 0:
# If next article falls into a later record, bump thisRecord # If next article falls into a later record, bump thisRecord
thisRecordPrime = thisRecord thisRecordPrime = thisRecord
@ -667,6 +666,19 @@ class MobiWriter(object):
continue continue
else : else :
continue continue
'''
# *** This should check currentSectionNumber, because content could start late
if thisRecord > 0:
sectionChangesInThisRecord = True
sectionChangesInRecordNumber = thisRecord
self._currentSectionIndex += 1
self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
# The following node opens the nextSection
self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
continue
else :
continue
# If no one has taken the openingNode slot, it must be us # If no one has taken the openingNode slot, it must be us
# This could happen before detecting a section change # This could happen before detecting a section change
@ -1267,30 +1279,28 @@ class MobiWriter(object):
record.write(data) record.write(data)
# Marshall's utf-8 break code. # Marshall's utf-8 break code.
record.write(overlap) if WRITE_PBREAKS :
record.write(pack('>B', len(overlap))) record.write(overlap)
nextra = 0 record.write(pack('>B', len(overlap)))
pbreak = 0 nextra = 0
running = offset pbreak = 0
running = offset
while breaks and (breaks[0] - offset) < RECORD_SIZE: while breaks and (breaks[0] - offset) < RECORD_SIZE:
# .pop returns item, removes it from list # .pop returns item, removes it from list
pbreak = (breaks.pop(0) - running) >> 3 pbreak = (breaks.pop(0) - running) >> 3
if self.opts.verbose > 2 : if self.opts.verbose > 2 :
self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
encoded = decint(pbreak, DECINT_FORWARD) encoded = decint(pbreak, DECINT_FORWARD)
record.write(encoded) record.write(encoded)
running += pbreak << 3 running += pbreak << 3
nextra += len(encoded) nextra += len(encoded)
lsize = 1
lsize = 1 while True:
while True: size = decint(nextra + lsize, DECINT_BACKWARD)
size = decint(nextra + lsize, DECINT_BACKWARD) if len(size) == lsize:
if len(size) == lsize: break
break lsize += 1
lsize += 1 record.write(size)
record.write(size)
# Write Trailing Byte Sequence # Write Trailing Byte Sequence
if INDEXING and self._indexable: if INDEXING and self._indexable:
@ -1370,8 +1380,13 @@ class MobiWriter(object):
metadata = self._oeb.metadata metadata = self._oeb.metadata
exth = self._build_exth() exth = self._build_exth()
last_content_record = len(self._records) - 1 last_content_record = len(self._records) - 1
'''
if INDEXING and self._indexable: if INDEXING and self._indexable:
self._generate_end_records() self._generate_end_records()
'''
self._generate_end_records()
record0 = StringIO() record0 = StringIO()
# The PalmDOC Header # The PalmDOC Header
record0.write(pack('>HHIHHHH', self._compression, 0, record0.write(pack('>HHIHHHH', self._compression, 0,
@ -1468,7 +1483,7 @@ class MobiWriter(object):
record0.write('\0\0\0\x01') record0.write('\0\0\0\x01')
# 0xb8 - 0xbb : FCIS record number # 0xb8 - 0xbb : FCIS record number
if FCIS_FLIS and self._indexable: if FCIS_FLIS :
# Write these if FCIS/FLIS turned on # Write these if FCIS/FLIS turned on
# 0xb8 - 0xbb : FCIS record number # 0xb8 - 0xbb : FCIS record number
record0.write(pack('>I', self._fcis_number)) record0.write(pack('>I', self._fcis_number))
@ -1501,16 +1516,25 @@ class MobiWriter(object):
record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff)) record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))
# 0xe0 - 0xe3 : Extra record data # 0xe0 - 0xe3 : Extra record data
# The '5' is a bitmask of extra record data at the end: # Extra record data flags:
# - 0x1: <extra multibyte bytes><size> (?) # - 0x1: <extra multibyte bytes><size> (?)
# - 0x2: <TBS indexing description of this HTML record><size> GR # - 0x2: <TBS indexing description of this HTML record><size> GR
# - 0x4: <uncrossable breaks><size> # - 0x4: <uncrossable breaks><size>
# Of course, the formats aren't quite the same.
# GR: Use 7 for indexed files, 5 for unindexed # GR: Use 7 for indexed files, 5 for unindexed
# Setting bit 2 (0x4) disables <guide><reference type="start"> functionality
'''
if INDEXING and self._indexable : if INDEXING and self._indexable :
record0.write(pack('>I', 7)) record0.write(pack('>I', 7))
else: else:
record0.write(pack('>I', 5)) record0.write(pack('>I', 5))
'''
trailingDataFlags = 1
if self._indexable :
trailingDataFlags |= 2
if WRITE_PBREAKS :
trailingDataFlags |= 4
record0.write(pack('>I', trailingDataFlags))
# 0xe4 - 0xe7 : Primary index record # 0xe4 - 0xe7 : Primary index record
record0.write(pack('>I', 0xffffffff if self._primary_index_record is record0.write(pack('>I', 0xffffffff if self._primary_index_record is