From d1aabbaa1f3e5de06a7d32a057c4dc858b2ec8f8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 18 Jul 2009 17:34:35 -0600 Subject: [PATCH] MOBI Output: Strip inline nav sections from periodicals --- src/calibre/ebooks/mobi/output.py | 46 +++++++++++-- src/calibre/ebooks/mobi/writer.py | 110 ++++++++++++++++++------------ 2 files changed, 109 insertions(+), 47 deletions(-) diff --git a/src/calibre/ebooks/mobi/output.py b/src/calibre/ebooks/mobi/output.py index 5f1aa37e19..9da435539c 100644 --- a/src/calibre/ebooks/mobi/output.py +++ b/src/calibre/ebooks/mobi/output.py @@ -67,31 +67,62 @@ class MOBIOutput(OutputFormatPlugin): self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.guide.add('masthead', 'Masthead Image', href) + def dump_toc(self, toc) : + self.log( "\n >>> TOC contents <<<") + self.log( " toc.title: %s" % toc.title) + self.log( " toc.href: %s" % toc.href) + for periodical in toc.nodes : + self.log( "\tperiodical title: %s" % periodical.title) + self.log( "\t href: %s" % periodical.href) + for section in periodical : + self.log( "\t\tsection title: %s" % section.title) + self.log( "\t\tfirst article: %s" % section.href) + for article in section : + self.log( "\t\t\tarticle title: %s" % repr(article.title)) + self.log( "\t\t\t href: %s" % article.href) + + def dump_manifest(self) : + self.log( "\n >>> Manifest entries <<<") + for href in self.oeb.manifest.hrefs : + self.log ("\t%s" % href) def periodicalize_toc(self): from calibre.ebooks.oeb.base import TOC toc = self.oeb.toc if toc and toc[0].klass != 'periodical': - start_href = self.oeb.spine[0].href + one, two = self.oeb.spine[0], self.oeb.spine[1] self.log('Converting TOC for MOBI periodical indexing...') + articles = {} if toc.depth() < 3: + # single section periodical + self.oeb.manifest.remove(one) + self.oeb.manifest.remove(two) sections = [TOC(klass='section', title=_('All articles'), - href=start_href)] + href=self.oeb.spine[0].href)] for x in toc: sections[0].nodes.append(x) else: + # multi-section periodical sections = list(toc) - for x in sections: + for i,x in enumerate(sections): x.klass = 'section' + articles = list(x) + if articles: + self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href]) + x.href = articles[0].href + + for sec in sections: articles[id(sec)] = [] for a in list(sec): a.klass = 'article' articles[id(sec)].append(a) sec.nodes.remove(a) - root = TOC(klass='periodical', href=start_href, + + root = TOC(klass='periodical', href=self.oeb.spine[0].href, title=unicode(self.oeb.metadata.title[0])) + for s in sections: if articles[id(s)]: for a in articles[id(s)]: @@ -103,6 +134,13 @@ class MOBIOutput(OutputFormatPlugin): toc.nodes.append(root) + # Fix up the periodical href to point to first section href + toc.nodes[0].href = toc.nodes[0].nodes[0].href + + # GR diagnostics + #self.dump_toc(toc) + #self.dump_manifest() + def convert(self, oeb, output_path, input_plugin, opts, log): self.log, self.opts, self.oeb = log, opts, oeb diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 1a58628764..0e32c471bf 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc INDEXING = True FCIS_FLIS = True +WRITE_PBREAKS = True # TODO: # - Optionally rasterize tables @@ -190,24 +191,20 @@ class Serializer(object): if hrefs[path].media_type not in OEB_DOCS: continue - if ref.type == 'other.start' : - # Kindle-specific 'Start Reading' directive - buffer.write('') - else: - buffer.write('') + self.serialize_href(ref.href) + # Space required or won't work, I kid you not + buffer.write(' />') + buffer.write('') def serialize_href(self, href, base=None): @@ -651,7 +648,9 @@ class MobiWriter(object): # Commented out because structured docs don't count section changes in nodeCount # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1 + ''' # *** This should check currentSectionNumber, because content could start late + GR's tweaked code for b14 if thisRecord > 0: # If next article falls into a later record, bump thisRecord thisRecordPrime = thisRecord @@ -667,6 +666,19 @@ class MobiWriter(object): continue else : continue + ''' + # *** This should check currentSectionNumber, because content could start late + if thisRecord > 0: + sectionChangesInThisRecord = True + sectionChangesInRecordNumber = thisRecord + self._currentSectionIndex += 1 + self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex + # The following node opens the nextSection + self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex + continue + else : + continue + # If no one has taken the openingNode slot, it must be us # This could happen before detecting a section change @@ -1267,30 +1279,28 @@ class MobiWriter(object): record.write(data) # Marshall's utf-8 break code. - record.write(overlap) - record.write(pack('>B', len(overlap))) - nextra = 0 - pbreak = 0 - running = offset - - while breaks and (breaks[0] - offset) < RECORD_SIZE: - # .pop returns item, removes it from list - pbreak = (breaks.pop(0) - running) >> 3 - if self.opts.verbose > 2 : - self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) - encoded = decint(pbreak, DECINT_FORWARD) - record.write(encoded) - running += pbreak << 3 - nextra += len(encoded) - - lsize = 1 - while True: - size = decint(nextra + lsize, DECINT_BACKWARD) - if len(size) == lsize: - break - lsize += 1 - - record.write(size) + if WRITE_PBREAKS : + record.write(overlap) + record.write(pack('>B', len(overlap))) + nextra = 0 + pbreak = 0 + running = offset + while breaks and (breaks[0] - offset) < RECORD_SIZE: + # .pop returns item, removes it from list + pbreak = (breaks.pop(0) - running) >> 3 + if self.opts.verbose > 2 : + self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) ) + encoded = decint(pbreak, DECINT_FORWARD) + record.write(encoded) + running += pbreak << 3 + nextra += len(encoded) + lsize = 1 + while True: + size = decint(nextra + lsize, DECINT_BACKWARD) + if len(size) == lsize: + break + lsize += 1 + record.write(size) # Write Trailing Byte Sequence if INDEXING and self._indexable: @@ -1370,8 +1380,13 @@ class MobiWriter(object): metadata = self._oeb.metadata exth = self._build_exth() last_content_record = len(self._records) - 1 + + ''' if INDEXING and self._indexable: self._generate_end_records() + ''' + self._generate_end_records() + record0 = StringIO() # The PalmDOC Header record0.write(pack('>HHIHHHH', self._compression, 0, @@ -1468,7 +1483,7 @@ class MobiWriter(object): record0.write('\0\0\0\x01') # 0xb8 - 0xbb : FCIS record number - if FCIS_FLIS and self._indexable: + if FCIS_FLIS : # Write these if FCIS/FLIS turned on # 0xb8 - 0xbb : FCIS record number record0.write(pack('>I', self._fcis_number)) @@ -1501,16 +1516,25 @@ class MobiWriter(object): record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff)) # 0xe0 - 0xe3 : Extra record data - # The '5' is a bitmask of extra record data at the end: + # Extra record data flags: # - 0x1: (?) # - 0x2: GR # - 0x4: - # Of course, the formats aren't quite the same. # GR: Use 7 for indexed files, 5 for unindexed + # Setting bit 2 (0x4) disables functionality + ''' if INDEXING and self._indexable : record0.write(pack('>I', 7)) else: record0.write(pack('>I', 5)) + ''' + + trailingDataFlags = 1 + if self._indexable : + trailingDataFlags |= 2 + if WRITE_PBREAKS : + trailingDataFlags |= 4 + record0.write(pack('>I', trailingDataFlags)) # 0xe4 - 0xe7 : Primary index record record0.write(pack('>I', 0xffffffff if self._primary_index_record is