MOBI Output: Strip inline nav sections from periodicals

2025-12-10 15:15:03 -05:00 · 2009-07-18 17:34:35 -06:00 · 2009-07-18 17:34:35 -06:00 · d1aabbaa1f
commit d1aabbaa1f
parent 50d6e86de8
2 changed files with 109 additions and 47 deletions
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@ -67,31 +67,62 @@ class MOBIOutput(OutputFormatPlugin):
            self.oeb.manifest.add(id, href, 'image/gif', data=raw)
            self.oeb.guide.add('masthead', 'Masthead Image', href)

+    def dump_toc(self, toc) :
+        '''
+        Diagnostic helper: write the structure of ``toc`` to ``self.log``.
+
+        Logs the title and href of ``toc`` itself, then walks three levels
+        below it: each entry of ``toc.nodes`` (labelled "periodical"), each
+        child of such an entry (labelled "section"), and each child of a
+        section (labelled "article").
+
+        :param toc: node to dump; it must expose ``title``, ``href`` and
+                    ``nodes``, and the entries below it must be iterable
+                    and expose ``title`` and ``href``.
+        '''
+        self.log( "\n         >>> TOC contents <<<")
+        self.log( "     toc.title: %s" % toc.title)
+        self.log( "      toc.href: %s" % toc.href)
+        # Top level: one entry per node directly under the TOC root
+        for periodical in toc.nodes :
+            self.log( "\tperiodical title: %s" % periodical.title)
+            self.log( "\t            href: %s" % periodical.href)
+            # Second level: children of the periodical node
+            for section in periodical :
+                self.log( "\t\tsection title: %s" % section.title)
+                # The section href is reported under the label "first article"
+                self.log( "\t\tfirst article: %s" % section.href)
+                # Third level: children of the section node
+                for article in section :
+                    # Only the article title goes through repr()
+                    # NOTE(review): presumably so non-ASCII titles cannot break logging - confirm
+                    self.log( "\t\t\tarticle title: %s" % repr(article.title))
+                    self.log( "\t\t\t         href: %s" % article.href)
+
+    def dump_manifest(self) :
+        '''
+        Diagnostic helper: write every entry of ``self.oeb.manifest.hrefs``
+        to ``self.log``, one tab-indented line per entry, under a
+        ">>> Manifest entries <<<" banner.
+        '''
+        self.log( "\n         >>> Manifest entries <<<")
+        # Iterating ``hrefs`` directly yields the values that get logged
+        # NOTE(review): presumably these are the href keys of a mapping - confirm
+        for href in self.oeb.manifest.hrefs :
+            self.log ("\t%s" % href)

    def periodicalize_toc(self):
+        '''
+        Rebuild ``self.oeb.toc`` as a periodical -> section -> article tree.
+
+        Does nothing unless the TOC is non-empty and its first entry is not
+        already of class 'periodical'.
+
+        * TOC depth < 3 (single section): the first two spine items are
+          removed from the manifest and every existing TOC entry is placed
+          under one synthetic 'All articles' section.
+        * Otherwise (multi section): each top-level entry becomes a section.
+          If it has children, the manifest item its href refers to is
+          removed and the section href is set to its first child's href.
+
+        The children of each section are then marked as articles and
+        detached into a dict keyed by ``id(section)``, and a root node of
+        class 'periodical' is created. After root has been appended to the
+        TOC, the href of the first top-level TOC node is set to the href of
+        that node's first child.
+
+        Part of this method lies between the two hunks and is not shown.
+        '''
        from calibre.ebooks.oeb.base import TOC
        toc = self.oeb.toc
        if toc and toc[0].klass != 'periodical':
-            start_href = self.oeb.spine[0].href
            self.log('Converting TOC for MOBI periodical indexing...')
+
+            # Maps id(section) -> list of that section's article nodes
            articles = {}
            if toc.depth() < 3:
+                # single section periodical
+                # Look up both items before removing either one, so the
+                # second lookup does not depend on the first removal.
+                one, two = self.oeb.spine[0], self.oeb.spine[1]
+                self.oeb.manifest.remove(one)
+                self.oeb.manifest.remove(two)
                sections = [TOC(klass='section', title=_('All articles'),
-                    href=start_href)]
+                    href=self.oeb.spine[0].href)]
                for x in toc:
                    sections[0].nodes.append(x)
            else:
+                # multi-section periodical
                sections = list(toc)
-                for x in sections:
+                for i,x in enumerate(sections):
                    x.klass = 'section'
+                    # Must not be named ``articles``: that name holds the
+                    # dict indexed by id(section) in the loop below.
+                    section_articles = list(x)
+                    if section_articles:
+                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
+                        x.href = section_articles[0].href
+
+
            for sec in sections:
                articles[id(sec)] = []
                for a in list(sec):
                    a.klass = 'article'
                    articles[id(sec)].append(a)
                    sec.nodes.remove(a)
-            root = TOC(klass='periodical', href=start_href,
+
+            root = TOC(klass='periodical', href=self.oeb.spine[0].href,
                    title=unicode(self.oeb.metadata.title[0]))
+
            for s in sections:
                if articles[id(s)]:
                    for a in articles[id(s)]:
@ -103,6 +134,13 @@ class MOBIOutput(OutputFormatPlugin):

            toc.nodes.append(root)

+            # Fix up the periodical href to point to first section href
+            toc.nodes[0].href = toc.nodes[0].nodes[0].href
+
+            # GR diagnostics
+            #self.dump_toc(toc)
+            #self.dump_manifest()
+

    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc

 INDEXING = True
 FCIS_FLIS = True
+WRITE_PBREAKS = True

 # TODO:
 # - Optionally rasterize tables
@ -190,24 +191,20 @@ class Serializer(object):
            if hrefs[path].media_type not in OEB_DOCS:
                continue

-            if ref.type == 'other.start' :
-                # Kindle-specific 'Start Reading' directive
-                buffer.write('<reference title="Startup Page" ')
-                buffer.write('type="start" ')
-                self.serialize_href(ref.href)
-                # Space required or won't work, I kid you not
-                buffer.write(' />')
-            else:
-                buffer.write('<reference type="')
+            buffer.write('<reference type="')
+            if ref.type.startswith('other.') :
+                self.serialize_text(ref.type.replace('other.',''), quot=True)
+            else :
                self.serialize_text(ref.type, quot=True)
+            buffer.write('" ')
+            if ref.title is not None:
+                buffer.write('title="')
+                self.serialize_text(ref.title, quot=True)
                buffer.write('" ')
-                if ref.title is not None:
-                    buffer.write('title="')
-                    self.serialize_text(ref.title, quot=True)
-                    buffer.write('" ')
-                self.serialize_href(ref.href)
-                # Space required or won't work, I kid you not
-                buffer.write(' />')
+            self.serialize_href(ref.href)
+            # Space required or won't work, I kid you not
+            buffer.write(' />')
+
        buffer.write('</guide>')

    def serialize_href(self, href, base=None):
@ -651,7 +648,9 @@ class MobiWriter(object):
                # Commented out because structured docs don't count section changes in nodeCount
                # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1

+                '''
                # *** This should check currentSectionNumber, because content could start late
+                GR's tweaked code for b14
                if thisRecord > 0:
                    # If next article falls into a later record, bump thisRecord
                    thisRecordPrime = thisRecord
@ -667,6 +666,19 @@ class MobiWriter(object):
                    continue
                else :
                    continue
+                '''
+                # *** This should check currentSectionNumber, because content could start late
+                if thisRecord > 0:
+                    sectionChangesInThisRecord = True
+                    sectionChangesInRecordNumber = thisRecord
+                    self._currentSectionIndex += 1
+                    self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
+                    # The following node opens the nextSection
+                    self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
+                    continue
+                else :
+                    continue
+

            # If no one has taken the openingNode slot, it must be us
            # This could happen before detecting a section change
@ -1267,30 +1279,28 @@ class MobiWriter(object):
            record.write(data)

            # Marshall's utf-8 break code.
-            record.write(overlap)
-            record.write(pack('>B', len(overlap)))
-            nextra = 0
-            pbreak = 0
-            running = offset
-
-            while breaks and (breaks[0] - offset) < RECORD_SIZE:
-                # .pop returns item, removes it from list
-                pbreak = (breaks.pop(0) - running) >> 3
-                if self.opts.verbose > 2 :
-                    self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
-                encoded = decint(pbreak, DECINT_FORWARD)
-                record.write(encoded)
-                running += pbreak << 3
-                nextra += len(encoded)
-
-            lsize = 1
-            while True:
-                size = decint(nextra + lsize, DECINT_BACKWARD)
-                if len(size) == lsize:
-                    break
-                lsize += 1
-
-            record.write(size)
+            if WRITE_PBREAKS :
+                record.write(overlap)
+                record.write(pack('>B', len(overlap)))
+                nextra = 0
+                pbreak = 0
+                running = offset
+                while breaks and (breaks[0] - offset) < RECORD_SIZE:
+                    # .pop returns item, removes it from list
+                    pbreak = (breaks.pop(0) - running) >> 3
+                    if self.opts.verbose > 2 :
+                        self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
+                    encoded = decint(pbreak, DECINT_FORWARD)
+                    record.write(encoded)
+                    running += pbreak << 3
+                    nextra += len(encoded)
+                lsize = 1
+                while True:
+                    size = decint(nextra + lsize, DECINT_BACKWARD)
+                    if len(size) == lsize:
+                        break
+                    lsize += 1
+                record.write(size)

            # Write Trailing Byte Sequence
            if INDEXING and self._indexable:
@ -1370,8 +1380,13 @@ class MobiWriter(object):
        metadata = self._oeb.metadata
        exth = self._build_exth()
        last_content_record = len(self._records) - 1
+
+        '''
        if INDEXING and self._indexable:
            self._generate_end_records()
+        '''
+        self._generate_end_records()
+
        record0 = StringIO()
        # The PalmDOC Header
        record0.write(pack('>HHIHHHH', self._compression, 0,
@ -1468,7 +1483,7 @@ class MobiWriter(object):
        record0.write('\0\0\0\x01')

        # 0xb8 - 0xbb : FCIS record number
-        if FCIS_FLIS and self._indexable:
+        if FCIS_FLIS :
            # Write these if FCIS/FLIS turned on
            # 0xb8 - 0xbb : FCIS record number
            record0.write(pack('>I', self._fcis_number))
@ -1501,16 +1516,25 @@ class MobiWriter(object):
        record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

        # 0xe0 - 0xe3 : Extra record data
-        # The '5' is a bitmask of extra record data at the end:
+        # Extra record data flags:
        #   - 0x1: <extra multibyte bytes><size> (?)
        #   - 0x2: <TBS indexing description of this HTML record><size> GR
        #   - 0x4: <uncrossable breaks><size>
-        # Of course, the formats aren't quite the same.
        # GR: Use 7 for indexed files, 5 for unindexed
+        # Setting bit 2 (0x4) disables <guide><reference type="start"> functionality
+        '''
        if INDEXING and self._indexable :
            record0.write(pack('>I', 7))
        else:
            record0.write(pack('>I', 5))
+        '''
+
+        trailingDataFlags = 1
+        if self._indexable :
+            trailingDataFlags |= 2
+        if WRITE_PBREAKS :
+            trailingDataFlags |= 4
+        record0.write(pack('>I', trailingDataFlags))

        # 0xe4 - 0xe7 : Primary index record
        record0.write(pack('>I', 0xffffffff if self._primary_index_record is