MOBI Output: Strip inline nav sections from periodicals

2025-12-11 15:45:03 -05:00 · 2009-07-18 17:34:35 -06:00 · 2009-07-18 17:34:35 -06:00 · d1aabbaa1f
commit d1aabbaa1f
parent 50d6e86de8
2 changed files with 109 additions and 47 deletions
--- a/src/calibre/ebooks/mobi/output.py
+++ b/src/calibre/ebooks/mobi/output.py
@ -67,31 +67,62 @@ class MOBIOutput(OutputFormatPlugin):
            self.oeb.manifest.add(id, href, 'image/gif', data=raw)
            self.oeb.guide.add('masthead', 'Masthead Image', href)
    def dump_toc(self, toc):
        """Log a diagnostic, tab-indented listing of a TOC tree.

        Writes the title and href of ``toc`` itself, then walks three levels
        below it: every entry of ``toc.nodes`` (labelled "periodical"), each
        of that entry's children (labelled "section") and each of those
        children's children (labelled "article").  Everything is emitted
        through ``self.log``; nothing is returned or modified.
        """
        emit = self.log
        emit("\n         >>> TOC contents <<<")
        emit("     toc.title: %s" % toc.title)
        emit("      toc.href: %s" % toc.href)
        for top in toc.nodes:
            emit("\tperiodical title: %s" % top.title)
            emit("\t            href: %s" % top.href)
            for sec in top:
                emit("\t\tsection title: %s" % sec.title)
                emit("\t\tfirst article: %s" % sec.href)
                for art in sec:
                    # %r keeps the repr() form so non-ASCII titles log safely
                    emit("\t\t\tarticle title: %r" % (art.title,))
                    emit("\t\t\t         href: %s" % art.href)
    def dump_manifest(self):
        """Log each href in ``self.oeb.manifest.hrefs``, one per line.

        Diagnostic helper: a banner line is written first, then every href
        prefixed with a tab, all through ``self.log``.
        """
        emit = self.log
        emit("\n         >>> Manifest entries <<<")
        for entry_href in self.oeb.manifest.hrefs:
            emit("\t%s" % entry_href)
    def periodicalize_toc(self):
        from calibre.ebooks.oeb.base import TOC
        toc = self.oeb.toc
        if toc and toc[0].klass != 'periodical':
-            start_href = self.oeb.spine[0].href
+            one, two = self.oeb.spine[0], self.oeb.spine[1]
            self.log('Converting TOC for MOBI periodical indexing...')
            articles = {}
            if toc.depth() < 3:
                # single section periodical
                self.oeb.manifest.remove(one)
                self.oeb.manifest.remove(two)
                sections = [TOC(klass='section', title=_('All articles'),
-                    href=start_href)]
+                    href=self.oeb.spine[0].href)]
                for x in toc:
                    sections[0].nodes.append(x)
            else:
                # multi-section periodical
                sections = list(toc)
-                for x in sections:
+                for i,x in enumerate(sections):
                    x.klass = 'section'
                    articles = list(x)
                    if articles:
                        self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
                        x.href = articles[0].href
            for sec in sections:
                articles[id(sec)] = []
                for a in list(sec):
                    a.klass = 'article'
                    articles[id(sec)].append(a)
                    sec.nodes.remove(a)
-            root = TOC(klass='periodical', href=start_href,
+
            root = TOC(klass='periodical', href=self.oeb.spine[0].href,
                    title=unicode(self.oeb.metadata.title[0]))
            for s in sections:
                if articles[id(s)]:
                    for a in articles[id(s)]:
@ -103,6 +134,13 @@ class MOBIOutput(OutputFormatPlugin):
            toc.nodes.append(root)
            # Fix up the periodical href to point to first section href
            toc.nodes[0].href = toc.nodes[0].nodes[0].href
            # GR diagnostics
            #self.dump_toc(toc)
            #self.dump_manifest()
    def convert(self, oeb, output_path, input_plugin, opts, log):
        self.log, self.opts, self.oeb = log, opts, oeb
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -31,6 +31,7 @@ from calibre.ebooks.compression.palmdoc import compress_doc
 INDEXING = True
 FCIS_FLIS = True
 WRITE_PBREAKS = True
 # TODO:
 # - Optionally rasterize tables
@ -190,24 +191,20 @@ class Serializer(object):
            if hrefs[path].media_type not in OEB_DOCS:
                continue
-            if ref.type == 'other.start' :
+            buffer.write('<reference type="')
-                # Kindle-specific 'Start Reading' directive
+            if ref.type.startswith('other.') :
-                buffer.write('<reference title="Startup Page" ')
+                self.serialize_text(ref.type.replace('other.',''), quot=True)
-                buffer.write('type="start" ')
+            else :
                self.serialize_href(ref.href)
                # Space required or won't work, I kid you not
                buffer.write(' />')
            else:
                buffer.write('<reference type="')
                self.serialize_text(ref.type, quot=True)
            buffer.write('" ')
            if ref.title is not None:
                buffer.write('title="')
                self.serialize_text(ref.title, quot=True)
                buffer.write('" ')
-                if ref.title is not None:
+            self.serialize_href(ref.href)
-                    buffer.write('title="')
+            # Space required or won't work, I kid you not
-                    self.serialize_text(ref.title, quot=True)
+            buffer.write(' />')
-                    buffer.write('" ')
+
                self.serialize_href(ref.href)
                # Space required or won't work, I kid you not
                buffer.write(' />')
        buffer.write('</guide>')
    def serialize_href(self, href, base=None):
@ -651,7 +648,9 @@ class MobiWriter(object):
                # Commented out because structured docs don't count section changes in nodeCount
                # self._HTMLRecords[thisRecord].currentSectionNodeCount += 1
                '''
                # *** This should check currentSectionNumber, because content could start late
                GR's tweaked code for b14
                if thisRecord > 0:
                    # If next article falls into a later record, bump thisRecord
                    thisRecordPrime = thisRecord
@ -667,6 +666,19 @@ class MobiWriter(object):
                    continue
                else :
                    continue
                '''
                # *** This should check currentSectionNumber, because content could start late
                if thisRecord > 0:
                    sectionChangesInThisRecord = True
                    sectionChangesInRecordNumber = thisRecord
                    self._currentSectionIndex += 1
                    self._HTMLRecords[thisRecord].nextSectionNumber = self._currentSectionIndex
                    # The following node opens the nextSection
                    self._HTMLRecords[thisRecord].nextSectionOpeningNode = myIndex
                    continue
                else :
                    continue
            # If no one has taken the openingNode slot, it must be us
            # This could happen before detecting a section change
@ -1267,30 +1279,28 @@ class MobiWriter(object):
            record.write(data)
            # Marshall's utf-8 break code.
-            record.write(overlap)
+            if WRITE_PBREAKS :
-            record.write(pack('>B', len(overlap)))
+                record.write(overlap)
-            nextra = 0
+                record.write(pack('>B', len(overlap)))
-            pbreak = 0
+                nextra = 0
-            running = offset
+                pbreak = 0
-
+                running = offset
-            while breaks and (breaks[0] - offset) < RECORD_SIZE:
+                while breaks and (breaks[0] - offset) < RECORD_SIZE:
-                # .pop returns item, removes it from list
+                    # .pop returns item, removes it from list
-                pbreak = (breaks.pop(0) - running) >> 3
+                    pbreak = (breaks.pop(0) - running) >> 3
-                if self.opts.verbose > 2 :
+                    if self.opts.verbose > 2 :
-                    self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
+                        self._oeb.logger.info('pbreak = 0x%X at 0x%X' % (pbreak, record.tell()) )
-                encoded = decint(pbreak, DECINT_FORWARD)
+                    encoded = decint(pbreak, DECINT_FORWARD)
-                record.write(encoded)
+                    record.write(encoded)
-                running += pbreak << 3
+                    running += pbreak << 3
-                nextra += len(encoded)
+                    nextra += len(encoded)
-
+                lsize = 1
-            lsize = 1
+                while True:
-            while True:
+                    size = decint(nextra + lsize, DECINT_BACKWARD)
-                size = decint(nextra + lsize, DECINT_BACKWARD)
+                    if len(size) == lsize:
-                if len(size) == lsize:
+                        break
-                    break
+                    lsize += 1
-                lsize += 1
+                record.write(size)
            record.write(size)
            # Write Trailing Byte Sequence
            if INDEXING and self._indexable:
@ -1370,8 +1380,13 @@ class MobiWriter(object):
        metadata = self._oeb.metadata
        exth = self._build_exth()
        last_content_record = len(self._records) - 1
        '''
        if INDEXING and self._indexable:
            self._generate_end_records()
        '''
        self._generate_end_records()
        record0 = StringIO()
        # The PalmDOC Header
        record0.write(pack('>HHIHHHH', self._compression, 0,
@ -1468,7 +1483,7 @@ class MobiWriter(object):
        record0.write('\0\0\0\x01')
        # 0xb8 - 0xbb : FCIS record number
-        if FCIS_FLIS and self._indexable:
+        if FCIS_FLIS :
            # Write these if FCIS/FLIS turned on
            # 0xb8 - 0xbb : FCIS record number
            record0.write(pack('>I', self._fcis_number))
@ -1501,16 +1516,25 @@ class MobiWriter(object):
        record0.write(pack('>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))
        # 0xe0 - 0xe3 : Extra record data
-        # The '5' is a bitmask of extra record data at the end:
+        # Extra record data flags:
        #   - 0x1: <extra multibyte bytes><size> (?)
        #   - 0x2: <TBS indexing description of this HTML record><size> GR
        #   - 0x4: <uncrossable breaks><size>
        # Of course, the formats aren't quite the same.
        # GR: Use 7 for indexed files, 5 for unindexed
        # Setting bit 2 (0x4) disables <guide><reference type="start"> functionality
        '''
        if INDEXING and self._indexable :
            record0.write(pack('>I', 7))
        else:
            record0.write(pack('>I', 5))
        '''
        trailingDataFlags = 1
        if self._indexable :
            trailingDataFlags |= 2
        if WRITE_PBREAKS :
            trailingDataFlags |= 4
        record0.write(pack('>I', trailingDataFlags))
        # 0xe4 - 0xe7 : Primary index record
        record0.write(pack('>I', 0xffffffff if self._primary_index_record is