New mobi output: Periodicals now work completely on the Kindle

Kovid Goyal 2011-08-18 17:52:37 -06:00
parent 7e6a8c9093
commit 19b42db52b
3 changed files with 76 additions and 4 deletions

View File

@@ -305,9 +305,7 @@ class TBS(object): # {{{
     def __init__(self, data, is_periodical, first=False, section_map={},
             after_first=False):
         self.section_map = section_map
-        #import pprint
-        #pprint.pprint(data)
-        #print()
         if is_periodical:
             # The starting bytes.
             # The value is zero which I think indicates the periodical
@@ -420,6 +418,8 @@ class TBS(object): # {{{
             first_article = articles[0]
             last_article = articles[-1]
             num = len(articles)
+            last_article_ends = (last_article in data['ends'] or
+                    last_article in data['completes'])

             try:
                 next_sec = sections[i+1]
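
For context, the data argument consumed by this code is the per-record summary that the indexer builds for each text record. A rough sketch of its shape, inferred only from the keys visible in this diff ('ends', 'completes', 'spans', 'offset', 'record_number'); the 'starts' key and all field types are assumptions, not taken from this commit:

    # Hypothetical illustration of the per-record dict handed to TBS(data, ...).
    # Only the keys that appear in this diff are certain; the rest is assumed.
    record_data = {
        'offset': 4096,        # byte offset of this text record
        'record_number': 2,    # position of the record in the text stream
        'starts': [],          # index entries that start in this record (assumed key)
        'ends': [],            # index entries that end in this record
        'completes': [],       # index entries contained entirely in this record
        'spans': None,         # an index entry spanning the whole record, if any
    }

    # last_article_ends above is then just membership in the 'ends'/'completes'
    # lists for the last article of the section being processed.
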
@@ -440,6 +440,19 @@ class TBS(object): # {{{
                     if next_sec is not None:
                         buf.write(encode_tbs(last_article.index-next_sec.index,
                             {0b1000: 0}))
+                # If a section TOC starts and extends into the next record, add
+                # a trailing vwi. We detect this when the TBS type is 3 (0b011),
+                # the section is the last one present in the record, and the
+                # last article in that section either ends or completes without
+                # finishing on the last byte of the record.
+                elif (typ == self.type_011 and last_article_ends and
+                        ((last_article.offset+last_article.size) % RECORD_SIZE > 0)
+                    ):
+                    buf.write(encode_tbs(last_article.index-section.index-1,
+                        {0b1000: 0}))
             else:
                 buf.write(encode_tbs(spanner.index - parent_section_index,
                     {0b0001: 0}))
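
The "trailing vwi" mentioned in the comment above is one of MOBI's variable-width integers, the building block of the trailing byte sequences appended to each text record. A minimal sketch of the encoding, for illustration only; encode_vwi is a made-up name and this is not calibre's actual encode_tbs/encint implementation:

    def encode_vwi(value, forward=True):
        # Sketch of a MOBI-style variable-width integer: the value is split
        # into 7-bit groups, most significant group first. A forward-read vwi
        # carries the stop bit (0x80) on its last byte; a backward-read vwi
        # carries it on its first byte, so a reader scanning back from the end
        # of a record knows where the number starts.
        if value < 0:
            raise ValueError('vwi values must be non-negative')
        groups = []
        while True:
            groups.append(value & 0x7f)
            value >>= 7
            if value == 0:
                break
        groups.reverse()            # most significant 7-bit group first
        if forward:
            groups[-1] |= 0x80      # stop bit on the final byte
        else:
            groups[0] |= 0x80       # stop bit on the first byte
        return bytes(bytearray(groups))

    # For example, encode_vwi(0x11) == b'\x91' and
    # encode_vwi(0x1234) == b'\x24\xb4'.
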
@@ -518,6 +531,7 @@ class Indexer(object): # {{{
         for i in indices:
             offsets.append(buf.tell())
             buf.write(i.bytestring)
         index_block = align_block(buf.getvalue())

         # Write offsets to index entries as an IDXT block
@@ -772,9 +786,11 @@ class Indexer(object): # {{{
                     continue
                 if offset in seen_sec_offsets:
                     continue
                 seen_sec_offsets.add(offset)
                 section = PeriodicalIndexEntry(offset, label, klass, 1)
                 section.parent_index = 0
                 for art in sec:
                     try:
                         offset = id_offsets[art.href]
@@ -830,6 +846,7 @@ class Indexer(object): # {{{
             for art in articles:
                 i += 1
                 art.index = i
                 art.parent_index = sec.index

         for sec, normalized_articles in normalized_sections:
@@ -905,6 +922,7 @@ class Indexer(object): # {{{
                 'spans':None, 'offset':offset, 'record_number':i+1}

             for index in self.indices:
                 if index.offset >= next_offset:
                     # Node starts after current record
                     if index.depth == deepest:

View File

@@ -197,6 +197,7 @@ class MobiWriter(object):
     def generate_text(self):
         self.oeb.logger.info('Serializing markup content...')
         self.serializer = Serializer(self.oeb, self.image_map,
+                self.is_periodical,
                 write_page_breaks_after_item=self.write_page_breaks_after_item)
         text = self.serializer()
         self.text_length = len(text)

View File

@@ -7,6 +7,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import re

 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
         namespace, prefixname, urlnormalize)
 from calibre.ebooks.mobi.mobiml import MBP_NS
@@ -19,7 +21,7 @@ from cStringIO import StringIO
 class Serializer(object):
     NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

-    def __init__(self, oeb, images, write_page_breaks_after_item=True):
+    def __init__(self, oeb, images, is_periodical, write_page_breaks_after_item=True):
         '''
         Write all the HTML markup in oeb into a single in memory buffer
         containing a single html document with links replaced by offsets into
@@ -37,6 +39,7 @@ class Serializer(object):
         self.oeb = oeb
         self.images = images
         self.logger = oeb.logger
+        self.is_periodical = is_periodical
         self.write_page_breaks_after_item = write_page_breaks_after_item

         # If not None, this is a number pointing to the location at which to
@@ -187,13 +190,63 @@ class Serializer(object):
         moved to the end.
         '''
         buf = self.buf

+        def serialize_toc_level(tocref, href=None):
+            # Add the provided TOC level to the output stream. If href is
+            # provided, add a link ref to the TOC level output (e.g.
+            # feed_0/index.html).
+            if href is not None:
+                # Resolve the section URL in id_offsets
+                buf.write('<mbp:pagebreak/>')
+                self.id_offsets[urlnormalize(href)] = buf.tell()
+            if tocref.klass == "periodical":
+                buf.write('<div> <div height="1em"></div>')
+            else:
+                buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>'+tocref.title+'</b></font></h2> <div height="1em"></div>')
+            buf.write('<ul>')
+            for tocitem in tocref.nodes:
+                buf.write('<li><a filepos=')
+                itemhref = tocitem.href
+                if tocref.klass == 'periodical':
+                    # This is a section node. For periodical TOCs, the section
+                    # URLs are of the form r'feed_\d+/index.html'. We don't
+                    # want to point to the start of the first article, so we
+                    # change the href.
+                    itemhref = re.sub(r'article_\d+/', '', itemhref)
+                self.href_offsets[itemhref].append(buf.tell())
+                buf.write('0000000000')
+                buf.write(' ><font size="+1" color="blue"><b><u>')
+                buf.write(tocitem.title)
+                buf.write('</u></b></font></a></li>')
+            buf.write('</ul><div height="1em"></div></div>')

         self.anchor_offset = buf.tell()
         buf.write(b'<body>')
         self.body_start_offset = buf.tell()

+        if self.is_periodical:
+            top_toc = self.oeb.toc.nodes[0]
+            serialize_toc_level(top_toc)

         spine = [item for item in self.oeb.spine if item.linear]
         spine.extend([item for item in self.oeb.spine if not item.linear])
         for item in spine:
+            if self.is_periodical and item.is_section_start:
+                for section_toc in top_toc.nodes:
+                    if urlnormalize(item.href) == section_toc.href:
+                        # Create a section URL of the form r'feed_\d+/index.html'
+                        section_url = re.sub(r'article_\d+/', '', section_toc.href)
+                        serialize_toc_level(section_toc, section_url)
+                        section_toc.href = section_url
+                        break
             self.serialize_item(item)
         self.body_end_offset = buf.tell()
         buf.write(b'</body>')
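
The '0000000000' written after each filepos= above is a fixed-width placeholder: serialize_toc_level records where every placeholder sits (href_offsets) and where every link target lands (id_offsets), and the real offsets are presumably patched in once the whole text has been serialized. A minimal sketch of that pattern, using an in-memory buffer and the same section-URL rewrite shown above; write_link_to and fixup_links are illustrative helpers, not calibre's actual methods:

    import re
    from io import BytesIO

    buf = BytesIO()
    href_offsets = {}   # href -> positions of its 10-digit placeholders
    id_offsets = {}     # href -> byte offset of the link target in the text

    # A periodical section href such as 'feed_3/article_0/index.html' is
    # rewritten to point at the section TOC page rather than its first article:
    section_url = re.sub(r'article_\d+/', '', 'feed_3/article_0/index.html')
    # section_url == 'feed_3/index.html'

    def write_link_to(href):
        # Write a link whose filepos is not known yet and remember where the
        # placeholder starts so it can be patched later.
        buf.write(b'<a filepos=')
        href_offsets.setdefault(href, []).append(buf.tell())
        buf.write(b'0000000000')
        buf.write(b' >section</a>')

    def fixup_links():
        # Overwrite every placeholder with the zero-padded offset of its target.
        for href, positions in href_offsets.items():
            target = id_offsets[href]
            for pos in positions:
                buf.seek(pos)
                buf.write(b'%010d' % target)

    # Usage: pretend the section TOC was serialized at offset 0, write a link
    # to it, then resolve all placeholders in one pass at the end.
    id_offsets[section_url] = 0
    write_link_to(section_url)
    fixup_links()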