From 19b42db52b5ffebdd2107772859742c31742c610 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 18 Aug 2011 17:52:37 -0600
Subject: [PATCH] New mobi output: Periodicals now work completely on the
 Kindle

---
 src/calibre/ebooks/mobi/writer2/indexer.py    | 24 +++++++-
 src/calibre/ebooks/mobi/writer2/main.py       |  1 +
 src/calibre/ebooks/mobi/writer2/serializer.py | 55 ++++++++++++++++++-
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py
index 6b7939dc30..9ba532eb3c 100644
--- a/src/calibre/ebooks/mobi/writer2/indexer.py
+++ b/src/calibre/ebooks/mobi/writer2/indexer.py
@@ -305,9 +305,7 @@ class TBS(object): # {{{
     def __init__(self, data, is_periodical, first=False, section_map={},
             after_first=False):
         self.section_map = section_map
-        #import pprint
-        #pprint.pprint(data)
-        #print()
+
         if is_periodical:
             # The starting bytes.
             # The value is zero which I think indicates the periodical
@@ -420,6 +418,8 @@ class TBS(object): # {{{
                 first_article = articles[0]
                 last_article = articles[-1]
                 num = len(articles)
+                last_article_ends = (last_article in data['ends'] or
+                        last_article in data['completes'])
 
                 try:
                     next_sec = sections[i+1]
@@ -440,6 +440,19 @@ class TBS(object): # {{{
                 if next_sec is not None:
                     buf.write(encode_tbs(last_article.index-next_sec.index,
                         {0b1000: 0}))
+
+
+                # If a section TOC starts and extends into the next record add
+                # a trailing vwi. We detect this by TBS type==3, processing last
+                # section present in the record, and the last article in that
+                # section either ends or completes and doesn't finish
+                # on the last byte of the record.
+                elif (typ == self.type_011 and last_article_ends and
+                      ((last_article.offset+last_article.size) % RECORD_SIZE > 0)
+                     ):
+                    buf.write(encode_tbs(last_article.index-section.index-1,
+                        {0b1000: 0}))
+
         else:
             buf.write(encode_tbs(spanner.index - parent_section_index,
                 {0b0001: 0}))
@@ -518,6 +531,7 @@ class Indexer(object): # {{{
         for i in indices:
             offsets.append(buf.tell())
             buf.write(i.bytestring)
+
         index_block = align_block(buf.getvalue())
 
         # Write offsets to index entries as an IDXT block
@@ -772,9 +786,11 @@ class Indexer(object): # {{{
                 continue
             if offset in seen_sec_offsets:
                 continue
+
             seen_sec_offsets.add(offset)
             section = PeriodicalIndexEntry(offset, label, klass, 1)
             section.parent_index = 0
+
             for art in sec:
                 try:
                     offset = id_offsets[art.href]
@@ -830,6 +846,7 @@ class Indexer(object): # {{{
             for art in articles:
                 i += 1
                 art.index = i
+
                 art.parent_index = sec.index
 
         for sec, normalized_articles in normalized_sections:
@@ -905,6 +922,7 @@ class Indexer(object): # {{{
                     'spans':None, 'offset':offset, 'record_number':i+1}
 
             for index in self.indices:
+
                 if index.offset >= next_offset:
                     # Node starts after current record
                     if index.depth == deepest:
diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py
index ef6fc3485e..00a333ce9d 100644
--- a/src/calibre/ebooks/mobi/writer2/main.py
+++ b/src/calibre/ebooks/mobi/writer2/main.py
@@ -197,6 +197,7 @@ class MobiWriter(object):
     def generate_text(self):
         self.oeb.logger.info('Serializing markup content...')
         self.serializer = Serializer(self.oeb, self.image_map,
+                self.is_periodical,
                 write_page_breaks_after_item=self.write_page_breaks_after_item)
         text = self.serializer()
         self.text_length = len(text)
diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py
index 06aacc1b09..fe5dee3258 100644
--- a/src/calibre/ebooks/mobi/writer2/serializer.py
+++ b/src/calibre/ebooks/mobi/writer2/serializer.py
@@ -7,6 +7,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+import re
+
 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
         namespace, prefixname, urlnormalize)
 from calibre.ebooks.mobi.mobiml import MBP_NS
@@ -19,7 +21,7 @@ from cStringIO import StringIO
 class Serializer(object):
     NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
 
-    def __init__(self, oeb, images, write_page_breaks_after_item=True):
+    def __init__(self, oeb, images, is_periodical, write_page_breaks_after_item=True):
         '''
         Write all the HTML markup in oeb into a single in memory buffer
         containing a single html document with links replaced by offsets into
@@ -37,6 +39,7 @@ class Serializer(object):
         self.oeb = oeb
         self.images = images
         self.logger = oeb.logger
+        self.is_periodical = is_periodical
         self.write_page_breaks_after_item = write_page_breaks_after_item
 
         # If not None, this is a number pointing to the location at which to
@@ -187,13 +190,63 @@ class Serializer(object):
         moved to the end.
         '''
         buf = self.buf
+
+        def serialize_toc_level(tocref, href=None):
+            # Write the given TOC level (tocref) to the output buffer as a list of links.
+            # If href is provided, record this position in id_offsets so links to href (e.g. feed_0/index.html) resolve here.
+            if href is not None:
+                # resolve the section url in id_offsets
+                buf.write('<mbp:pagebreak/>')
+                self.id_offsets[urlnormalize(href)] = buf.tell()
+
+            if tocref.klass == "periodical":
+                buf.write('<div> <div height="1em"></div>')
+            else:
+                buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>'+tocref.title+'</b></font></h2> <div height="1em"></div>')
+
+            buf.write('<ul>')
+
+            for tocitem in tocref.nodes:
+                buf.write('<li><a filepos=')
+                itemhref = tocitem.href
+                if tocref.klass == 'periodical':
+                    # This is a section node.
+                    # For periodical TOCs, the section urls are like r'feed_\d+/index.html'
+                    # We don't want to point to the start of the first article,
+                    # so we strip the article component from the href.
+                    itemhref = re.sub(r'article_\d+/', '', itemhref)
+                self.href_offsets[itemhref].append(buf.tell())
+                buf.write('0000000000')
+                buf.write(' ><font size="+1" color="blue"><b><u>')
+                buf.write(tocitem.title)
+                buf.write('</u></b></font></a></li>')
+
+            buf.write('</ul><div height="1em"></div></div>')
+
         self.anchor_offset = buf.tell()
         buf.write(b'<body>')
         self.body_start_offset = buf.tell()
+
+        if self.is_periodical:
+            top_toc = self.oeb.toc.nodes[0]
+            serialize_toc_level(top_toc)
+
         spine = [item for item in self.oeb.spine if item.linear]
         spine.extend([item for item in self.oeb.spine if not item.linear])
+
         for item in spine:
+
+            if self.is_periodical and item.is_section_start:
+                for section_toc in top_toc.nodes:
+                    if urlnormalize(item.href) == section_toc.href:
+                        # create section url of the form r'feed_\d+/index.html'
+                        section_url = re.sub(r'article_\d+/', '', section_toc.href)
+                        serialize_toc_level(section_toc, section_url)
+                        section_toc.href = section_url
+                        break
+
             self.serialize_item(item)
+
         self.body_end_offset = buf.tell()
         buf.write(b'</body>')