From 19b42db52b5ffebdd2107772859742c31742c610 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 18 Aug 2011 17:52:37 -0600 Subject: [PATCH] New mobi output: Periodicals now work completely on the Kindle --- src/calibre/ebooks/mobi/writer2/indexer.py | 24 +++++++- src/calibre/ebooks/mobi/writer2/main.py | 1 + src/calibre/ebooks/mobi/writer2/serializer.py | 55 ++++++++++++++++++- 3 files changed, 76 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 6b7939dc30..9ba532eb3c 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -305,9 +305,7 @@ class TBS(object): # {{{ def __init__(self, data, is_periodical, first=False, section_map={}, after_first=False): self.section_map = section_map - #import pprint - #pprint.pprint(data) - #print() + if is_periodical: # The starting bytes. # The value is zero which I think indicates the periodical @@ -420,6 +418,8 @@ class TBS(object): # {{{ first_article = articles[0] last_article = articles[-1] num = len(articles) + last_article_ends = (last_article in data['ends'] or + last_article in data['completes']) try: next_sec = sections[i+1] @@ -440,6 +440,19 @@ class TBS(object): # {{{ if next_sec is not None: buf.write(encode_tbs(last_article.index-next_sec.index, {0b1000: 0})) + + + # If a section TOC starts and extends into the next record add + # a trailing vwi. We detect this by TBS type==3, processing last + # section present in the record, and the last article in that + # section either ends or completes and doesn't finish + # on the last byte of the record. + elif (typ == self.type_011 and last_article_ends and + ((last_article.offset+last_article.size) % RECORD_SIZE > 0) + ): + buf.write(encode_tbs(last_article.index-section.index-1, + {0b1000: 0})) + else: buf.write(encode_tbs(spanner.index - parent_section_index, {0b0001: 0})) @@ -518,6 +531,7 @@ class Indexer(object): # {{{ for i in indices: offsets.append(buf.tell()) buf.write(i.bytestring) + index_block = align_block(buf.getvalue()) # Write offsets to index entries as an IDXT block @@ -772,9 +786,11 @@ class Indexer(object): # {{{ continue if offset in seen_sec_offsets: continue + seen_sec_offsets.add(offset) section = PeriodicalIndexEntry(offset, label, klass, 1) section.parent_index = 0 + for art in sec: try: offset = id_offsets[art.href] @@ -830,6 +846,7 @@ class Indexer(object): # {{{ for art in articles: i += 1 art.index = i + art.parent_index = sec.index for sec, normalized_articles in normalized_sections: @@ -905,6 +922,7 @@ class Indexer(object): # {{{ 'spans':None, 'offset':offset, 'record_number':i+1} for index in self.indices: + if index.offset >= next_offset: # Node starts after current record if index.depth == deepest: diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index ef6fc3485e..00a333ce9d 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -197,6 +197,7 @@ class MobiWriter(object): def generate_text(self): self.oeb.logger.info('Serializing markup content...') self.serializer = Serializer(self.oeb, self.image_map, + self.is_periodical, write_page_breaks_after_item=self.write_page_breaks_after_item) text = self.serializer() self.text_length = len(text) diff --git a/src/calibre/ebooks/mobi/writer2/serializer.py b/src/calibre/ebooks/mobi/writer2/serializer.py index 06aacc1b09..fe5dee3258 100644 --- a/src/calibre/ebooks/mobi/writer2/serializer.py +++ b/src/calibre/ebooks/mobi/writer2/serializer.py @@ -7,6 +7,8 @@ __license__ = 'GPL v3' __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import re + from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize) from calibre.ebooks.mobi.mobiml import MBP_NS @@ -19,7 +21,7 @@ from cStringIO import StringIO class Serializer(object): NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} - def __init__(self, oeb, images, write_page_breaks_after_item=True): + def __init__(self, oeb, images, is_periodical, write_page_breaks_after_item=True): ''' Write all the HTML markup in oeb into a single in memory buffer containing a single html document with links replaced by offsets into @@ -37,6 +39,7 @@ class Serializer(object): self.oeb = oeb self.images = images self.logger = oeb.logger + self.is_periodical = is_periodical self.write_page_breaks_after_item = write_page_breaks_after_item # If not None, this is a number pointing to the location at which to @@ -187,13 +190,63 @@ class Serializer(object): moved to the end. ''' buf = self.buf + + def serialize_toc_level(tocref, href=None): + # add the provided toc level to the output stream + # if href is provided add a link ref to the toc level output (e.g. feed_0/index.html) + if href is not None: + # resolve the section url in id_offsets + buf.write('') + self.id_offsets[urlnormalize(href)] = buf.tell() + + if tocref.klass == "periodical": + buf.write('
') + else: + buf.write('

'+tocref.title+'

') + + buf.write('
') + self.anchor_offset = buf.tell() buf.write(b'') self.body_start_offset = buf.tell() + + if self.is_periodical: + top_toc = self.oeb.toc.nodes[0] + serialize_toc_level(top_toc) + spine = [item for item in self.oeb.spine if item.linear] spine.extend([item for item in self.oeb.spine if not item.linear]) + for item in spine: + + if self.is_periodical and item.is_section_start: + for section_toc in top_toc.nodes: + if urlnormalize(item.href) == section_toc.href: + # create section url of the form r'feed_\d+/index.html' + section_url = re.sub(r'article_\d+/', '', section_toc.href) + serialize_toc_level(section_toc, section_url) + section_toc.href = section_url + break + self.serialize_item(item) + self.body_end_offset = buf.tell() buf.write(b'')