New mobi output: Periodicals now work completely on the Kindle

Kovid Goyal 2011-08-18 17:52:37 -06:00
parent 7e6a8c9093
commit 19b42db52b
3 changed files with 76 additions and 4 deletions

View File

@@ -305,9 +305,7 @@ class TBS(object): # {{{
     def __init__(self, data, is_periodical, first=False, section_map={},
             after_first=False):
         self.section_map = section_map
-        #import pprint
-        #pprint.pprint(data)
-        #print()
         if is_periodical:
             # The starting bytes.
             # The value is zero which I think indicates the periodical
@@ -420,6 +418,8 @@ class TBS(object): # {{{
             first_article = articles[0]
             last_article = articles[-1]
             num = len(articles)
+            last_article_ends = (last_article in data['ends'] or
+                    last_article in data['completes'])

             try:
                 next_sec = sections[i+1]
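
For context, the data argument consumed by this code is the per-record summary that the indexer builds for each text record. A rough sketch of its shape, inferred only from the keys visible in this diff ('ends', 'completes', 'spans', 'offset', 'record_number'); the 'starts' key and all field types are assumptions, not taken from this commit:

    # Hypothetical illustration of the per-record dict handed to TBS(data, ...).
    # Only the keys that appear in this diff are certain; the rest is assumed.
    record_data = {
        'offset': 4096,        # byte offset of this text record
        'record_number': 2,    # position of the record in the text stream
        'starts': [],          # index entries that start in this record (assumed key)
        'ends': [],            # index entries that end in this record
        'completes': [],       # index entries contained entirely in this record
        'spans': None,         # an index entry spanning the whole record, if any
    }

    # last_article_ends above is then just membership in the 'ends'/'completes'
    # lists for the last article of the section being processed.
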
@@ -440,6 +440,19 @@ class TBS(object): # {{{
                     if next_sec is not None:
                         buf.write(encode_tbs(last_article.index-next_sec.index,
                             {0b1000: 0}))
+                # If a section TOC starts and extends into the next record, add
+                # a trailing vwi. We detect this when the TBS type is 3 (0b011),
+                # the section is the last one present in the record, and the
+                # last article in that section either ends or completes without
+                # finishing on the last byte of the record.
+                elif (typ == self.type_011 and last_article_ends and
+                        ((last_article.offset+last_article.size) % RECORD_SIZE > 0)
+                    ):
+                    buf.write(encode_tbs(last_article.index-section.index-1,
+                        {0b1000: 0}))
             else:
                 buf.write(encode_tbs(spanner.index - parent_section_index,
                     {0b0001: 0}))
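
The "trailing vwi" mentioned in the comment above is one of MOBI's variable-width integers, the building block of the trailing byte sequences appended to each text record. A minimal sketch of the encoding, for illustration only; encode_vwi is a made-up name and this is not calibre's actual encode_tbs/encint implementation:

    def encode_vwi(value, forward=True):
        # Sketch of a MOBI-style variable-width integer: the value is split
        # into 7-bit groups, most significant group first. A forward-read vwi
        # carries the stop bit (0x80) on its last byte; a backward-read vwi
        # carries it on its first byte, so a reader scanning back from the end
        # of a record knows where the number starts.
        if value < 0:
            raise ValueError('vwi values must be non-negative')
        groups = []
        while True:
            groups.append(value & 0x7f)
            value >>= 7
            if value == 0:
                break
        groups.reverse()            # most significant 7-bit group first
        if forward:
            groups[-1] |= 0x80      # stop bit on the final byte
        else:
            groups[0] |= 0x80       # stop bit on the first byte
        return bytes(bytearray(groups))

    # For example, encode_vwi(0x11) == b'\x91' and
    # encode_vwi(0x1234) == b'\x24\xb4'.
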
@@ -518,6 +531,7 @@ class Indexer(object): # {{{
         for i in indices:
             offsets.append(buf.tell())
             buf.write(i.bytestring)
         index_block = align_block(buf.getvalue())

         # Write offsets to index entries as an IDXT block
@@ -772,9 +786,11 @@ class Indexer(object): # {{{
                     continue
                 if offset in seen_sec_offsets:
                     continue
                 seen_sec_offsets.add(offset)
                 section = PeriodicalIndexEntry(offset, label, klass, 1)
                 section.parent_index = 0
                 for art in sec:
                     try:
                         offset = id_offsets[art.href]
@@ -830,6 +846,7 @@ class Indexer(object): # {{{
             for art in articles:
                 i += 1
                 art.index = i
                 art.parent_index = sec.index

         for sec, normalized_articles in normalized_sections:
@@ -905,6 +922,7 @@ class Indexer(object): # {{{
                 'spans':None, 'offset':offset, 'record_number':i+1}

             for index in self.indices:
                 if index.offset >= next_offset:
                     # Node starts after current record
                     if index.depth == deepest:

View File

@@ -197,6 +197,7 @@ class MobiWriter(object):
     def generate_text(self):
         self.oeb.logger.info('Serializing markup content...')
         self.serializer = Serializer(self.oeb, self.image_map,
+                self.is_periodical,
                 write_page_breaks_after_item=self.write_page_breaks_after_item)
         text = self.serializer()
         self.text_length = len(text)

View File

@@ -7,6 +7,8 @@ __license__ = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

+import re

 from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
         namespace, prefixname, urlnormalize)
 from calibre.ebooks.mobi.mobiml import MBP_NS
@@ -19,7 +21,7 @@ from cStringIO import StringIO
 class Serializer(object):
     NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}

-    def __init__(self, oeb, images, write_page_breaks_after_item=True):
+    def __init__(self, oeb, images, is_periodical, write_page_breaks_after_item=True):
         '''
         Write all the HTML markup in oeb into a single in memory buffer
         containing a single html document with links replaced by offsets into
@@ -37,6 +39,7 @@ class Serializer(object):
         self.oeb = oeb
         self.images = images
         self.logger = oeb.logger
+        self.is_periodical = is_periodical
         self.write_page_breaks_after_item = write_page_breaks_after_item

         # If not None, this is a number pointing to the location at which to
@@ -187,13 +190,63 @@ class Serializer(object):
         moved to the end.
         '''
         buf = self.buf

+        def serialize_toc_level(tocref, href=None):
+            # Add the provided TOC level to the output stream. If href is
+            # provided, add a link ref to the TOC level output (e.g.
+            # feed_0/index.html).
+            if href is not None:
+                # Resolve the section URL in id_offsets
+                buf.write('<mbp:pagebreak/>')
+                self.id_offsets[urlnormalize(href)] = buf.tell()
+            if tocref.klass == "periodical":
+                buf.write('<div> <div height="1em"></div>')
+            else:
+                buf.write('<div></div> <div> <h2 height="1em"><font size="+2"><b>'+tocref.title+'</b></font></h2> <div height="1em"></div>')
+            buf.write('<ul>')
+            for tocitem in tocref.nodes:
+                buf.write('<li><a filepos=')
+                itemhref = tocitem.href
+                if tocref.klass == 'periodical':
+                    # This is a section node. For periodical TOCs, the section
+                    # URLs are of the form r'feed_\d+/index.html'. We don't
+                    # want to point to the start of the first article, so we
+                    # change the href.
+                    itemhref = re.sub(r'article_\d+/', '', itemhref)
+                self.href_offsets[itemhref].append(buf.tell())
+                buf.write('0000000000')
+                buf.write(' ><font size="+1" color="blue"><b><u>')
+                buf.write(tocitem.title)
+                buf.write('</u></b></font></a></li>')
+            buf.write('</ul><div height="1em"></div></div>')

         self.anchor_offset = buf.tell()
         buf.write(b'<body>')
         self.body_start_offset = buf.tell()

+        if self.is_periodical:
+            top_toc = self.oeb.toc.nodes[0]
+            serialize_toc_level(top_toc)

         spine = [item for item in self.oeb.spine if item.linear]
         spine.extend([item for item in self.oeb.spine if not item.linear])
         for item in spine:
+            if self.is_periodical and item.is_section_start:
+                for section_toc in top_toc.nodes:
+                    if urlnormalize(item.href) == section_toc.href:
+                        # Create a section URL of the form r'feed_\d+/index.html'
+                        section_url = re.sub(r'article_\d+/', '', section_toc.href)
+                        serialize_toc_level(section_toc, section_url)
+                        section_toc.href = section_url
+                        break
             self.serialize_item(item)
         self.body_end_offset = buf.tell()
         buf.write(b'</body>')
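
The '0000000000' written after each filepos= above is a fixed-width placeholder: serialize_toc_level records where every placeholder sits (href_offsets) and where every link target lands (id_offsets), and the real offsets are presumably patched in once the whole text has been serialized. A minimal sketch of that pattern, using an in-memory buffer and the same section-URL rewrite shown above; write_link_to and fixup_links are illustrative helpers, not calibre's actual methods:

    import re
    from io import BytesIO

    buf = BytesIO()
    href_offsets = {}   # href -> positions of its 10-digit placeholders
    id_offsets = {}     # href -> byte offset of the link target in the text

    # A periodical section href such as 'feed_3/article_0/index.html' is
    # rewritten to point at the section TOC page rather than its first article:
    section_url = re.sub(r'article_\d+/', '', 'feed_3/article_0/index.html')
    # section_url == 'feed_3/index.html'

    def write_link_to(href):
        # Write a link whose filepos is not known yet and remember where the
        # placeholder starts so it can be patched later.
        buf.write(b'<a filepos=')
        href_offsets.setdefault(href, []).append(buf.tell())
        buf.write(b'0000000000')
        buf.write(b' >section</a>')

    def fixup_links():
        # Overwrite every placeholder with the zero-padded offset of its target.
        for href, positions in href_offsets.items():
            target = id_offsets[href]
            for pos in positions:
                buf.seek(pos)
                buf.write(b'%010d' % target)

    # Usage: pretend the section TOC was serialized at offset 0, write a link
    # to it, then resolve all placeholders in one pass at the end.
    id_offsets[section_url] = 0
    write_link_to(section_url)
    fixup_links()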