From e8b919db81bc8975a1c41c8fc8ec5b02c727840d Mon Sep 17 00:00:00 2001
From: "Marshall T. Vandegrift"
Date: Fri, 9 Jan 2009 10:19:17 -0500
Subject: [PATCH] Mobipocket support: - All bells and whistles to manifest trimmer - Added generation of HTML TOCs - Regression to Mobi linking -- wtf?

---
Review note (ignored by `git am`): in trimmanifest.py the cssutils URL
callback `replacer` now indexes oeb.manifest.hrefs with the resolved `absuri`
it has just tested for membership. The previous revision of this patch used
`href`, a variable left over from the XML/XHTML link-scanning branch, which
could select the wrong manifest item or raise KeyError/NameError.

 src/calibre/ebooks/mobi/mobiml.py             |  2 +-
 src/calibre/ebooks/mobi/writer.py             |  3 ++
 .../ebooks/oeb/transforms/trimmanifest.py     | 47 ++++++++++++++-----
 3 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py
index b544d6d2e0..973ee34b36 100644
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@@ -171,7 +171,7 @@ class MobiMLizer(object):
                     para = etree.SubElement(para, XHTML('blockquote'))
                     emleft -= 1
             else:
-                ptag = tag if tag in HEADER_TAGS else 'p'
+                ptag = 'p' #tag if tag in HEADER_TAGS else 'p'
                 para = wrapper = etree.SubElement(parent, XHTML(ptag))
             bstate.inline = bstate.para = para
             vspace = bstate.vpadding + bstate.vmargin
diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index a939243f8d..9332e13ac5 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -27,6 +27,7 @@ from calibre.ebooks.oeb.profile import Context
 from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
 from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
 from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
+from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
 from calibre.ebooks.mobi.palmdoc import compress_doc
 from calibre.ebooks.mobi.langcodes import iana2mobi
 from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
@@ -476,11 +477,13 @@ def main(argv=sys.argv):
     #writer = DirWriter()
     fbase = context.dest.fbase
     fkey = context.dest.fnums.values()
+    tocadder = HTMLTOCAdder()
     flattener = CSSFlattener(
         fbase=fbase, fkey=fkey, unfloat=True, untable=True)
     rasterizer = SVGRasterizer()
     trimmer = ManifestTrimmer()
     mobimlizer = MobiMLizer()
+    tocadder.transform(oeb, context)
     flattener.transform(oeb, context)
     rasterizer.transform(oeb, context)
     mobimlizer.transform(oeb, context)
diff --git a/src/calibre/ebooks/oeb/transforms/trimmanifest.py b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
index 062eed359b..b65116d16b 100644
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@@ -8,8 +8,10 @@ __copyright__ = '2008, Marshall T. Vandegrift '
 
 import sys
 import os
+from itertools import chain
 from lxml import etree
-from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME
+import cssutils
+from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
 
 LINK_SELECTORS = []
 for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
@@ -20,21 +22,42 @@ class ManifestTrimmer(object):
     def transform(self, oeb, context):
         oeb.logger.info('Trimming unused files from manifest...')
         used = set()
-        for item in oeb.spine:
-            used.add(item.href)
-            for selector in LINK_SELECTORS:
-                for href in selector(item.data):
-                    used.add(item.abshref(href))
-        # TODO: Things mentioned in CSS
-        # TODO: Things mentioned in SVG
-        # Who knows what people will do...
+        hrefs = oeb.manifest.hrefs
         for term in oeb.metadata:
             for item in oeb.metadata[term]:
                 if item.value in oeb.manifest.hrefs:
-                    used.add(item.value)
+                    used.add(oeb.manifest.hrefs[item.value])
                 elif item.value in oeb.manifest.ids:
-                    used.add(oeb.manifest.ids[item.value].href)
+                    used.add(oeb.manifest.ids[item.value])
+        for item in oeb.spine:
+            used.add(item)
+        unchecked = used
+        while unchecked:
+            new = set()
+            for item in unchecked:
+                if item.media_type in OEB_DOCS or \
+                   item.media_type[-4:] in ('/xml', '+xml'):
+                    hrefs = [sel(item.data) for sel in LINK_SELECTORS]
+                    for href in chain(*hrefs):
+                        href = item.abshref(href)
+                        if href in oeb.manifest.hrefs:
+                            found = oeb.manifest.hrefs[href]
+                            if found not in used:
+                                new.add(found)
+                elif item.media_type == CSS_MIME:
+                    def replacer(uri):
+                        absuri = item.abshref(uri)
+                        if absuri in oeb.manifest.hrefs:
+                            found = oeb.manifest.hrefs[absuri]
+                            if found not in used:
+                                new.add(found)
+                        return uri
+                    sheet = cssutils.parseString(item.data, href=item.href)
+                    cssutils.replaceUrls(sheet, replacer)
+            used.update(new)
+            unchecked = new
+        # All guide and TOC items are required to be in the spine
         for item in oeb.manifest.values():
-            if item.href not in used:
+            if item not in used:
                 oeb.logger.info('Trimming %r from manifest' % item.href)
                 oeb.manifest.remove(item)