Mobipocket support:

- All bells and whistles to manifest trimmer
  - Added generation of HTML TOCs
  - Regression to Mobi <guide/> linking -- wtf?
This commit is contained in:
Marshall T. Vandegrift 2009-01-09 10:19:17 -05:00
parent 8414414517
commit e8b919db81
3 changed files with 39 additions and 13 deletions

View File

@ -171,7 +171,7 @@ class MobiMLizer(object):
para = etree.SubElement(para, XHTML('blockquote'))
emleft -= 1
else:
ptag = tag if tag in HEADER_TAGS else 'p'
ptag = 'p' #tag if tag in HEADER_TAGS else 'p'
para = wrapper = etree.SubElement(parent, XHTML(ptag))
bstate.inline = bstate.para = para
vspace = bstate.vpadding + bstate.vmargin

View File

@ -27,6 +27,7 @@ from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
@ -476,11 +477,13 @@ def main(argv=sys.argv):
#writer = DirWriter()
fbase = context.dest.fbase
fkey = context.dest.fnums.values()
tocadder = HTMLTOCAdder()
flattener = CSSFlattener(
fbase=fbase, fkey=fkey, unfloat=True, untable=True)
rasterizer = SVGRasterizer()
trimmer = ManifestTrimmer()
mobimlizer = MobiMLizer()
tocadder.transform(oeb, context)
flattener.transform(oeb, context)
rasterizer.transform(oeb, context)
mobimlizer.transform(oeb, context)

View File

@ -8,8 +8,10 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys
import os
from itertools import chain
from lxml import etree
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME
import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
LINK_SELECTORS = []
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
@ -20,21 +22,42 @@ class ManifestTrimmer(object):
def transform(self, oeb, context):
    """Remove manifest items that nothing in the book refers to.

    The set of used items is seeded with every manifest item named by a
    metadata value (matched first against ``oeb.manifest.hrefs``, then
    against ``oeb.manifest.ids``) and with every spine item.  It is then
    grown to a fixed point: each newly discovered item is scanned for
    links to further manifest items -- via ``LINK_SELECTORS`` for
    documents whose media type is in ``OEB_DOCS`` or ends in ``/xml`` or
    ``+xml``, and via the stylesheet's URLs for ``CSS_MIME`` items.
    Every manifest item not reached this way is logged and removed.

    :param oeb: the book being processed; its ``logger``, ``metadata``,
        ``spine`` and ``manifest`` attributes are used, and ``manifest``
        is modified in place.
    :param context: not used by this transform.
    :return: None
    """
    oeb.logger.info('Trimming unused files from manifest...')
    used = set()
    # Seed: manifest items named by metadata values, by href or by id.
    for term in oeb.metadata:
        for item in oeb.metadata[term]:
            if item.value in oeb.manifest.hrefs:
                used.add(oeb.manifest.hrefs[item.value])
            elif item.value in oeb.manifest.ids:
                used.add(oeb.manifest.ids[item.value])
    # Seed: everything in the spine.
    for item in oeb.spine:
        used.add(item)
    # The first pass scans the seed set itself; later passes scan only
    # the items discovered by the previous pass.  `used` is not modified
    # while it is being iterated -- additions are collected in `new`.
    unchecked = used
    while unchecked:
        new = set()
        for item in unchecked:
            if item.media_type in OEB_DOCS or \
               item.media_type[-4:] in ('/xml', '+xml'):
                links = [sel(item.data) for sel in LINK_SELECTORS]
                for href in chain(*links):
                    href = item.abshref(href)
                    if href in oeb.manifest.hrefs:
                        found = oeb.manifest.hrefs[href]
                        if found not in used:
                            new.add(found)
            elif item.media_type == CSS_MIME:
                # replaceUrls() is used only to enumerate the sheet's
                # URLs: the callback records each referenced manifest
                # item and returns the URI unchanged.
                def replacer(uri):
                    absuri = item.abshref(uri)
                    if absuri in oeb.manifest.hrefs:
                        # Look up the URL that was just resolved and
                        # tested, not a name left over from (or never
                        # bound by) the document branch above.
                        found = oeb.manifest.hrefs[absuri]
                        if found not in used:
                            new.add(found)
                    return uri
                sheet = cssutils.parseString(item.data, href=item.href)
                cssutils.replaceUrls(sheet, replacer)
        used.update(new)
        unchecked = new
    # All guide and TOC items are required to be in the spine
    # Iterate over a snapshot: items are removed from the manifest inside
    # the loop.
    for item in list(oeb.manifest.values()):
        if item not in used:
            oeb.logger.info('Trimming %r from manifest' % item.href)
            oeb.manifest.remove(item)