Mobipocket support:

- Added all the bells and whistles to the manifest trimmer
  - Added generation of HTML TOCs
  - Regression in Mobi <guide/> linking -- cause not yet understood (wtf?)
This commit is contained in:
Marshall T. Vandegrift 2009-01-09 10:19:17 -05:00
parent 8414414517
commit e8b919db81
3 changed files with 39 additions and 13 deletions

View File

@ -171,7 +171,7 @@ class MobiMLizer(object):
para = etree.SubElement(para, XHTML('blockquote')) para = etree.SubElement(para, XHTML('blockquote'))
emleft -= 1 emleft -= 1
else: else:
ptag = tag if tag in HEADER_TAGS else 'p' ptag = 'p' #tag if tag in HEADER_TAGS else 'p'
para = wrapper = etree.SubElement(parent, XHTML(ptag)) para = wrapper = etree.SubElement(parent, XHTML(ptag))
bstate.inline = bstate.para = para bstate.inline = bstate.para = para
vspace = bstate.vpadding + bstate.vmargin vspace = bstate.vpadding + bstate.vmargin

View File

@ -27,6 +27,7 @@ from calibre.ebooks.oeb.profile import Context
from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener from calibre.ebooks.oeb.transforms.flatcss import CSSFlattener
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer from calibre.ebooks.oeb.transforms.trimmanifest import ManifestTrimmer
from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
from calibre.ebooks.mobi.palmdoc import compress_doc from calibre.ebooks.mobi.palmdoc import compress_doc
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer
@ -476,11 +477,13 @@ def main(argv=sys.argv):
#writer = DirWriter() #writer = DirWriter()
fbase = context.dest.fbase fbase = context.dest.fbase
fkey = context.dest.fnums.values() fkey = context.dest.fnums.values()
tocadder = HTMLTOCAdder()
flattener = CSSFlattener( flattener = CSSFlattener(
fbase=fbase, fkey=fkey, unfloat=True, untable=True) fbase=fbase, fkey=fkey, unfloat=True, untable=True)
rasterizer = SVGRasterizer() rasterizer = SVGRasterizer()
trimmer = ManifestTrimmer() trimmer = ManifestTrimmer()
mobimlizer = MobiMLizer() mobimlizer = MobiMLizer()
tocadder.transform(oeb, context)
flattener.transform(oeb, context) flattener.transform(oeb, context)
rasterizer.transform(oeb, context) rasterizer.transform(oeb, context)
mobimlizer.transform(oeb, context) mobimlizer.transform(oeb, context)

View File

@ -8,8 +8,10 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
import sys import sys
import os import os
from itertools import chain
from lxml import etree from lxml import etree
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME import cssutils
from calibre.ebooks.oeb.base import XPNSMAP, CSS_MIME, OEB_DOCS
LINK_SELECTORS = [] LINK_SELECTORS = []
for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data', for expr in ('//h:link/@href', '//h:img/@src', '//h:object/@data',
@ -20,21 +22,42 @@ class ManifestTrimmer(object):
def transform(self, oeb, context):
    """Remove every manifest item that nothing in the book refers to.

    The set of used items is seeded with the manifest items named by the
    metadata (by href or by id) and with every spine item.  It is then
    grown transitively: each newly found XML/XHTML document is scanned
    with LINK_SELECTORS and each newly found stylesheet is scanned for
    URLs, until a pass discovers nothing new.  Whatever is left in the
    manifest afterwards is logged and removed.

    oeb     -- the book; its logger, metadata, spine and manifest are
               read, and its manifest is modified in place.
    context -- accepted for the common transform interface; not used here.
    """
    oeb.logger.info('Trimming unused files from manifest...')
    manifest = oeb.manifest
    used = set()
    # Seed: anything the metadata points at, either by href or by id.
    for term in oeb.metadata:
        for entry in oeb.metadata[term]:
            if entry.value in manifest.hrefs:
                used.add(manifest.hrefs[entry.value])
            elif entry.value in manifest.ids:
                used.add(manifest.ids[entry.value])
    # Seed: everything in the spine.
    for item in oeb.spine:
        used.add(item)
    # Work on a copy so `used` is never iterated while it is being grown.
    unchecked = set(used)
    while unchecked:
        new = set()
        for item in unchecked:
            # Gather the raw references made by this item, whatever its type.
            refs = []
            if item.media_type in OEB_DOCS or \
               item.media_type.endswith(('/xml', '+xml')):
                for selector in LINK_SELECTORS:
                    refs.extend(selector(item.data))
            elif item.media_type == CSS_MIME:
                def collect(uri):
                    # replaceUrls() is used purely as a URL visitor here;
                    # returning the URI unchanged leaves the sheet as is.
                    refs.append(uri)
                    return uri
                sheet = cssutils.parseString(item.data, href=item.href)
                cssutils.replaceUrls(sheet, collect)
            # Resolve each reference against the referring item and keep
            # the ones that name a manifest item not seen before.  Both
            # branches share this lookup, so the CSS path resolves its own
            # URI rather than a leftover variable from the XML path.
            for ref in refs:
                absref = item.abshref(ref)
                if absref in manifest.hrefs:
                    target = manifest.hrefs[absref]
                    if target not in used:
                        new.add(target)
        used.update(new)
        unchecked = new
    # Guide and TOC targets are not scanned separately: they are required
    # to be in the spine, which is already part of the seed above.
    # Iterate over a snapshot because remove() mutates the manifest.
    for item in list(manifest.values()):
        if item not in used:
            oeb.logger.info('Trimming %r from manifest' % item.href)
            manifest.remove(item)