Produce more correct OEBPS 1.x output, and support OPF fallbacks.

This commit is contained in:
Marshall T. Vandegrift 2008-12-10 09:13:11 -05:00
parent 475a5eb899
commit 361d294232
2 changed files with 26 additions and 12 deletions

View File

@@ -33,10 +33,11 @@ XHTML_MIME = 'application/xhtml+xml'
CSS_MIME = 'text/css' CSS_MIME = 'text/css'
NCX_MIME = 'application/x-dtbncx+xml' NCX_MIME = 'application/x-dtbncx+xml'
OPF_MIME = 'application/oebps-package+xml' OPF_MIME = 'application/oebps-package+xml'
OEB_DOC_MIME = 'text/x-oeb1-document'
OEB_CSS_MIME = 'text/x-oeb1-css'
OEB_STYLES = set([CSS_MIME, 'text/x-oeb1-css', 'text/x-oeb-css']) OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
OEB_DOCS = set([XHTML_MIME, 'text/html', 'text/x-oeb1-document', OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME, 'text/x-oeb-document'])
'text/x-oeb-document'])
def element(parent, *args, **kwargs): def element(parent, *args, **kwargs):
@@ -205,10 +206,11 @@ class Metadata(object):
class Manifest(object): class Manifest(object):
class Item(object): class Item(object):
def __init__(self, id, href, media_type, loader=str): def __init__(self, id, href, media_type, fallback=None, loader=str):
self.id = id self.id = id
self.href = self.path = urlnormalize(href) self.href = self.path = urlnormalize(href)
self.media_type = media_type self.media_type = media_type
self.fallback = fallback
self.spine_position = None self.spine_position = None
self.linear = True self.linear = True
self._loader = loader self._loader = loader
@@ -251,8 +253,9 @@ class Manifest(object):
self.items = {} self.items = {}
self.hrefs = {} self.hrefs = {}
def add(self, id, href, media_type): def add(self, id, href, media_type, fallback=None):
item = self.Item(id, href, media_type, self.oeb.container.read) item = self.Item(
id, href, media_type, fallback, self.oeb.container.read)
self.items[item.id] = item self.items[item.id] = item
self.hrefs[item.href] = item self.hrefs[item.href] = item
return item return item
@@ -283,16 +286,25 @@ class Manifest(object):
def to_opf1(self, parent=None): def to_opf1(self, parent=None):
elem = element(parent, 'manifest') elem = element(parent, 'manifest')
for item in self.items.values(): for item in self.items.values():
media_type = item.media_type
if media_type == XHTML_MIME:
media_type = OEB_DOC_MIME
elif media_type == CSS_MIME:
media_type = OEB_CSS_MIME
attrib = {'id': item.id, 'href': item.href, attrib = {'id': item.id, 'href': item.href,
'media-type': item.media_type} 'media-type': media_type}
if item.fallback:
attrib['fallback'] = item.fallback
element(elem, 'item', attrib=attrib) element(elem, 'item', attrib=attrib)
return elem return elem
def to_opf2(self, parent=None): def to_opf2(self, parent=None):
elem = element(parent, OPF('manifest')) elem = element(parent, OPF('manifest'))
for item in self.items.values(): for item in self.items.values():
attrib = {'id': item.id, 'href': item.href, attrib = {'id': item.id, 'href': item.href,
'media-type': item.media_type} 'media-type': item.media_type}
if item.fallback:
attrib['fallback'] = item.fallback
element(elem, OPF('item'), attrib=attrib) element(elem, OPF('item'), attrib=attrib)
return elem return elem
@@ -520,7 +532,7 @@ class Oeb(object):
self.manifest = manifest = Manifest(self) self.manifest = manifest = Manifest(self)
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
manifest.add(elem.get('id'), elem.get('href'), manifest.add(elem.get('id'), elem.get('href'),
elem.get('media-type')) elem.get('media-type'), elem.get('fallback'))
def _spine_from_opf(self, opf): def _spine_from_opf(self, opf):
self.spine = spine = Spine(self) self.spine = spine = Spine(self)

View File

@@ -15,7 +15,8 @@ from urllib import unquote as urlunquote
from lxml import etree from lxml import etree
from calibre.ebooks.lit.reader import msguid, DirectoryEntry from calibre.ebooks.lit.reader import msguid, DirectoryEntry
import calibre.ebooks.lit.maps as maps import calibre.ebooks.lit.maps as maps
from calibre.ebooks.lit.oeb import CSS_MIME, OPF_MIME, XML_NS, XML from calibre.ebooks.lit.oeb import OEB_STYLES, OEB_CSS_MIME, CSS_MIME, \
OPF_MIME, XML_NS, XML
from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize from calibre.ebooks.lit.oeb import namespace, barename, urlnormalize
from calibre.ebooks.lit.oeb import Oeb from calibre.ebooks.lit.oeb import Oeb
from calibre.ebooks.lit.stylizer import Stylizer from calibre.ebooks.lit.stylizer import Stylizer
@@ -194,6 +195,8 @@ class ReBinary(object):
self.anchors.append((value, tag_offset)) self.anchors.append((value, tag_offset))
elif attr.startswith('ms--'): elif attr.startswith('ms--'):
attr = '%' + attr[4:] attr = '%' + attr[4:]
elif attr == 'type' and value in OEB_STYLES:
value = OEB_CSS_MIME
if attr in tattrs: if attr in tattrs:
self.write(tattrs[attr]) self.write(tattrs[attr])
else: else:
@@ -220,8 +223,7 @@ class ReBinary(object):
child = cstyle = nstyle = None child = cstyle = nstyle = None
for next in chain(elem, [None]): for next in chain(elem, [None]):
if self.stylizer: if self.stylizer:
nstyle = self.stylizer.style(next) \ nstyle = None if next is None else self.stylizer.style(next)
if (next is not None) else None
if child is not None: if child is not None:
if not preserve \ if not preserve \
and (inhead or not nstyle and (inhead or not nstyle