This commit is contained in:
Kovid Goyal 2009-01-26 23:16:09 -08:00
commit cc059c06ae

View File

@ -26,6 +26,7 @@ from calibre.ebooks.oeb.entitydefs import ENTITYDEFS
XML_NS = 'http://www.w3.org/XML/1998/namespace' XML_NS = 'http://www.w3.org/XML/1998/namespace'
XHTML_NS = 'http://www.w3.org/1999/xhtml' XHTML_NS = 'http://www.w3.org/1999/xhtml'
OEB_DOC_NS = 'http://openebook.org/namespaces/oeb-document/1.0/'
OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/' OPF1_NS = 'http://openebook.org/namespaces/oeb-package/1.0/'
OPF2_NS = 'http://www.idpf.org/2007/opf' OPF2_NS = 'http://www.idpf.org/2007/opf'
DC09_NS = 'http://purl.org/metadata/dublin_core' DC09_NS = 'http://purl.org/metadata/dublin_core'
@ -334,8 +335,6 @@ class Manifest(object):
# Possibly decode in user-specified encoding # Possibly decode in user-specified encoding
if self.oeb.encoding is not None: if self.oeb.encoding is not None:
data = data.decode(self.oeb.encoding, 'replace') data = data.decode(self.oeb.encoding, 'replace')
# Force to UNIX line encodings
data = data.replace('\r\n', '\n').replace('\r', '\n')
# Handle broken XHTML w/ SVG (ugh) # Handle broken XHTML w/ SVG (ugh)
if 'svg:' in data and SVG_NS not in data: if 'svg:' in data and SVG_NS not in data:
data = data.replace( data = data.replace(
@ -358,10 +357,26 @@ class Manifest(object):
data = etree.tostring(data, encoding=unicode) data = etree.tostring(data, encoding=unicode)
data = etree.fromstring(data) data = etree.fromstring(data)
# Force into the XHTML namespace # Force into the XHTML namespace
if namespace(data.tag) != XHTML_NS: if barename(data.tag) != 'html':
raise OEBError(
'File %r does not appear to be (X)HTML' % self.href)
elif not namespace(data.tag):
data.attrib['xmlns'] = XHTML_NS data.attrib['xmlns'] = XHTML_NS
data = etree.tostring(data, encoding=unicode) data = etree.tostring(data, encoding=unicode)
data = etree.fromstring(data) data = etree.fromstring(data)
elif namespace(data.tag) != XHTML_NS:
# OEB_DOC_NS, but possibly others
ns = namespace(data.tag)
attrib = dict(data.attrib)
nroot = etree.Element(XHTML('html'),
nsmap={None: XHTML_NS}, attrib=attrib)
for elem in data.iterdescendants():
if isinstance(elem.tag, basestring) and \
namespace(elem.tag) == ns:
elem.tag = XHTML(barename(elem.tag))
for elem in data:
nroot.append(elem)
data = nroot
# Remove any encoding-specifying <meta/> elements # Remove any encoding-specifying <meta/> elements
for meta in self.META_XP(data): for meta in self.META_XP(data):
meta.getparent().remove(meta) meta.getparent().remove(meta)
@ -824,7 +839,6 @@ class OEBBook(object):
def _read_opf(self, opfpath): def _read_opf(self, opfpath):
opf = self.container.read(opfpath) opf = self.container.read(opfpath)
opf = opf.replace('\r\n', '\n').replace('\r', '\n')
try: try:
opf = etree.fromstring(opf) opf = etree.fromstring(opf)
except etree.XMLSyntaxError: except etree.XMLSyntaxError: