Conversion: Add workaround for converting EPUB files with broken (probably hand-edited) OPF files. See #1389188 (Private bug)

This commit is contained in:
Kovid Goyal 2014-11-04 22:22:52 +05:30
parent 533dcb41d6
commit d372ffa238

View File

@@ -76,7 +76,7 @@ class OEBReader(object):
for elem in opf.iter(tag=etree.Element):
nsmap.update(elem.nsmap)
for elem in opf.iter(tag=etree.Element):
if namespace(elem.tag) in ('', OPF1_NS):
if namespace(elem.tag) in ('', OPF1_NS) and ':' not in barename(elem.tag):
elem.tag = OPF(barename(elem.tag))
nsmap.update(OPF2_NSMAP)
attrib = dict(opf.attrib)
@@ -90,6 +90,9 @@ class OEBReader(object):
if namespace(elem.tag) in DC_NSES:
tag = barename(elem.tag).lower()
elem.tag = '{%s}%s' % (DC11_NS, tag)
if elem.tag.startswith('dc:'):
tag = elem.tag.partition(':')[-1].lower()
elem.tag = '{%s}%s' % (DC11_NS, tag)
metadata.append(elem)
for element in xpath(opf, 'o2:metadata//o2:meta'):
metadata.append(element)
@@ -115,8 +118,13 @@ class OEBReader(object):
data = re.sub(r'(?is)<tours>.+</tours>', '', data)
data = data.replace('<dc-metadata>',
'<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">')
try:
opf = etree.fromstring(data)
self.logger.warn('OPF contains invalid tours section')
except etree.XMLSyntaxError:
from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER
opf = etree.fromstring(data, parser=RECOVER_PARSER)
self.logger.warn('OPF contains invalid markup, trying to parse it anyway')
ns = namespace(opf.tag)
if ns not in ('', OPF1_NS, OPF2_NS):