diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py index 49b95a9c19..b432db8203 100644 --- a/src/calibre/ebooks/oeb/reader.py +++ b/src/calibre/ebooks/oeb/reader.py @@ -76,7 +76,7 @@ class OEBReader(object): for elem in opf.iter(tag=etree.Element): nsmap.update(elem.nsmap) for elem in opf.iter(tag=etree.Element): - if namespace(elem.tag) in ('', OPF1_NS): + if namespace(elem.tag) in ('', OPF1_NS) and ':' not in barename(elem.tag): elem.tag = OPF(barename(elem.tag)) nsmap.update(OPF2_NSMAP) attrib = dict(opf.attrib) @@ -90,6 +90,9 @@ class OEBReader(object): if namespace(elem.tag) in DC_NSES: tag = barename(elem.tag).lower() elem.tag = '{%s}%s' % (DC11_NS, tag) + if elem.tag.startswith('dc:'): + tag = elem.tag.partition(':')[-1].lower() + elem.tag = '{%s}%s' % (DC11_NS, tag) metadata.append(elem) for element in xpath(opf, 'o2:metadata//o2:meta'): metadata.append(element) @@ -115,8 +118,13 @@ class OEBReader(object): data = re.sub(r'(?is).+', '', data) data = data.replace('', '') - opf = etree.fromstring(data) - self.logger.warn('OPF contains invalid tours section') + try: + opf = etree.fromstring(data) + self.logger.warn('OPF contains invalid tours section') + except etree.XMLSyntaxError: + from calibre.ebooks.oeb.parse_utils import RECOVER_PARSER + opf = etree.fromstring(data, parser=RECOVER_PARSER) + self.logger.warn('OPF contains invalid markup, trying to parse it anyway') ns = namespace(opf.tag) if ns not in ('', OPF1_NS, OPF2_NS): @@ -691,7 +699,7 @@ class OEBReader(object): item = self._find_ncx(opf) self._toc_from_opf(opf, item) self._pages_from_opf(opf, item) - #self._ensure_cover_image() + # self._ensure_cover_image() def main(argv=sys.argv):