def ensure_namespace_prefixes(node, nsmap):
    """Make sure ``node`` declares the prefix -> namespace bindings in ``nsmap``.

    The wanted namespace map is built from ``node.nsmap`` by dropping every
    existing binding whose URI is one of the values of ``nsmap`` (whatever
    prefix it was bound to) and then adding the bindings from ``nsmap``
    itself. Each URI in ``nsmap`` therefore ends up reachable only through
    the prefix requested for it.

    :param node: An element exposing an ``nsmap`` mapping of prefix to
        namespace URI (the default namespace uses the ``None`` prefix).
    :param nsmap: Mapping of prefix to namespace URI that must be present.
    :return: ``node`` itself when its namespace map already equals the wanted
        one, otherwise the result of
        ``clone_element(node, nsmap=fnsmap, in_context=False)``.
    """
    # .values()/.items() instead of .itervalues()/.iteritems(): the iter*
    # methods exist only on Python 2 dicts, while these work on 2 and 3.
    namespace_uris = frozenset(nsmap.values())
    fnsmap = {k: v for k, v in node.nsmap.items() if v not in namespace_uris}
    fnsmap.update(nsmap)
    # Only clone when something actually changes, so callers get back the
    # very same object in the common already-correct case.
    if fnsmap != dict(node.nsmap):
        node = clone_element(node, nsmap=fnsmap, in_context=False)
    return node
container.href_to_name(item.get('href'), container.opf_name) root = container.parsed(name) + root = ensure_namespace_prefixes(root, {'epub': EPUB_NS}) properties = set() - container.dirty(name) # Ensure entities are converted + container.replace(name, root) # Ensure entities are converted if xpath(root, '//svg:svg'): properties.add('svg') if xpath(root, '//h:script'):