def resolve_alternate_content(self, doc):
    """Drop ``mc:Choice`` branches wherever an ``mc:Fallback`` is available.

    For proprietary extensions in Word documents use the fallback, spec
    compliant form. See
    https://wiki.openoffice.org/wiki/OOXML/Markup_Compatibility_and_Extensibility

    ``doc`` is modified in place: for every ``mc:AlternateContent``
    descendant that has at least one direct ``mc:Fallback`` child, all of
    its direct ``mc:Choice`` children are removed. An ``mc:AlternateContent``
    without a fallback is left untouched, so its choices remain the only
    available content. Returns None.
    """
    # The two child queries do not depend on the element being visited, so
    # look them up once instead of on every loop iteration.
    find_choices = self.namespace.XPath('./mc:Choice')
    find_fallbacks = self.namespace.XPath('./mc:Fallback')
    # Snapshot the matches first: the loop body removes nodes from the very
    # tree that is being traversed.
    for ac in list(self.namespace.descendants(doc, 'mc:AlternateContent')):
        if not find_fallbacks(ac):
            # No spec compliant alternative exists, keep the choices.
            continue
        for choice in find_choices(ac):
            ac.remove(choice)