mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
DOCX Input: Fix figures in newer Word documents being duplicated. Fixes #1789238 [textboxes duplicated after import from docx](https://bugs.launchpad.net/calibre/+bug/1789238)
Newer Word versions use a proprietary image markup with a fallback to the standard form. Ignore the proprietary form and use only the fallback, standard version when converting.
This commit is contained in:
parent
03126d37f8
commit
30b1f442a6
@ -39,6 +39,7 @@ TRANSITIONAL_NAMESPACES = {
|
|||||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
'o': 'urn:schemas-microsoft-com:office:office',
|
'o': 'urn:schemas-microsoft-com:office:office',
|
||||||
've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
||||||
|
'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
||||||
# Text Content
|
# Text Content
|
||||||
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
|
'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
|
||||||
'w10': 'urn:schemas-microsoft-com:office:word',
|
'w10': 'urn:schemas-microsoft-com:office:word',
|
||||||
|
@ -120,6 +120,7 @@ class Convert(object):
|
|||||||
self.log.debug('Converting Word markup to HTML')
|
self.log.debug('Converting Word markup to HTML')
|
||||||
|
|
||||||
self.read_page_properties(doc)
|
self.read_page_properties(doc)
|
||||||
|
self.resolve_alternate_content(doc)
|
||||||
self.current_rels = relationships_by_id
|
self.current_rels = relationships_by_id
|
||||||
for wp, page_properties in self.page_map.iteritems():
|
for wp, page_properties in self.page_map.iteritems():
|
||||||
self.current_page = page_properties
|
self.current_page = page_properties
|
||||||
@ -267,6 +268,17 @@ class Convert(object):
|
|||||||
for x in current:
|
for x in current:
|
||||||
self.page_map[x] = pr
|
self.page_map[x] = pr
|
||||||
|
|
||||||
|
def resolve_alternate_content(self, doc):
    # Word wraps proprietary markup in mc:AlternateContent, pairing each
    # mc:Choice (the extension form) with an mc:Fallback (the spec compliant
    # form). Keep only the fallback so the content is not converted twice.
    # See https://wiki.openoffice.org/wiki/OOXML/Markup_Compatibility_and_Extensibility
    #
    # The tree rooted at doc is modified in place; nothing is returned.
    ns = self.namespace
    for alternate in ns.descendants(doc, 'mc:AlternateContent'):
        choice_nodes = ns.XPath('./mc:Choice')(alternate)
        fallback_nodes = ns.XPath('./mc:Fallback')(alternate)
        if not fallback_nodes:
            # No fallback to rely on: leave the choices untouched.
            continue
        for node in choice_nodes:
            alternate.remove(node)
|
||||||
|
|
||||||
def read_styles(self, relationships_by_type):
|
def read_styles(self, relationships_by_type):
|
||||||
|
|
||||||
def get_name(rtype, defname):
|
def get_name(rtype, defname):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user