DOCX Input: Change handling of sequences of space characters to more closely follow Microsoft Word.

This commit is contained in:
Kovid Goyal 2015-02-03 11:14:23 +05:30
parent 28e6946de4
commit 4618304369

View File

@@ -581,17 +581,21 @@ class Convert(object):
         continue
     space = child.get(XML('space'), None)
     preserve = False
-    if space == 'preserve':
-        # Only use a <span> with white-space:pre-wrap if this element
-        # actually needs it, i.e. if it has more than one
-        # consecutive space or it has newlines or tabs.
-        multi_spaces = self.ms_pat.search(child.text) is not None
-        preserve = multi_spaces or self.ws_pat.search(child.text) is not None
+    ctext = child.text
+    if space != 'preserve':
+        # Remove leading and trailing whitespace. Word ignores
+        # leading and trailing whitespace without preserve
+        ctext = ctext.strip()
+    # Only use a <span> with white-space:pre-wrap if this element
+    # actually needs it, i.e. if it has more than one
+    # consecutive space or it has newlines or tabs.
+    multi_spaces = self.ms_pat.search(ctext) is not None
+    preserve = multi_spaces or self.ws_pat.search(ctext) is not None
     if preserve:
-        text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
+        text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
         ans.append(text.elem)
     else:
-        text.buf.append(child.text)
+        text.buf.append(ctext)
 elif is_tag(child, 'w:cr'):
     text.add_elem(BR())
     ans.append(text.elem)