From 4618304369be5a8b119dd62c7f5db8f36e3af7c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 3 Feb 2015 11:14:23 +0530 Subject: [PATCH] DOCX Input: Change handling of sequences of space characters to more closely follow Microsoft Word. --- src/calibre/ebooks/docx/to_html.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 4681508380..8ad589c771 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -581,17 +581,21 @@ class Convert(object): continue space = child.get(XML('space'), None) preserve = False - if space == 'preserve': - # Only use a <span> with white-space:pre-wrap if this element - # actually needs it, i.e. if it has more than one - # consecutive space or it has newlines or tabs. - multi_spaces = self.ms_pat.search(child.text) is not None - preserve = multi_spaces or self.ws_pat.search(child.text) is not None + ctext = child.text + if space != 'preserve': + # Remove leading and trailing whitespace. Word ignores + # leading and trailing whitespace without preserve + ctext = ctext.strip() + # Only use a <span> with white-space:pre-wrap if this element + # actually needs it, i.e. if it has more than one + # consecutive space or it has newlines or tabs. + multi_spaces = self.ms_pat.search(ctext) is not None + preserve = multi_spaces or self.ws_pat.search(ctext) is not None if preserve: - text.add_elem(SPAN(child.text, style="white-space:pre-wrap")) + text.add_elem(SPAN(ctext, style="white-space:pre-wrap")) ans.append(text.elem) else: - text.buf.append(child.text) + text.buf.append(ctext) elif is_tag(child, 'w:cr'): text.add_elem(BR()) ans.append(text.elem)