From a447b0818d628000c7cf75ccafc42819f829a6cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 12 Jun 2013 09:45:59 +0530 Subject: [PATCH] DOCX Input: Simplify generated markup DOCX Input: Simplify generated markup by avoiding the use of unnecessary tags for whitespace preservation. --- src/calibre/ebooks/docx/to_html.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 379faf0639..7ee6e9e242 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -41,6 +41,8 @@ class Convert(object): def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): self.docx = DOCX(path_or_stream, log=log) + self.ms_pat = re.compile(r'\s{2,}') + self.ws_pat = re.compile(r'[\n\r\t]') self.log = self.docx.log self.notes_text = notes_text or _('Notes') self.dest_dir = dest_dir or os.getcwdu() @@ -414,7 +416,14 @@ class Convert(object): if not child.text: continue space = child.get(XML('space'), None) + preserve = False if space == 'preserve': + # Only use a <span> with white-space:pre-wrap if this element + # actually needs it, i.e. if it has more than one + # consecutive space or it has newlines or tabs. + multi_spaces = self.ms_pat.search(child.text) is not None + preserve = multi_spaces or self.ws_pat.search(child.text) is not None + if preserve: text.add_elem(SPAN(child.text, style="white-space:pre-wrap")) ans.append(text.elem) else: