From b37e9326685c1cefa4c06bee370988bfce8f495d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 20 May 2014 22:19:25 +0530 Subject: [PATCH] DOCX Input: Work around a buggy version of Microsoft Word that converts newlines in the document summary into _x000d_. These sequences are now stripped when reading metadata from docx files. Fixes #1321343 [DOCX input known characters metadata](https://bugs.launchpad.net/calibre/+bug/1321343) --- src/calibre/ebooks/docx/container.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index deaf5bd4d0..835e4c4c0a 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -54,6 +54,7 @@ def read_doc_props(raw, mi): desc = XPath('//dc:description')(root) if desc: raw = etree.tostring(desc[0], method='text', encoding=unicode) + raw = raw.replace('_x000d_', '') # Word 2007 mangles newlines in the summary mi.comments = raw langs = []