From b9c6f154c0d2c2fbe249d2e2701043214deeb5fc Mon Sep 17 00:00:00 2001 From: Sengian Date: Sun, 16 Oct 2011 13:55:54 +0200 Subject: [PATCH] RTF: Improve empty paragraphs handling & clean html file --- resources/templates/rtf.xsl | 24 ++++++++++++++++-------- src/calibre/ebooks/rtf/input.py | 10 ++++++---- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/resources/templates/rtf.xsl b/resources/templates/rtf.xsl index 7d48418776..61474701dc 100644 --- a/resources/templates/rtf.xsl +++ b/resources/templates/rtf.xsl @@ -1,7 +1,7 @@ - - - - - + + + + + + +   + + + @@ -149,7 +154,7 @@ - unamed + unnamed @@ -445,7 +450,10 @@ - # + + # + + diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index c1e649851b..5858824434 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -305,11 +305,13 @@ class RTFInput(InputFormatPlugin): html = 'index.xhtml' with open(html, 'wb') as f: res = transform.tostring(result) - res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + #clean multiple \n + res = re.sub('\n+', '\n', res) # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines - res = re.sub('\s*', '', res) - res = re.sub('(?<=\n)\n{2}', - u'

\u00a0

\n'.encode('utf-8'), res) + # res = re.sub('\s*', '', res) + # res = re.sub('(?<=\n)\n{2}', + # u'

\u00a0

\n'.encode('utf-8'), res) f.write(res) self.write_inline_css(inline_class, border_styles) stream.seek(0)