From 2a40afbd8e819e8fee0261e1f35ba54af235be8d Mon Sep 17 00:00:00 2001 From: ldolse Date: Fri, 19 Nov 2010 12:54:25 +0800 Subject: [PATCH] blank lines are preserved in rtf2xml, then converted to empty html paragraphs to preserve soft breaks --- src/calibre/ebooks/rtf/input.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index 078b30627f..d7619d471a 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -84,7 +84,7 @@ class RTFInput(InputFormatPlugin): group_borders = 1, # Write or do not write paragraphs. Default is 0. - empty_paragraphs = 0, + empty_paragraphs = 1, ) parser.parse_rtf() ans = open('out.xml').read() @@ -228,6 +228,10 @@ class RTFInput(InputFormatPlugin): with open(html, 'wb') as f: res = transform.tostring(result) res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] + # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines + if not getattr(self.options, 'remove_paragraph_spacing', False): + res = re.sub('\s*<body>', '<body>', res) + res = re.sub('\n{4}', u'\n<p>\u00a0</p>\n', res) if self.options.preprocess_html: preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None)) res = preprocessor(res)