From 9119157be58569b54cd8386384ef262c3e36ce19 Mon Sep 17 00:00:00 2001
From: John Schember
Date: Sat, 29 May 2010 12:01:25 -0400
Subject: [PATCH] Fix RTF Output: Newline characters should be turned into spaces not ignored.

---
Review notes (commentary only, not part of the commit message; patch tools
skip everything before the first "diff --git" line):

* Inside the spine loop of RTFMLizer, each item's XHTML <body> is now
  serialised with etree.tostring, passed through remove_newlines, and
  re-parsed with etree.fromstring before being handed to dump_text.
* remove_newlines replaces '\r\n', '\n' and '\r' with one space each.
  '\r\n' is handled first so that a CRLF pair becomes one space, not two.
* The replacement is applied to the whole serialised string, i.e. to markup
  as well as to text content.
* lxml.etree is imported at module level because the changed loop uses it.

 src/calibre/ebooks/rtf/rtfml.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/rtf/rtfml.py b/src/calibre/ebooks/rtf/rtfml.py
index e466885c6f..d6b20402ce 100644
--- a/src/calibre/ebooks/rtf/rtfml.py
+++ b/src/calibre/ebooks/rtf/rtfml.py
@@ -19,6 +19,8 @@ except ImportError:
 
 import cStringIO
 
+from lxml import etree
+
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, \
     OEB_RASTER_IMAGES
 from calibre.ebooks.oeb.stylizer import Stylizer
@@ -118,13 +120,23 @@ class RTFMLizer(object):
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to RTF markup...' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
-            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
+            content = self.remove_newlines(content)
+            output += self.dump_text(etree.fromstring(content), stylizer)
         output += self.footer()
         output = self.insert_images(output)
         output = self.clean_text(output)
 
         return output
 
+    def remove_newlines(self, text):
+        self.log.debug('\tRemove newlines for processing...')
+        text = text.replace('\r\n', ' ')
+        text = text.replace('\n', ' ')
+        text = text.replace('\r', ' ')
+
+        return text
+
     def header(self):
         return u'{\\rtf1{\\info{\\title %s}{\\author %s}}\\ansi\\ansicpg1252\\deff0\\deflang1033' % (self.oeb_book.metadata.title[0].value, authors_to_string([x.value for x in self.oeb_book.metadata.creator]))
 