From 03f70c156c7d557d61db1e348f11bb2a997d90e1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 10 Jan 2011 10:44:09 -0700
Subject: [PATCH] RTF Input: Fix regression that broke the Preprocess HTML
 option

---
 src/calibre/ebooks/conversion/utils.py | 6 +++---
 src/calibre/ebooks/rtf/input.py        | 2 +-
 src/calibre/ebooks/txt/input.py        | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 52d1bcc619..dac93fa2e2 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -191,15 +191,15 @@ class PreProcessor(object):
         blanklines = "\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*"
         line_opening = "<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*"
         txt_line_wrap = u"((\u0020|\u0009)*\n){1,4}"
-        
+
         unwrap_regex = lookahead+line_ending+blanklines+line_opening
         if format == 'txt':
             unwrap_regex = lookahead+txt_line_wrap
-        
+
         unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
         content = unwrap.sub(' ', content)
         return content
-       
+
 
     def __call__(self, html):
         self.log("*********  Preprocessing HTML  *********")
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 8c7561f68c..5154373eda 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
                         u'<p>\u00a0</p>\n'.encode('utf-8'), res)
             if self.opts.preprocess_html:
                 preprocessor = PreProcessor(self.opts, log=getattr(self, 'log', None))
-                res = preprocessor(res)
+                res = preprocessor(res.decode('utf-8')).encode('utf-8')
             f.write(res)
         self.write_inline_css(inline_class, border_styles)
         stream.seek(0)
diff --git a/src/calibre/ebooks/txt/input.py b/src/calibre/ebooks/txt/input.py
index 3957391494..aaff8b55c0 100644
--- a/src/calibre/ebooks/txt/input.py
+++ b/src/calibre/ebooks/txt/input.py
@@ -53,7 +53,7 @@ class TXTInput(InputFormatPlugin):
     def convert(self, stream, options, file_ext, log,
                 accelerators):
         log.debug('Reading text from file...')
-        
+
         txt = stream.read()
         # Get the encoding of the document.
         if options.input_encoding:
@@ -80,7 +80,7 @@ class TXTInput(InputFormatPlugin):
         # Get length for hyphen removal and punctuation unwrap
         docanalysis = DocAnalysis('txt', txt)
         length = docanalysis.line_length(.5)
-            
+
         if options.formatting_type == 'auto':
             options.formatting_type = detect_formatting_type(txt)
 
@@ -122,7 +122,7 @@ class TXTInput(InputFormatPlugin):
                 txt = preprocessor.punctuation_unwrap(length, txt, 'txt')
 
             flow_size = getattr(options, 'flow_size', 0)
-            
+
             if options.formatting_type == 'heuristic':
                 html = convert_heuristic(txt, epub_split_size_kb=flow_size)
             else: