From b73e1b3da50810e151d10a5d62251754a077e605 Mon Sep 17 00:00:00 2001
From: ldolse <ldolse@yahoo.com>
Date: Tue, 14 Sep 2010 02:56:56 +1000
Subject: [PATCH] tweaked preprocess for $, added rtf to new preprocess logic,
 changed last pdf default

---
 src/calibre/ebooks/conversion/preprocess.py |  2 +-
 src/calibre/ebooks/rtf/input.py             | 13 +++----------
 src/calibre/gui2/convert/pdf_input.ui       |  2 +-
 3 files changed, 5 insertions(+), 12 deletions(-)
diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index f6277956c8..9464be1210 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -340,7 +340,7 @@ class HTMLPreProcessor(object):
                 # print "The pdf line length returned is " + str(length)
                 end_rules.append(
                     # Un wrap using punctuation
-                    (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines),
+                    (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines),
                 )
 
         for rule in self.PREPROCESS + start_rules:
diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py
index 216ccf591d..d229b80c16 100644
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@@ -8,6 +8,7 @@ from lxml import etree
 
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.conversion.preprocess import line_length
+from calibre.ebooks.conversion.utils import PreProcessor
 
 class InlineClass(etree.XSLTExtension):
 
@@ -229,16 +230,8 @@ class RTFInput(InputFormatPlugin):
             res = transform.tostring(result)
             res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
             if self.options.preprocess_html:
-                self.log("*********  Preprocessing HTML  *********")
-                # Detect Chapters to match the xpath in the GUI
-                chapdetect = re.compile(r'<p[^>]*>\s*<span[^>]*>\s*(?P<chap>(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)>(<(/i|b)>)?)?)\s*</span>\s*</p>', re.IGNORECASE)
-                res = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', res)
-                # Unwrap lines using punctation if the median length of all lines is less than 150
-                length = line_length('html', res, 0.4)
-                self.log("*** Median length is " + str(length) + " ***")
-                unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</span>\s*</p>\s*(?P<up2threeblanks><p[^>]*>\s*(<span[^>]*>\s*</span>\s*)</p>\s*){0,3}\s*<p[^>]*>\s*<span[^>]*>\s*" % length, re.UNICODE)
-                if length < 150:
-                    res = unwrap.sub(' ', res)
+                preprocessor = PreProcessor(res)
+                res = preprocessor(res)
             f.write(res)
         self.write_inline_css(inline_class)
         stream.seek(0)
diff --git a/src/calibre/gui2/convert/pdf_input.ui b/src/calibre/gui2/convert/pdf_input.ui
index 626c68ea63..b2ee421922 100644
--- a/src/calibre/gui2/convert/pdf_input.ui
+++ b/src/calibre/gui2/convert/pdf_input.ui
@@ -46,7 +46,7 @@
       <double>0.010000000000000</double>
      </property>
      <property name="value">
-      <double>0.500000000000000</double>
+      <double>0.450000000000000</double>
      </property>
     </widget>
    </item>