diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index 4a2d56d957..960dbf0242 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -75,7 +75,7 @@ class DocAnalysis(object):
         if format == 'html':
             linere = re.compile('(?<=<p)(?![^>]*>\s*</p>).*?(?=</p>)', re.DOTALL)
         elif format == 'pdf':
-            linere = re.compile('(?<=<br>).*?(?=<br>)', re.DOTALL)
+            linere = re.compile('(?<=<br>)(?!\s*<br>).*?(?=<br>)', re.DOTALL)
         elif format == 'spanned_html':
             linere = re.compile('(?<=<span).*?(?=</span>)', re.DOTALL)
         self.lines = linere.findall(raw)
@@ -191,18 +191,21 @@ class Dehyphenator(object):
         lookupword = self.removesuffixes.sub('', dehyphenated)
         if self.prefixes.match(firsthalf) is None:
            lookupword = self.removeprefix.sub('', lookupword)
-        #print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
-        booklookup = re.compile(u'%s' % lookupword, re.IGNORECASE)
+        print "lookup word is: "+str(lookupword)+", orig is: " + str(hyphenated)
+        try:
+            searchresult = self.html.find(str.lower(lookupword))
+        except:
+            return hyphenated                
         if self.format == 'html_cleanup':
-           if self.html.find(lookupword) != -1 or self.html.find(str.lower(lookupword)) != -1:
-               #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
-               return dehyphenated
-           elif self.html.find(hyphenated) != -1:
-               #print "Cleanup:returned hyphenated word: " + str(hyphenated)
-               return hyphenated
-           else:
-               #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
-               return firsthalf+u'\u2014'+wraptags+secondhalf
+            if self.html.find(lookupword) != -1 or self.html.find(str.lower(lookupword)) != -1:
+                #print "Cleanup:returned dehyphenated word: " + str(dehyphenated)
+                return dehyphenated
+            elif self.html.find(hyphenated) != -1:
+                #print "Cleanup:returned hyphenated word: " + str(hyphenated)
+                return hyphenated
+            else:
+                #print "Cleanup:returning original text "+str(firsthalf)+" + linefeed "+str(secondhalf)
+                return firsthalf+u'\u2014'+wraptags+secondhalf
                
         else:
             if self.html.find(lookupword) != -1 or self.html.find(str.lower(lookupword)) != -1:
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 96df37f631..b6969a3659 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -145,7 +145,7 @@ class PreProcessor(object):
         #
         # Build the Regular Expressions in pieces
         lookahead = "(?=<(p|div))"
-        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>span|[ibu])[^>]*>)?\s*(<(?P<inner2>span|[ibu])[^>]*>)?\s*(<(?P<inner3>span|[ibu])[^>]*>)?\s*"
+        chapter_line_open = "<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*"
         chapter_header_open = r"(?P<chap>"
         chapter_header_close = ")\s*"
         chapter_line_close = "(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)\s[^>]*>)?\s*</(?P=outer)>\s*"
@@ -154,7 +154,7 @@ class PreProcessor(object):
         else:
             blank_lines = ""
         opt_title_open = "("
-        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>span|[ibu])[^>]*>)?\s*(<(?P<inner5>span|[ibu])[^>]*>)?\s*(<(?P<inner6>span|[ibu])[^>]*>)?\s*"
+        title_line_open = "<(?P<outer2>p|div)[^>]*>\s*(<(?P<inner4>font|span|[ibu])[^>]*>)?\s*(<(?P<inner5>font|span|[ibu])[^>]*>)?\s*(<(?P<inner6>font|span|[ibu])[^>]*>)?\s*"
         title_header_open = "(?P<title>"
         title_header_close = ")\s*"
         title_line_close = "(</(?P=inner6)>)?\s*(</(?P=inner5)>)?\s*(</(?P=inner4)\s[^>]*>)?\s*</(?P=outer2)>"