diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 305346d496..9825585cbf 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -236,7 +236,7 @@ class PreProcessor(object):
print unicode(self.chapters_with_title)+" chapters with titles"
else:
html = chapdetect.sub(self.chapter_head, html)
- return html
+ return html
recurse_patterns(html, True)
html = recurse_patterns(html, False)
@@ -322,7 +322,8 @@ class PreProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\w+>', '', html)
- # Get rid of empty span, bold, & italics tags
+ # Get rid of empty span, bold, font, & italics tags
+ html = re.sub(r'\s*<font[^>]*>\s*</font>\s*', '', html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*</[ibu]>\s*){0,2}\s*</[ibu]>", " ", html)
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]>\s*</span>){0,2}\s*</span>\s*", " ", html)
diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py
index 584be71fe4..4f3a087065 100644
--- a/src/calibre/ebooks/mobi/input.py
+++ b/src/calibre/ebooks/mobi/input.py
@@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
import re
from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.conversion.utils import PreProcessor
class MOBIInput(InputFormatPlugin):
@@ -40,10 +41,6 @@ class MOBIInput(InputFormatPlugin):
return mr.created_opf_path
def heuristics(self, options, html):
- # search for places where a first or second level heading is immediately followed by another
- # top level heading. demote the second heading to h3 to prevent splitting between chapter
- # headings and titles, images, etc
- doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
- html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
- return html
-
+ self.options = options
+ preprocessor = PreProcessor(self.options, log=getattr(self, 'log', None))
+ return preprocessor(html)