diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 48806e78e7..96a9a4783d 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -236,7 +236,7 @@ class HeuristicProcessor(object): print unicode(self.chapters_with_title)+" chapters with titles" else: html = chapdetect.sub(self.chapter_head, html) - return html + return html recurse_patterns(html, True) html = recurse_patterns(html, False) @@ -322,7 +322,8 @@ class HeuristicProcessor(object): html = re.sub(ur'\s*\s*', ' ', html) # Delete microsoft 'smart' tags html = re.sub('(?i)', '', html) - # Get rid of empty span, bold, & italics tags + # Get rid of empty span, bold, font, & italics tags + html = re.sub(r'\s*]*>\s*\s*', '', html) html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<[ibu][^>]*>\s*(<[ibu][^>]*>\s*\s*){0,2}\s*", " ", html) html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 584be71fe4..8188027e01 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -39,11 +39,3 @@ class MOBIInput(InputFormatPlugin): accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]' return mr.created_opf_path - def heuristics(self, options, html): - # search for places where a first or second level heading is immediately followed by another - # top level heading. demote the second heading to h3 to prevent splitting between chapter - # headings and titles, images, etc - doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE) - html = doubleheading.sub('\g'+'\n'+'', html) - return html -