From 84431ab4ddf9d7cd93a8e04d1ce19a14524091a2 Mon Sep 17 00:00:00 2001 From: ldolse Date: Fri, 4 Feb 2011 20:34:07 +0800 Subject: [PATCH] ... --- src/calibre/ebooks/conversion/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index f541701480..a9f733277a 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -379,13 +379,13 @@ class HeuristicProcessor(object): html = re.sub('(?i)', '', html) # Re-open self closing paragraph tags html = re.sub('/]*/>', '

', html) - # delete surrounding divs from empty paragraphs - html = re.sub(']*>\s*]*>\s*

\s*', '

', html) # Get rid of empty span, bold, font, em, & italics tags html = re.sub(r"\s*]*>\s*(]*>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*\s*){0,2}\s*", " ", html) html = re.sub(r"\s*]*>\s*(]>\s*){0,2}\s*\s*", " ", html) html = re.sub(r"\s*<(font|[ibu]|em|strong)[^>]*>\s*(<(font|[ibu]|em|strong)[^>]*>\s*\s*){0,2}\s*", " ", html) + # delete surrounding divs from empty paragraphs + html = re.sub(']*>\s*]*>\s*

\s*', '

', html) # Empty heading tags html = re.sub(r'(?i)\s*', '', html) self.deleted_nbsps = True @@ -563,7 +563,6 @@ class HeuristicProcessor(object): # Determine whether the document uses interleaved blank lines self.blanks_between_paragraphs = self.analyze_blanks(html) - #self.dump(html, 'before_chapter_markup') # detect chapters/sections to match xpath or splitting logic if getattr(self.extra_opts, 'markup_chapter_headings', False): @@ -639,7 +638,6 @@ class HeuristicProcessor(object): blanks_count = len(self.any_multi_blank.findall(html)) if blanks_count >= 1: html = self.merge_blanks(html, blanks_count) - self.dump(html, 'before_after_merge_blanks') scene_break_regex = self.line_open+'(?![\w\'\"])(?P((?P((?!\s)\W))\s*(?P=break_char)?)+)\s*'+self.line_close scene_break = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE) # If the user has enabled scene break replacement, then either softbreaks