From 6b421643705fb3e575bdda1225171485ba01965a Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 30 Jan 2011 18:11:15 +0800
Subject: [PATCH] adjusted margins for scene break heuristics

---
 src/calibre/ebooks/conversion/utils.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 5beefb5bd9..a115e584b6 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -26,7 +26,7 @@ class HeuristicProcessor(object):
         self.blanks_deleted = False
         self.blanks_between_paragraphs = False
         self.linereg = re.compile('(?<=)', re.IGNORECASE|re.DOTALL)
-        self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P

)', re.IGNORECASE) + self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P

)', re.IGNORECASE) self.anyblank = re.compile(r'\s*(?P]*>)\s*(?P

)', re.IGNORECASE) self.multi_blank = re.compile(r'(\s*]*>\s*

){2,}(?!\s*]*>\s*

){1,}(?=\s*)(\s*]*>\s*

){1,}', re.IGNORECASE) - def markup_spacers(match): + def markup_whitespaces(match): blanks = match.group(0) - blanks = self.blankreg.sub('\n

', blanks) + blanks = self.blankreg.sub('\n

', blanks) return blanks - html = blanks_before_headings.sub(markup_spacers, html) - html = blanks_after_headings.sub(markup_spacers, html) + html = blanks_before_headings.sub(markup_whitespaces, html) + html = blanks_after_headings.sub(markup_whitespaces, html) if self.html_preprocess_sections > self.min_chapters: - html = re.sub('(?si)^.*?(?=

', html) + html = self.multi_blank.sub('\n

', html) else: - html = self.blankreg.sub('\n

', html) + html = self.blankreg.sub('\n

', html)
         return html
@@ -489,6 +489,7 @@ class HeuristicProcessor(object):
         if getattr(self.extra_opts, 'markup_chapter_headings', False):
             html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
+            self.dump(html, 'after_chapter_markup')
         if getattr(self.extra_opts, 'italicize_common_cases', False):
             html = self.markup_italicis(html)
@@ -498,7 +499,7 @@ class HeuristicProcessor(object):
         if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
             self.log.debug("deleting blank lines")
             self.blanks_deleted = True
-            html = self.multi_blank.sub('\n

', html) + html = self.multi_blank.sub('\n

', html) html = self.blankreg.sub('', html) # Determine line ending type @@ -553,7 +554,7 @@ class HeuristicProcessor(object): html = self.detect_blank_formatting(html) html = self.detect_soft_breaks(html) # Center separator lines - html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•=✦]+\s*)+)\s*()?\s*()?\s*()?\s*', '

' + '\g' + '

', html) + html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•=✦]+\s*)+)\s*()?\s*()?\s*()?\s*', '

' + '\g' + '

', html) #html = re.sub(']*>\s*

', '

', html) if self.deleted_nbsps: