From 6b421643705fb3e575bdda1225171485ba01965a Mon Sep 17 00:00:00 2001
From: ldolse
Date: Sun, 30 Jan 2011 18:11:15 +0800
Subject: [PATCH] adjusted margins for scene break heuristics
---
src/calibre/ebooks/conversion/utils.py | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 5beefb5bd9..a115e584b6 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -26,7 +26,7 @@ class HeuristicProcessor(object):
self.blanks_deleted = False
self.blanks_between_paragraphs = False
self.linereg = re.compile('(?<=)', re.IGNORECASE|re.DOTALL)
- self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
+ self.blankreg = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
self.anyblank = re.compile(r'\s*(?P]*>)\s*(?P
)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*]*>\s*
){2,}(?!\s*]*>\s*
){1,}(?=\s*)(\s*]*>\s*
){1,}', re.IGNORECASE)
- def markup_spacers(match):
+ def markup_whitespaces(match):
blanks = match.group(0)
- blanks = self.blankreg.sub('\n
', blanks)
+ blanks = self.blankreg.sub('\n
', blanks)
return blanks
- html = blanks_before_headings.sub(markup_spacers, html)
- html = blanks_after_headings.sub(markup_spacers, html)
+ html = blanks_before_headings.sub(markup_whitespaces, html)
+ html = blanks_after_headings.sub(markup_whitespaces, html)
if self.html_preprocess_sections > self.min_chapters:
- html = re.sub('(?si)^.*?(?=
', html)
+ html = self.multi_blank.sub('\n
', html)
else:
- html = self.blankreg.sub('\n
', html)
+ html = self.blankreg.sub('\n
', html)
return html
@@ -489,6 +489,7 @@ class HeuristicProcessor(object):
if getattr(self.extra_opts, 'markup_chapter_headings', False):
html = self.markup_chapters(html, self.totalwords, self.blanks_between_paragraphs)
+ self.dump(html, 'after_chapter_markup')
if getattr(self.extra_opts, 'italicize_common_cases', False):
html = self.markup_italicis(html)
@@ -498,7 +499,7 @@ class HeuristicProcessor(object):
if self.blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines")
self.blanks_deleted = True
- html = self.multi_blank.sub('\n
', html)
+ html = self.multi_blank.sub('\n
', html)
html = self.blankreg.sub('', html)
# Determine line ending type
@@ -553,7 +554,7 @@ class HeuristicProcessor(object):
html = self.detect_blank_formatting(html)
html = self.detect_soft_breaks(html)
# Center separator lines
- html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•=✦]+\s*)+)\s*((?P=inner3)>)?\s*((?P=inner2)>)?\s*((?P=inner1)>)?\s*(?P=outer)>', '' + '\g' + '
', html)
+ html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•=✦]+\s*)+)\s*((?P=inner3)>)?\s*((?P=inner2)>)?\s*((?P=inner1)>)?\s*(?P=outer)>', '' + '\g' + '
', html)
#html = re.sub(']*>\s*
', '
', html)
if self.deleted_nbsps: