diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index db1ec0857d..c9612d97b9 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -137,7 +137,7 @@ def add_pipeline_options(parser, plumber):
'italicize_common_cases', 'fix_indents',
'html_unwrap_factor', 'unwrap_lines',
'delete_blank_paragraphs', 'format_scene_breaks',
- 'dehyphenate',
+ 'dehyphenate', 'renumber_headings',
]
),
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 48b965f624..b8c45dfa14 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -532,7 +532,13 @@ OptionRecommendation(name='dehyphenate',
help=_('Analyses hyphenated words throughout the document. The '
'document itself is used as a dictionary to determine whether hyphens '
'should be retained or removed.')),
-
+
+OptionRecommendation(name='renumber_headings',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
+ 'The tags are renumbered to prevent splitting in the middle '
+ 'of chapter headings.')),
+
OptionRecommendation(name='sr1_search',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Search pattern (regular expression) to be replaced with '
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 56d4339d8c..305346d496 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -272,9 +272,11 @@ class PreProcessor(object):
unwrap = re.compile(u"%s" % unwrap_regex, re.UNICODE)
em_en_unwrap = re.compile(u"%s" % em_en_unwrap_regex, re.UNICODE)
+ shy_unwrap = re.compile(u"%s" % shy_unwrap_regex, re.UNICODE)
content = unwrap.sub(' ', content)
content = em_en_unwrap.sub('', content)
+ content = shy_unwrap.sub('', content)
return content
def txt_process(self, match):
@@ -461,11 +463,12 @@ class PreProcessor(object):
chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*()?([ibu]>){0,2}\s*()?\s*([ibu]>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
- # search for places where a first or second level heading is immediately followed by another
- # top level heading. demote the second heading to h3 to prevent splitting between chapter
- # headings and titles, images, etc
- doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
- html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
+ if getattr(self.extra_opts, 'renumber_headings', False):
+ # search for places where a first or second level heading is immediately followed by another
+ # top level heading. demote the second heading to h3 to prevent splitting between chapter
+ # headings and titles, images, etc
+ doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
+ html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
if getattr(self.extra_opts, 'format_scene_breaks', False):
# Center separator lines