diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index db1ec0857d..c9612d97b9 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -137,7 +137,7 @@ def add_pipeline_options(parser, plumber):
'italicize_common_cases', 'fix_indents',
'html_unwrap_factor', 'unwrap_lines',
'delete_blank_paragraphs', 'format_scene_breaks',
- 'dehyphenate',
+ 'dehyphenate', 'renumber_headings',
]
),
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index 48b965f624..b8c45dfa14 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -532,7 +532,13 @@ OptionRecommendation(name='dehyphenate',
help=_('Analyses hyphenated words throughout the document. The '
'document itself is used as a dictionary to determine whether hyphens '
'should be retained or removed.')),
-
+
+OptionRecommendation(name='renumber_headings',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('Looks for occurences of sequential <h1> or <h2> tags. '
+ 'The tags are renumbered to prevent splitting in the middle '
+ 'of chapter headings.')),
+
OptionRecommendation(name='sr1_search',
recommended_value='', level=OptionRecommendation.LOW,
help=_('Search pattern (regular expression) to be replaced with '
diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py
index 2a88d371cc..4c62d2c06f 100644
--- a/src/calibre/ebooks/conversion/utils.py
+++ b/src/calibre/ebooks/conversion/utils.py
@@ -416,7 +416,7 @@ class PreProcessor(object):
dehyphenator = Dehyphenator()
html = dehyphenator(html,'html_cleanup', length)
- if getattr(self.extra_opts, 'dehyphenate', True):
+ if getattr(self.extra_opts, 'dehyphenate', False):
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
self.log("Fixing hyphenated content")
dehyphenator = Dehyphenator()
@@ -429,13 +429,14 @@ class PreProcessor(object):
chapdetect3 = re.compile(r'<(?P(p|div)[^>]*)>\s*(?P(]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*()?([ibu]>){0,2}\s*()?\s*([ibu]>){0,2}\s*()?\s*(p|div)>)', re.IGNORECASE)
html = chapdetect3.sub(self.chapter_break, html)
- # search for places where a first or second level heading is immediately followed by another
- # top level heading. demote the second heading to h3 to prevent splitting between chapter
- # headings and titles, images, etc
- doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
- html = doubleheading.sub('\g'+'\n'+'
', html)
+ if getattr(self.extra_opts, 'renumber_headings', True):
+ # search for places where a first or second level heading is immediately followed by another
+ # top level heading. demote the second heading to h3 to prevent splitting between chapter
+ # headings and titles, images, etc
+ doubleheading = re.compile(r'(?P]*>.+?\s*(<(?!h\d)[^>]*>\s*)*)[^>]*>.+?)', re.IGNORECASE)
+ html = doubleheading.sub('\g'+'\n'+'
', html)
- if getattr(self.extra_opts, 'format_scene_breaks', True):
+ if getattr(self.extra_opts, 'format_scene_breaks', False):
# Center separator lines
html = re.sub(u'<(?Pp|div)[^>]*>\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(<(?Pfont|span|[ibu])[^>]*>)?\s*(?P([*#•]+\s*)+)\s*((?P=inner3)>)?\s*((?P=inner2)>)?\s*((?P=inner1)>)?\s*(?P=outer)>', '' + '\g' + '
', html)