mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
added option for renumbering heading tags
This commit is contained in:
parent
1301fe69d1
commit
946f1cf6c0
@ -137,7 +137,7 @@ def add_pipeline_options(parser, plumber):
|
||||
'italicize_common_cases', 'fix_indents',
|
||||
'html_unwrap_factor', 'unwrap_lines',
|
||||
'delete_blank_paragraphs', 'format_scene_breaks',
|
||||
'dehyphenate',
|
||||
'dehyphenate', 'renumber_headings',
|
||||
]
|
||||
),
|
||||
|
||||
|
@ -532,7 +532,13 @@ OptionRecommendation(name='dehyphenate',
|
||||
help=_('Analyses hyphenated words throughout the document. The '
|
||||
'document itself is used as a dictionary to determine whether hyphens '
|
||||
'should be retained or removed.')),
|
||||
|
||||
|
||||
OptionRecommendation(name='renumber_headings',
|
||||
recommended_value=False, level=OptionRecommendation.LOW,
|
||||
help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
|
||||
'The tags are renumbered to prevent splitting in the middle '
|
||||
'of chapter headings.')),
|
||||
|
||||
OptionRecommendation(name='sr1_search',
|
||||
recommended_value='', level=OptionRecommendation.LOW,
|
||||
help=_('Search pattern (regular expression) to be replaced with '
|
||||
|
@ -416,7 +416,7 @@ class PreProcessor(object):
|
||||
dehyphenator = Dehyphenator()
|
||||
html = dehyphenator(html,'html_cleanup', length)
|
||||
|
||||
if getattr(self.extra_opts, 'dehyphenate', True):
|
||||
if getattr(self.extra_opts, 'dehyphenate', False):
|
||||
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
|
||||
self.log("Fixing hyphenated content")
|
||||
dehyphenator = Dehyphenator()
|
||||
@ -429,13 +429,14 @@ class PreProcessor(object):
|
||||
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
|
||||
html = chapdetect3.sub(self.chapter_break, html)
|
||||
|
||||
# search for places where a first or second level heading is immediately followed by another
|
||||
# top level heading. demote the second heading to h3 to prevent splitting between chapter
|
||||
# headings and titles, images, etc
|
||||
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||
if getattr(self.extra_opts, 'renumber_headings', True):
|
||||
# search for places where a first or second level heading is immediately followed by another
|
||||
# top level heading. demote the second heading to h3 to prevent splitting between chapter
|
||||
# headings and titles, images, etc
|
||||
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||
|
||||
if getattr(self.extra_opts, 'format_scene_breaks', True):
|
||||
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||
# Center separator lines
|
||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user