mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
added option for renumbering heading tags
This commit is contained in:
parent
1301fe69d1
commit
946f1cf6c0
@ -137,7 +137,7 @@ def add_pipeline_options(parser, plumber):
|
|||||||
'italicize_common_cases', 'fix_indents',
|
'italicize_common_cases', 'fix_indents',
|
||||||
'html_unwrap_factor', 'unwrap_lines',
|
'html_unwrap_factor', 'unwrap_lines',
|
||||||
'delete_blank_paragraphs', 'format_scene_breaks',
|
'delete_blank_paragraphs', 'format_scene_breaks',
|
||||||
'dehyphenate',
|
'dehyphenate', 'renumber_headings',
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
|
||||||
|
@ -533,6 +533,12 @@ OptionRecommendation(name='dehyphenate',
|
|||||||
'document itself is used as a dictionary to determine whether hyphens '
|
'document itself is used as a dictionary to determine whether hyphens '
|
||||||
'should be retained or removed.')),
|
'should be retained or removed.')),
|
||||||
|
|
||||||
|
OptionRecommendation(name='renumber_headings',
|
||||||
|
recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
help=_('Looks for occurrences of sequential <h1> or <h2> tags. '
|
||||||
|
'The tags are renumbered to prevent splitting in the middle '
|
||||||
|
'of chapter headings.')),
|
||||||
|
|
||||||
OptionRecommendation(name='sr1_search',
|
OptionRecommendation(name='sr1_search',
|
||||||
recommended_value='', level=OptionRecommendation.LOW,
|
recommended_value='', level=OptionRecommendation.LOW,
|
||||||
help=_('Search pattern (regular expression) to be replaced with '
|
help=_('Search pattern (regular expression) to be replaced with '
|
||||||
|
@ -416,7 +416,7 @@ class PreProcessor(object):
|
|||||||
dehyphenator = Dehyphenator()
|
dehyphenator = Dehyphenator()
|
||||||
html = dehyphenator(html,'html_cleanup', length)
|
html = dehyphenator(html,'html_cleanup', length)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'dehyphenate', True):
|
if getattr(self.extra_opts, 'dehyphenate', False):
|
||||||
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
|
# dehyphenate in cleanup mode to fix anything previous conversions/editing missed
|
||||||
self.log("Fixing hyphenated content")
|
self.log("Fixing hyphenated content")
|
||||||
dehyphenator = Dehyphenator()
|
dehyphenator = Dehyphenator()
|
||||||
@ -429,13 +429,14 @@ class PreProcessor(object):
|
|||||||
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
|
chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([*#•]+\s*)+)(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*.?(?=[a-z#\-*\s]+<)([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE)
|
||||||
html = chapdetect3.sub(self.chapter_break, html)
|
html = chapdetect3.sub(self.chapter_break, html)
|
||||||
|
|
||||||
|
if getattr(self.extra_opts, 'renumber_headings', True):
|
||||||
# search for places where a first or second level heading is immediately followed by another
|
# search for places where a first or second level heading is immediately followed by another
|
||||||
# top level heading. demote the second heading to h3 to prevent splitting between chapter
|
# top level heading. demote the second heading to h3 to prevent splitting between chapter
|
||||||
# headings and titles, images, etc
|
# headings and titles, images, etc
|
||||||
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE)
|
||||||
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
html = doubleheading.sub('\g<firsthead>'+'\n<h3'+'\g<secondhead>'+'</h3>', html)
|
||||||
|
|
||||||
if getattr(self.extra_opts, 'format_scene_breaks', True):
|
if getattr(self.extra_opts, 'format_scene_breaks', False):
|
||||||
# Center separator lines
|
# Center separator lines
|
||||||
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
|
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user