Conversion: When automatically inserting page breaks, do not put a page break before an <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2> tag.

2025-07-09 03:04:10 -04:00 · 2012-02-02 16:53:26 +05:30 · 2012-02-02 16:53:26 +05:30 · 8eb8146eff
commit 8eb8146eff
parent cd85d375f5
1 changed files with 13 additions and 1 deletions
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -12,7 +12,7 @@ from lxml import etree
 from urlparse import urlparse
 from collections import OrderedDict
-from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
 from calibre.ebooks import ConversionError
 def XPath(x):
@@ -59,6 +59,18 @@ class DetectStructure(object):
            pb_xpath = XPath(opts.page_breaks_before)
            for item in oeb.spine:
                for elem in pb_xpath(item.data):
                    try:
                        prev = elem.itersiblings(tag=etree.Element,
                                preceding=True).next()
                        if (barename(elem.tag) in {'h1', 'h2'} and barename(
                                prev.tag) in {'h1', 'h2'} and (not prev.tail or
                                    not prev.tail.split())):
                            # We have two adjacent headings, do not put a page
                            # break on the second one
                            continue
                    except StopIteration:
                        pass
                    style = elem.get('style', '')
                    if style:
                        style += '; '