Conversion: When automatically inserting page breaks, do not put a page break before an <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2> tag.

2026-01-02 10:10:19 -05:00 · 2012-02-02 16:53:26 +05:30 · 2012-02-02 16:53:26 +05:30 · 8eb8146eff
commit 8eb8146eff
parent cd85d375f5
1 changed files with 13 additions and 1 deletions
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@ -12,7 +12,7 @@ from lxml import etree
 from urlparse import urlparse
 from collections import OrderedDict

-from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
 from calibre.ebooks import ConversionError

 def XPath(x):
@ -59,6 +59,18 @@ class DetectStructure(object):
            pb_xpath = XPath(opts.page_breaks_before)
            for item in oeb.spine:
                for elem in pb_xpath(item.data):
+                    try:
+                        prev = elem.itersiblings(tag=etree.Element,
+                                preceding=True).next()
+                        if (barename(elem.tag) in {'h1', 'h2'} and barename(
+                                prev.tag) in {'h1', 'h2'} and (not prev.tail or
+                                    not prev.tail.split())):
+                            # We have two adjacent headings, do not put a page
+                            # break on the second one
+                            continue
+                    except StopIteration:
+                        pass
+
                    style = elem.get('style', '')
                    if style:
                        style += '; '