From 8eb8146effd5cb8131f6d8cd55aac3c488a3d0ba Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Feb 2012 16:53:26 +0530 Subject: [PATCH] Conversion: When automatically inserting page breaks, do not put a page break before a <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2>
tag. --- src/calibre/ebooks/oeb/transforms/structure.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py index f2a61ba6e1..dd3db1415a 100644 --- a/src/calibre/ebooks/oeb/transforms/structure.py +++ b/src/calibre/ebooks/oeb/transforms/structure.py @@ -12,7 +12,7 @@ from lxml import etree from urlparse import urlparse from collections import OrderedDict -from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text +from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename from calibre.ebooks import ConversionError def XPath(x): @@ -59,6 +59,18 @@ class DetectStructure(object): pb_xpath = XPath(opts.page_breaks_before) for item in oeb.spine: for elem in pb_xpath(item.data): + try: + prev = elem.itersiblings(tag=etree.Element, + preceding=True).next() + if (barename(elem.tag) in {'h1', 'h2'} and barename( + prev.tag) in {'h1', 'h2'} and (not prev.tail or + not prev.tail.split())): + # We have two adjacent headings, do not put a page + # break on the second one + continue + except StopIteration: + pass + style = elem.get('style', '') if style: style += '; '