Conversion: When automatically inserting page breaks, do not put a page break before an <h1> or <h2> tag if it is immediately preceded by another <h1> or <h2> tag.

This commit is contained in:
Kovid Goyal 2012-02-02 16:53:26 +05:30
parent cd85d375f5
commit 8eb8146eff

View File

@ -12,7 +12,7 @@ from lxml import etree
from urlparse import urlparse from urlparse import urlparse
from collections import OrderedDict from collections import OrderedDict
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
from calibre.ebooks import ConversionError from calibre.ebooks import ConversionError
def XPath(x): def XPath(x):
@ -59,6 +59,18 @@ class DetectStructure(object):
pb_xpath = XPath(opts.page_breaks_before) pb_xpath = XPath(opts.page_breaks_before)
for item in oeb.spine: for item in oeb.spine:
for elem in pb_xpath(item.data): for elem in pb_xpath(item.data):
try:
prev = elem.itersiblings(tag=etree.Element,
preceding=True).next()
if (barename(elem.tag) in {'h1', 'h2'} and barename(
prev.tag) in {'h1', 'h2'} and (not prev.tail or
not prev.tail.split())):
# We have two adjacent headings, do not put a page
# break on the second one
continue
except StopIteration:
pass
style = elem.get('style', '') style = elem.get('style', '')
if style: if style:
style += '; ' style += '; '