mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Don't add page breaks for chapters at the start of the file
PDF Output: Fix extra blank page being inserted at the start of the chapter when converting some epub files from feedbooks
This commit is contained in:
parent
0c5959f298
commit
5542dcfbb3
@ -10,7 +10,7 @@ import re, uuid
|
||||
|
||||
from lxml import etree
|
||||
from urlparse import urlparse
|
||||
from collections import OrderedDict
|
||||
from collections import OrderedDict, Counter
|
||||
|
||||
from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
|
||||
from calibre.ebooks import ConversionError
|
||||
@ -22,6 +22,26 @@ def XPath(x):
|
||||
raise ConversionError(
|
||||
'The syntax of the XPath expression %s is invalid.' % repr(x))
|
||||
|
||||
def isspace(x):
    '''
    Return True if x is empty (None or '') or consists only of whitespace.

    Non-breaking spaces (U+00A0) are treated as whitespace. They are removed
    first and the remainder is then checked for being blank, so that a string
    made up solely of non-breaking spaces is also reported as blank (calling
    .isspace() on the resulting empty string would wrongly return False).
    '''
    return not x or not x.replace(u'\xa0', u'').strip()
|
||||
|
||||
def at_start(elem):
    '''
    Return True if there is no content before elem.

    Walks the enclosing <body> in document order until elem is reached.
    Any <img> or <svg> element, or any node carrying non-blank text,
    encountered before elem counts as content. The tail text of an ancestor
    of elem is ignored, since it comes after elem in the document.

    Returns True when elem has no enclosing <body>, and False when elem is
    never encountered during the walk.
    '''
    body = XPath('ancestor-or-self::h:body')(elem)
    if not body:
        return True
    body = body[0]
    ancestors = frozenset(XPath('ancestor::*')(elem))
    for x in body.iter():
        if x is elem:
            return True
        # Comments and processing instructions have a callable rather than a
        # string as their tag, so only split tags that are actually strings.
        tag = getattr(x, 'tag', None)
        if hasattr(tag, 'rpartition') and tag.rpartition('}')[-1] in {'img', 'svg'}:
            return False
        if isspace(getattr(x, 'text', None)) and (x in ancestors or isspace(getattr(x, 'tail', None))):
            continue
        return False
    return False
|
||||
|
||||
class DetectStructure(object):
|
||||
|
||||
def __call__(self, oeb, opts):
|
||||
@ -51,7 +71,7 @@ class DetectStructure(object):
|
||||
regexp = re.compile(opts.toc_filter)
|
||||
for node in list(self.oeb.toc.iter()):
|
||||
if not node.title or regexp.search(node.title) is not None:
|
||||
self.log('Filtering', node.title if node.title else\
|
||||
self.log('Filtering', node.title if node.title else
|
||||
'empty node', 'from TOC')
|
||||
self.oeb.toc.remove(node)
|
||||
|
||||
@ -92,7 +112,8 @@ class DetectStructure(object):
|
||||
'Invalid start reading at XPath expression, ignoring: %s'%expr)
|
||||
return
|
||||
for item in self.oeb.spine:
|
||||
if not hasattr(item.data, 'xpath'): continue
|
||||
if not hasattr(item.data, 'xpath'):
|
||||
continue
|
||||
matches = expr(item.data)
|
||||
if matches:
|
||||
elem = matches[0]
|
||||
@ -129,17 +150,27 @@ class DetectStructure(object):
|
||||
chapter_mark = self.opts.chapter_mark
|
||||
page_break_before = 'display: block; page-break-before: always'
|
||||
page_break_after = 'display: block; page-break-after: always'
|
||||
c = Counter()
|
||||
for item, elem in self.detected_chapters:
|
||||
c[item] += 1
|
||||
text = xml2text(elem).strip()
|
||||
text = re.sub(r'\s+', ' ', text.strip())
|
||||
self.log('\tDetected chapter:', text[:50])
|
||||
if chapter_mark == 'none':
|
||||
continue
|
||||
elif chapter_mark == 'rule':
|
||||
if chapter_mark == 'rule':
|
||||
mark = etree.Element(XHTML('hr'))
|
||||
elif chapter_mark == 'pagebreak':
|
||||
if c[item] < 3 and at_start(elem):
|
||||
# For the first two elements in this item, check if they
|
||||
# are at the start of the file, in which case inserting a
|
||||
# page break is unnecessary and can lead to extra blank
|
||||
# pages in the PDF Output plugin. We need to use two as
|
||||
# feedbooks epubs match both a heading tag and its
|
||||
# containing div with the default chapter expression.
|
||||
continue
|
||||
mark = etree.Element(XHTML('div'), style=page_break_after)
|
||||
else: # chapter_mark == 'both':
|
||||
else: # chapter_mark == 'both':
|
||||
mark = etree.Element(XHTML('hr'), style=page_break_before)
|
||||
try:
|
||||
elem.addprevious(mark)
|
||||
@ -182,8 +213,6 @@ class DetectStructure(object):
|
||||
self.log('Maximum TOC links reached, stopping.')
|
||||
return
|
||||
|
||||
|
||||
|
||||
def elem_to_link(self, item, elem, counter):
|
||||
text = xml2text(elem).strip()
|
||||
if not text:
|
||||
@ -197,7 +226,6 @@ class DetectStructure(object):
|
||||
href = '#'.join((item.href, id))
|
||||
return text, href
|
||||
|
||||
|
||||
def add_leveled_toc_items(self):
|
||||
added = OrderedDict()
|
||||
added2 = OrderedDict()
|
||||
@ -223,7 +251,7 @@ class DetectStructure(object):
|
||||
node = self.oeb.toc.add(text, _href,
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
added[elem] = node
|
||||
#node.add(_('Top'), _href)
|
||||
# node.add(_('Top'), _href)
|
||||
|
||||
if self.opts.level2_toc is not None and added:
|
||||
for elem in find_matches(self.opts.level2_toc, document.data):
|
||||
@ -263,3 +291,4 @@ class DetectStructure(object):
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
break
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user