mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
EPUB/AZW3 Output: Fix incorrect splitting of HTML at page-break-after page breaks in certain circumstances (when the split element is the first child of a parent that contains other split elements). Fixes #1139317 (Incorrect page break for Haodoo's PDB/uPDB to Epub conversion)
This commit is contained in:
parent
bde2a22e35
commit
70c75df551
@ -10,6 +10,7 @@ assumes a prior call to the flatcss transform.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
import os, math, functools, collections, re, copy
|
import os, math, functools, collections, re, copy
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
from lxml.etree import XPath as _XPath
|
from lxml.etree import XPath as _XPath
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
@ -106,8 +107,7 @@ class Split(object):
|
|||||||
continue
|
continue
|
||||||
for elem in selector(body[0]):
|
for elem in selector(body[0]):
|
||||||
if elem not in body:
|
if elem not in body:
|
||||||
if before:
|
elem.set('pb_before', '1' if before else '0')
|
||||||
elem.set('pb_before', '1')
|
|
||||||
page_breaks.add(elem)
|
page_breaks.add(elem)
|
||||||
|
|
||||||
for i, elem in enumerate(item.data.iter()):
|
for i, elem in enumerate(item.data.iter()):
|
||||||
@ -134,14 +134,12 @@ class Split(object):
|
|||||||
id = 'calibre_pb_%d'%i
|
id = 'calibre_pb_%d'%i
|
||||||
x.set('id', id)
|
x.set('id', id)
|
||||||
xp = XPath('//*[@id=%r]'%id)
|
xp = XPath('//*[@id=%r]'%id)
|
||||||
page_breaks_.append((xp,
|
page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
|
||||||
x.get('pb_before', False)))
|
|
||||||
page_break_ids.append(id)
|
page_break_ids.append(id)
|
||||||
|
|
||||||
for elem in item.data.iter():
|
for elem in item.data.iter():
|
||||||
elem.attrib.pop('pb_order', False)
|
elem.attrib.pop('pb_order', False)
|
||||||
if elem.get('pb_before', False):
|
elem.attrib.pop('pb_before', False)
|
||||||
elem.attrib.pop('pb_before')
|
|
||||||
|
|
||||||
return page_breaks_, page_break_ids
|
return page_breaks_, page_break_ids
|
||||||
|
|
||||||
@ -223,22 +221,23 @@ class FlowSplitter(object):
|
|||||||
self.commit()
|
self.commit()
|
||||||
|
|
||||||
def split_on_page_breaks(self, orig_tree):
|
def split_on_page_breaks(self, orig_tree):
|
||||||
ordered_ids = []
|
ordered_ids = OrderedDict()
|
||||||
for elem in orig_tree.xpath('//*[@id]'):
|
all_page_break_ids = frozenset(self.page_break_ids)
|
||||||
id = elem.get('id')
|
for elem_id in orig_tree.xpath('//*/@id'):
|
||||||
if id in self.page_break_ids:
|
if elem_id in all_page_break_ids:
|
||||||
ordered_ids.append(self.page_breaks[self.page_break_ids.index(id)])
|
ordered_ids[elem_id] = self.page_breaks[
|
||||||
|
self.page_break_ids.index(elem_id)]
|
||||||
|
|
||||||
self.trees = []
|
self.trees = []
|
||||||
tree = orig_tree
|
tree = orig_tree
|
||||||
for pattern, before in ordered_ids:
|
for pattern, before in ordered_ids.itervalues():
|
||||||
elem = pattern(tree)
|
elem = pattern(tree)
|
||||||
if elem:
|
if elem:
|
||||||
self.log.debug('\t\tSplitting on page-break at %s'%
|
self.log.debug('\t\tSplitting on page-break at %s'%
|
||||||
elem[0].get('id'))
|
elem[0].get('id'))
|
||||||
before, after = self.do_split(tree, elem[0], before)
|
before_tree, after_tree = self.do_split(tree, elem[0], before)
|
||||||
self.trees.append(before)
|
self.trees.append(before_tree)
|
||||||
tree = after
|
tree = after_tree
|
||||||
self.trees.append(tree)
|
self.trees.append(tree)
|
||||||
trees, ids = [], set([])
|
trees, ids = [], set([])
|
||||||
for tree in self.trees:
|
for tree in self.trees:
|
||||||
@ -289,7 +288,6 @@ class FlowSplitter(object):
|
|||||||
if self.opts.verbose > 3 and npath != path:
|
if self.opts.verbose > 3 and npath != path:
|
||||||
self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
|
self.log.debug('\t\t\tMoved split point %s to %s'%(path, npath))
|
||||||
|
|
||||||
|
|
||||||
return npath
|
return npath
|
||||||
|
|
||||||
def do_split(self, tree, split_point, before):
|
def do_split(self, tree, split_point, before):
|
||||||
@ -304,7 +302,11 @@ class FlowSplitter(object):
|
|||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
root2 = tree2.getroot()
|
root2 = tree2.getroot()
|
||||||
body, body2 = map(self.get_body, (root, root2))
|
body, body2 = map(self.get_body, (root, root2))
|
||||||
path = self.adjust_split_point(root, path)
|
if before:
|
||||||
|
# We cannot adjust for after since moving an after split point to a
|
||||||
|
# parent will cause breakage if the parent contains any content
|
||||||
|
# after the original split point
|
||||||
|
path = self.adjust_split_point(root, path)
|
||||||
split_point = root.xpath(path)[0]
|
split_point = root.xpath(path)[0]
|
||||||
split_point2 = root2.xpath(path)[0]
|
split_point2 = root2.xpath(path)[0]
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user