Fix #2358 (HTML to ePub conversion results in duplicated content / incorrect breaks)

This commit is contained in:
Kovid Goyal 2009-04-29 20:31:18 -07:00
parent 6f072dc3d1
commit e869684a29

View File

@@ -66,20 +66,27 @@ class Splitter(LoggingInterface):
if self.opts.profile.flow_size < sys.maxint: if self.opts.profile.flow_size < sys.maxint:
lt_found = False lt_found = False
self.log_info('\tLooking for large trees...') self.log_info('\tLooking for large trees...')
self.tree_map = {}
for i, tree in enumerate(list(trees)): for i, tree in enumerate(list(trees)):
self.trees = [] self.split_trees = []
size = len(tostring(tree.getroot())) size = len(tostring(tree.getroot()))
if size > self.opts.profile.flow_size: if size > self.opts.profile.flow_size:
lt_found = True lt_found = True
try: try:
self.split_to_size(tree) self.split_to_size(tree)
self.tree_map[tree] = self.split_trees
except (SplitError, RuntimeError): # Splitting fails except (SplitError, RuntimeError): # Splitting fails
if not self.always_remove: if not self.always_remove:
self.always_remove = True self.always_remove = True
self.split_trees = []
self.split_to_size(tree) self.split_to_size(tree)
self.tree_map[tree] = self.split_trees
else: else:
raise raise
trees[i:i+1] = list(self.trees) t = []
for x in trees:
t.extend(self.tree_map.get(x, [x]))
trees = t
if not lt_found: if not lt_found:
self.log_info('\tNo large trees found') self.log_info('\tNo large trees found')
@@ -150,10 +157,10 @@ class Splitter(LoggingInterface):
continue continue
size = len(tostring(r)) size = len(tostring(r))
if size <= self.opts.profile.flow_size: if size <= self.opts.profile.flow_size:
self.trees.append(t) self.split_trees.append(t)
#print tostring(t.getroot(), pretty_print=True) #print tostring(t.getroot(), pretty_print=True)
self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)', self.log_debug('\t\t\tCommitted sub-tree #%d (%d KB)',
len(self.trees), size/1024.) len(self.split_trees), size/1024.)
self.split_size += size self.split_size += size
else: else:
self.split_to_size(t) self.split_to_size(t)