EPUB Output: Make the file size splitting algorithm more intelligent. If a split results in a tree that is very small, choose another split point.

This commit is contained in:
Kovid Goyal 2010-05-02 15:17:09 -06:00
parent 3df90b8926
commit 32f8611a76
2 changed files with 11 additions and 5 deletions

View File

@ -295,7 +295,7 @@ def xml2unicode(root, pretty_print=False):
return etree.tostring(root, pretty_print=pretty_print) return etree.tostring(root, pretty_print=pretty_print)
def xml2text(elem): def xml2text(elem):
return etree.tostring(elem, method='text', encoding=unicode) return etree.tostring(elem, method='text', encoding=unicode, with_tail=False)
ASCII_CHARS = set(chr(x) for x in xrange(128)) ASCII_CHARS = set(chr(x) for x in xrange(128))
UNIBYTE_CHARS = set(chr(x) for x in xrange(256)) UNIBYTE_CHARS = set(chr(x) for x in xrange(256))

View File

@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
''' '''
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
forces at "likely" locations to conform to size limitations. This transform forced at "likely" locations to conform to size limitations. This transform
assumes a prior call to the flatcss transform. assumes a prior call to the flatcss transform.
''' '''
@ -385,12 +385,18 @@ class FlowSplitter(object):
raise SplitError(self.item.href, root) raise SplitError(self.item.href, root)
self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point)) self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))
for t in self.do_split(tree, split_point, before): trees = self.do_split(tree, split_point, before)
sizes = [len(tostring(t.getroot())) for t in trees]
if min(sizes) < 5*1024:
self.log.debug('\t\t\tSplit tree too small')
self.split_to_size(tree)
return
for t, size in zip(trees, sizes):
r = t.getroot() r = t.getroot()
if self.is_page_empty(r): if self.is_page_empty(r):
continue continue
size = len(tostring(r)) elif size <= self.max_flow_size:
if size <= self.max_flow_size:
self.split_trees.append(t) self.split_trees.append(t)
self.log.debug( self.log.debug(
'\t\t\tCommitted sub-tree #%d (%d KB)'%( '\t\t\tCommitted sub-tree #%d (%d KB)'%(