diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 026c072845..2aabbf2e95 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -295,7 +295,7 @@ def xml2unicode(root, pretty_print=False): return etree.tostring(root, pretty_print=pretty_print) def xml2text(elem): - return etree.tostring(elem, method='text', encoding=unicode) + return etree.tostring(elem, method='text', encoding=unicode, with_tail=False) ASCII_CHARS = set(chr(x) for x in xrange(128)) UNIBYTE_CHARS = set(chr(x) for x in xrange(256)) diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py index d62c6353ea..4633131dc0 100644 --- a/src/calibre/ebooks/oeb/transforms/split.py +++ b/src/calibre/ebooks/oeb/transforms/split.py @@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en' ''' Splitting of the XHTML flows. Splitting can happen on page boundaries or can be -forces at "likely" locations to conform to size limitations. This transform +forced at "likely" locations to conform to size limitations. This transform assumes a prior call to the flatcss transform. ''' @@ -385,12 +385,18 @@ class FlowSplitter(object): raise SplitError(self.item.href, root) self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point)) - for t in self.do_split(tree, split_point, before): + trees = self.do_split(tree, split_point, before) + sizes = [len(tostring(t.getroot())) for t in trees] + if min(sizes) < 5*1024: + self.log.debug('\t\t\tSplit tree too small') + self.split_to_size(tree) + return + + for t, size in zip(trees, sizes): r = t.getroot() if self.is_page_empty(r): continue - size = len(tostring(r)) - if size <= self.max_flow_size: + elif size <= self.max_flow_size: self.split_trees.append(t) self.log.debug( '\t\t\tCommitted sub-tree #%d (%d KB)'%(