diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index f770622952..2aabbf2e95 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -294,6 +294,9 @@ def xml2str(root, pretty_print=False, strip_comments=False):
 def xml2unicode(root, pretty_print=False):
     return etree.tostring(root, pretty_print=pretty_print)
 
+def xml2text(elem):  # Serialize elem with lxml's 'text' method to a unicode string; with_tail=False leaves out elem's own tail text.
+    return etree.tostring(elem, method='text', encoding=unicode, with_tail=False)
+
 ASCII_CHARS   = set(chr(x) for x in xrange(128))
 UNIBYTE_CHARS = set(chr(x) for x in xrange(256))
 URL_SAFE      = set('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index d62c6353ea..4633131dc0 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -5,7 +5,7 @@ __docformat__ = 'restructuredtext en'
 
 '''
 Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
-forces at "likely" locations to conform to size limitations. This transform
+forced at "likely" locations to conform to size limitations. This transform
 assumes a prior call to the flatcss transform.
 '''
 
@@ -385,12 +385,18 @@ class FlowSplitter(object):
             raise SplitError(self.item.href, root)
         self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))
 
-        for t in self.do_split(tree, split_point, before):
+        trees = self.do_split(tree, split_point, before)
+        sizes = [len(tostring(t.getroot())) for t in trees]
+        if min(sizes) < 5*1024:
+            self.log.debug('\t\t\tSplit tree too small')
+            self.split_to_size(tree)
+            return
+
+        for t, size in zip(trees, sizes):
             r = t.getroot()
             if self.is_page_empty(r):
                 continue
-            size = len(tostring(r))
-            if size <= self.max_flow_size:
+            elif size <= self.max_flow_size:
                 self.split_trees.append(t)
                 self.log.debug(
                     '\t\t\tCommitted sub-tree #%d (%d KB)'%(
diff --git a/src/calibre/ebooks/oeb/transforms/structure.py b/src/calibre/ebooks/oeb/transforms/structure.py
index 15e9675aa8..07235b4fb0 100644
--- a/src/calibre/ebooks/oeb/transforms/structure.py
+++ b/src/calibre/ebooks/oeb/transforms/structure.py
@@ -11,7 +11,7 @@ import re
 from lxml import etree
 from urlparse import urlparse
 
-from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML
+from calibre.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text
 from calibre.ebooks import ConversionError
 
 def XPath(x):
@@ -79,8 +79,7 @@ class DetectStructure(object):
             page_break_before = 'display: block; page-break-before: always'
             page_break_after = 'display: block; page-break-after: always'
             for item, elem in self.detected_chapters:
-                text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
-                text = text.strip()
+                text = xml2text(elem).strip()
                 self.log('\tDetected chapter:', text[:50])
                 if chapter_mark == 'none':
                     continue
@@ -120,8 +119,7 @@ class DetectStructure(object):
                     if frag:
                         href = '#'.join((href, frag))
                     if not self.oeb.toc.has_href(href):
-                        text = u' '.join([t.strip() for t in \
-                                a.xpath('descendant::text()')])
+                        text = xml2text(a)
                         text = text[:100].strip()
                         if not self.oeb.toc.has_text(text):
                             num += 1
@@ -135,7 +133,7 @@ class DetectStructure(object):
 
 
     def elem_to_link(self, item, elem, counter):
-        text = u' '.join([t.strip() for t in elem.xpath('descendant::text()')])
+        text = xml2text(elem)
         text = text[:100].strip()
         id = elem.get('id', 'calibre_toc_%d'%counter)
         elem.set('id', id)