From 901f54e124ab4cd1c554ffc741f0a4fc79e43d43 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 12 Jan 2014 09:25:07 +0530 Subject: [PATCH] EPUB Output: Fix splitting of large HTML files removing all child tags from inside <pre> tags. Fixes #1267327 [Private
 bug](https://bugs.launchpad.net/calibre/+bug/1267327)

---
 src/calibre/ebooks/oeb/transforms/split.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/oeb/transforms/split.py b/src/calibre/ebooks/oeb/transforms/split.py
index 01e4348b34..02215c5121 100644
--- a/src/calibre/ebooks/oeb/transforms/split.py
+++ b/src/calibre/ebooks/oeb/transforms/split.py
@@ -317,13 +317,11 @@ class FlowSplitter(object):
     def split_to_size(self, tree):
         self.log.debug('\t\tSplitting...')
         root = tree.getroot()
-        # Split large <pre> tags
-        for pre in list(XPath('//h:pre')(root)):
-            text = u''.join(pre.xpath('descendant::text()'))
-            pre.text = text
-            for child in list(pre.iterchildren()):
-                pre.remove(child)
-            if len(pre.text) > self.max_flow_size*0.5:
+        # Split large <pre> tags if they contain only text
+        for pre in XPath('//h:pre')(root):
+            if len(tuple(pre.iterchildren(etree.Element))) > 0:
+                continue
+            if pre.text and len(pre.text) > self.max_flow_size*0.5:
                 self.log.debug('\t\tSplitting large <pre> tag')
                 frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
                 new_pres = []