From c89d4548386969e1eca1a15d5b420160ebea5964 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Wed, 30 Jul 2014 10:23:16 +0530
Subject: [PATCH] Fix #1349536 [convert do not preserve spaces in
 <pre>](https://bugs.launchpad.net/calibre/+bug/1349536)

---
 src/calibre/ebooks/BeautifulSoup.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/BeautifulSoup.py b/src/calibre/ebooks/BeautifulSoup.py
index c19f4c0a11..ecdfec486b 100644
--- a/src/calibre/ebooks/BeautifulSoup.py
+++ b/src/calibre/ebooks/BeautifulSoup.py
@@ -972,6 +972,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
     NESTABLE_TAGS = {}
     RESET_NESTING_TAGS = {}
     QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = frozenset()
 
     MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
                        lambda x: x.group(1) + ' />'),
@@ -1155,7 +1156,10 @@ class BeautifulStoneSoup(Tag, SGMLParser):
     def endData(self, containerClass=NavigableString):
         if self.currentData:
             currentData = ''.join(self.currentData)
-            if not currentData.translate(self.STRIP_ASCII_SPACES):
+            # Changed by Kovid to not clobber whitespace inside <pre> tags and the like
+            if ( (not currentData.translate(self.STRIP_ASCII_SPACES)) and (
+                    not frozenset(tag.name for tag in self.tagStack).intersection(
+                        self.PRESERVE_WHITESPACE_TAGS))):
                 if '\n' in currentData:
                     currentData = '\n'
                 else:
@@ -1443,6 +1447,8 @@ class BeautifulSoup(BeautifulStoneSoup):
                                     ['br' , 'hr', 'input', 'img', 'meta',
                                     'spacer', 'link', 'frame', 'base'])
 
+    PRESERVE_WHITESPACE_TAGS = frozenset(('pre', 'textarea'))
+
     QUOTE_TAGS = {'script' : None, 'textarea' : None}
 
     #According to the HTML standard, each of these inline tags can