diff --git a/src/calibre/ebooks/BeautifulSoup.py b/src/calibre/ebooks/BeautifulSoup.py index c19f4c0a11..ecdfec486b 100644 --- a/src/calibre/ebooks/BeautifulSoup.py +++ b/src/calibre/ebooks/BeautifulSoup.py @@ -972,6 +972,7 @@ class BeautifulStoneSoup(Tag, SGMLParser): NESTABLE_TAGS = {} RESET_NESTING_TAGS = {} QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = frozenset() MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), lambda x: x.group(1) + ' />'), @@ -1155,7 +1156,10 @@ class BeautifulStoneSoup(Tag, SGMLParser): def endData(self, containerClass=NavigableString): if self.currentData: currentData = ''.join(self.currentData) - if not currentData.translate(self.STRIP_ASCII_SPACES): + # Changed by Kovid to not clobber whitespace inside <pre> tags and the like
+            if ( (not currentData.translate(self.STRIP_ASCII_SPACES)) and (
+                    not frozenset(tag.name for tag in self.tagStack).intersection(
+                        self.PRESERVE_WHITESPACE_TAGS))):
                 if '\n' in currentData:
                     currentData = '\n'
                 else:
@@ -1443,6 +1447,8 @@ class BeautifulSoup(BeautifulStoneSoup):
                                     ['br' , 'hr', 'input', 'img', 'meta',
                                     'spacer', 'link', 'frame', 'base'])
 
+    PRESERVE_WHITESPACE_TAGS = frozenset(('pre', 'textarea'))
+
     QUOTE_TAGS = {'script' : None, 'textarea' : None}
 
     #According to the HTML standard, each of these inline tags can