From 30e231c4a69a01818823681375d9e031b333bce5 Mon Sep 17 00:00:00 2001 From: Li Fanxi Date: Mon, 11 Oct 2010 17:12:58 +0800 Subject: [PATCH] [SNBOutput] Handle
 <pre> tag in html.

---
 src/calibre/ebooks/snb/snbml.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py
index 9b2c24c758..f32ddaadf1 100644
--- a/src/calibre/ebooks/snb/snbml.py
+++ b/src/calibre/ebooks/snb/snbml.py
@@ -53,6 +53,7 @@ SPACE_TAGS = [
 
 CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
 CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
+CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"
 
 class SNBMLizer(object):
     
@@ -83,7 +84,7 @@ class SNBMLizer(object):
         output = [ u'' ]
         stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
         content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
-        content = self.remove_newlines(content)
+#        content = self.remove_newlines(content)
         trees = { }
         for subitem, subtitle in self.subitems:
             snbcTree = etree.Element("snbc")
@@ -96,7 +97,12 @@ class SNBMLizer(object):
 
         subitem = ''
         for line in output.splitlines():
-            line = line.strip(u' \t\n\r\u3000')
+            if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
+                line = line.strip(u' \t\n\r\u3000')
+            else:
+                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
+                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
+                continue
             if len(line) != 0:
                 if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                     prefix = ProcessFileName(os.path.dirname(self.item.href))
@@ -137,7 +143,7 @@ class SNBMLizer(object):
         text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)
 
         # Remove multiple spaces.
-        text = re.sub('[ ]{2,}', ' ', text)
+        #text = re.sub('[ ]{2,}', ' ', text)
 
         # Remove excessive newlines.
         text = re.sub('\n[ ]+\n', '\n\n', text)
@@ -187,7 +193,7 @@ class SNBMLizer(object):
 
         return text
 
-    def dump_text(self, subitems, elem, stylizer, end=''):
+    def dump_text(self, subitems, elem, stylizer, end='', pre=False):
 
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
@@ -225,20 +231,27 @@ class SNBMLizer(object):
         if tag == 'br':
             text.append(u'\n\n')
 
+        pre = (tag == 'pre' or pre)
         # Process tags that contain text.
         if hasattr(elem, 'text') and elem.text:
-            text.append(elem.text)
-
+            if pre:
+                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.text.splitlines()))
+            else:
+                text.append(elem.text)
+            
         for item in elem:
             en = u''
             if len(text) >= 2:
                 en = text[-1][-2:]
-            text += self.dump_text(subitems, item, stylizer, en)
+            text += self.dump_text(subitems, item, stylizer, en, pre)
 
         if in_block:
             text.append(u'\n\n')
 
         if hasattr(elem, 'tail') and elem.tail:
-            text.append(elem.tail)
+            if pre:
+                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.tail.splitlines()))
+            else:
+                text.append(elem.tail)
 
         return text