From dd69e247476fa000e7f5b2d4edb60034d862dbdd Mon Sep 17 00:00:00 2001
From: Li Fanxi <lifanxi@freemindworld.com>
Date: Sat, 9 Oct 2010 22:30:38 +0800
Subject: [PATCH] [SNBOutput] Add basic output support for SNB file.

---
 src/calibre/ebooks/snb/output.py  | 193 ++++++++++++++++---
 src/calibre/ebooks/snb/snbfile.py | 300 ++++++++++++++++++++++++++++++
 src/calibre/ebooks/snb/snbml.py   | 160 ++++++++++++++++
 3 files changed, 629 insertions(+), 24 deletions(-)
 create mode 100644 src/calibre/ebooks/snb/snbfile.py
 create mode 100644 src/calibre/ebooks/snb/snbml.py

diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py
index 4b94b65405..c302c17729 100644
--- a/src/calibre/ebooks/snb/output.py
+++ b/src/calibre/ebooks/snb/output.py
@@ -4,10 +4,29 @@ __license__ = 'GPL 3'
 __copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
 __docformat__ = 'restructuredtext en'
 
-import os
+import os, string
 
-from calibre.customize.conversion import OutputFormatPlugin, \
-    OptionRecommendation
+from lxml import etree
+from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
+from calibre.ptempfile import TemporaryDirectory
+from calibre.constants import __appname__, __version__
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.snb.snbfile import SNBFile
+from calibre.ebooks.snb.snbml import SNBMLizer
+
+def ProcessFileName(fileName):
+    # Flatten the path: directory separators become underscores
+    fileName = fileName.replace("/", "_").replace(os.sep, "_")
+    # Fold the HTML fragment marker ('#') into the name as well
+    fileName = fileName.replace("#", "_")
+    # Make it lower case
+    fileName = fileName.lower()
+    # Give .jpeg, .jpg, .gif and .svg names a .png extension
+    root, ext = os.path.splitext(fileName) 
+    if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]:
+        fileName = root + '.png'
+    return fileName
+    
 
 class SNBOutput(OutputFormatPlugin):
 
@@ -45,26 +64,152 @@ class SNBOutput(OutputFormatPlugin):
      ])
 
     def convert(self, oeb_book, output_path, input_plugin, opts, log):
+        """Write oeb_book to output_path as an SNB file, staged in a temp dir."""
+        with TemporaryDirectory('_snb_output') as tdir:
+            # Create stub directories
+            snbfDir = os.path.join(tdir, 'snbf')
+            snbcDir = os.path.join(tdir, 'snbc')
+            snbiDir = os.path.join(tdir, 'snbc/images')
+            os.mkdir(snbfDir)
+            os.mkdir(snbcDir)
+            os.mkdir(snbiDir)
+
+            # Process metadata; missing fields become empty strings
+            meta = oeb_book.metadata
+            if meta.title:
+                title = unicode(meta.title[0])
+            else:
+                title = ''
+            authors = [unicode(x) for x in meta.creator if x.role == 'aut']
+            if meta.publisher:
+                publishers = unicode(meta.publisher[0])
+            else:
+                publishers = ''
+            if meta.language:
+                lang = unicode(meta.language[0]).upper()
+            else:
+                lang = ''
+            if meta.description:
+                abstract = unicode(meta.description[0])
+            else:
+                abstract = ''
+
+            # Process Cover: use the guide's 'cover' entry unless a 'titlepage' exists
+            from calibre.ebooks.oeb.base import urldefrag
+            g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
+            href = None
+            if 'titlepage' not in g:
+                if 'cover' in g:
+                    href = g['cover'].href
+
+            # Output book info file
+            bookInfoTree = etree.Element("book-snbf", version="1.0")
+            headTree = etree.SubElement(bookInfoTree, "head")
+            etree.SubElement(headTree, "name").text = title
+            etree.SubElement(headTree, "author").text = ' '.join(authors)
+            etree.SubElement(headTree, "language").text = lang
+            etree.SubElement(headTree, "rights")
+            etree.SubElement(headTree, "publisher").text = publishers
+            etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__
+            etree.SubElement(headTree, "created")
+            etree.SubElement(headTree, "abstract").text = abstract
+            if href is not None:
+                etree.SubElement(headTree, "cover").text = ProcessFileName(href)
+            else:
+                etree.SubElement(headTree, "cover")
+            # The context manager closes the file even if serialization fails
+            with open(os.path.join(snbfDir, 'book.snbf'), 'wb') as bookInfoFile:
+                bookInfoFile.write(etree.tostring(bookInfoTree, pretty_print=True, encoding='utf-8'))
+
+            # Output TOC
+            tocInfoTree = etree.Element("toc-snbf")
+            tocHead = etree.SubElement(tocInfoTree, "head")
+            tocBody = etree.SubElement(tocInfoTree, "body")
+            outputFiles = { }
+            if oeb_book.toc.count() == 0:
+                log.warn('This SNB file has no Table of Contents. '
+                    'Creating a default TOC')
+                first = next(iter(oeb_book.spine))
+                oeb_book.toc.add(_('Start'), first.href)
+
+            for tocitem in oeb_book.toc:
+                ch = etree.SubElement(tocBody, "chapter")
+                ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
+                ch.text = tocitem.title
+                # outputFiles maps each content file to the list of
+                # (fragment id, title) pairs the TOC points at; an empty
+                # fragment id stands for the file as a whole.
+                if '#' in tocitem.href:
+                    item = tocitem.href.split('#')
+                    if len(item) != 2:
+                        log.error('Error in TOC item: %s' % tocitem)
+                    else:
+                        outputFiles.setdefault(item[0], []).append(
+                            (item[1], tocitem.title))
+                else:
+                    # Store the title text, not the TOC node itself: the
+                    # SNBMLizer assigns this value to the <title> element
+                    # text of the snbc tree it builds.
+                    outputFiles.setdefault(tocitem.href, []).append(
+                        ("", tocitem.title))
+
+            etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)
+
+            # Write snbf/toc.snbf
+            with open(os.path.join(snbfDir, 'toc.snbf'), 'wb') as tocInfoFile:
+                tocInfoFile.write(etree.tostring(tocInfoTree, pretty_print=True, encoding='utf-8'))
+
+            # Output Files
+            from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES, PNG_MIME
+            for item in s:
+                if m.hrefs[item.href].media_type in OEB_DOCS:
+                    if item.href not in outputFiles:
+                        log.debug('Skipping %s because unused in TOC.' % item.href)
+                        continue
+                    log.debug('Converting %s to snbc...' % item.href)
+                    snbwriter = SNBMLizer(log)
+                    snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts)
+                    for subName in snbcTrees:
+                        postfix = ''
+                        if subName != '':
+                            postfix = '_' + subName
+                        outputName = ProcessFileName(item.href + postfix + ".snbc")
+                        with open(os.path.join(snbcDir, outputName), 'wb') as outputFile:
+                            outputFile.write(etree.tostring(snbcTrees[subName], pretty_print=True, encoding='utf-8'))
+            for item in m:
+                if m.hrefs[item.href].media_type in OEB_IMAGES:
+                    log.debug('Converting image: %s ...' % item.href)
+                    content = m.hrefs[item.href].data
+                    if m.hrefs[item.href].media_type != PNG_MIME:
+                        # Convert
+                        from calibre.utils.magick import Image
+                        img = Image()
+                        img.load(content)
+                        img.save(os.path.join(snbiDir, ProcessFileName(item.href)))
+                    else:
+                        # Already PNG: write the bytes through unchanged
+                        with open(os.path.join(snbiDir, ProcessFileName(item.href)), 'wb') as outputFile:
+                            outputFile.write(content)
+
+            # Package as SNB File
+            snbFile = SNBFile()
+            snbFile.FromDir(tdir)
+            snbFile.Output(output_path)
+
+if __name__ == '__main__':
+    from calibre.ebooks.oeb.reader import OEBReader
+    from calibre.ebooks.oeb.base import OEBBook
+    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
+    from calibre.customize.profiles import HanlinV3Output
+    class OptionValues(object):
         pass
-        # writer = TXTMLizer(log)
-        # txt = writer.extract_content(oeb_book, opts)
-
-        # log.debug('\tReplacing newlines with selected type...')
-        # txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)
-
-        # close = False
-        # if not hasattr(output_path, 'write'):
-        #     close = True
-        #     if not os.path.exists(os.path.dirname(output_path)) and os.path.dirname(output_path) != '':
-        #         os.makedirs(os.path.dirname(output_path))
-        #     out_stream = open(output_path, 'wb')
-        # else:
-        #     out_stream = output_path
-
-        # out_stream.seek(0)
-        # out_stream.truncate()
-        # out_stream.write(txt.encode(opts.output_encoding, 'replace'))
-
-        # if close:
-        #     out_stream.close()
 
+    opts = OptionValues()
+    opts.output_profile = HanlinV3Output(None)
+    
+    html_preprocessor = HTMLPreProcessor(None, None, opts)
+    from calibre.utils.logging import default_log
+    oeb = OEBBook(default_log, html_preprocessor)
+    reader = OEBReader
+    reader()(oeb, '/tmp/bbb/processed/')
+    SNBOutput(None).convert(oeb, '/tmp/test.snb', None, None, default_log);
diff --git a/src/calibre/ebooks/snb/snbfile.py b/src/calibre/ebooks/snb/snbfile.py
new file mode 100644
index 0000000000..aa690fb92b
--- /dev/null
+++ b/src/calibre/ebooks/snb/snbfile.py
@@ -0,0 +1,300 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
+__docformat__ = 'restructuredtext en'
+
+import sys, struct, zlib, bz2, os, math
+
+class FileStream:
+    def IsBinary(self):
+        return self.attr & 0x41000000 != 0x41000000
+
+def compareFileStream(file1, file2):
+    return cmp(file1.fileName, file2.fileName)
+
+class BlockData:
+    pass
+
+class SNBFile:
+    '''In-memory SNB file: Parse() reads one, FromDir() and Output() build and write one.'''
+    MAGIC = 'SNBP000B'
+    REV80 = 0x00008000
+    REVA3 = 0x00A3A3A3
+    REVZ1 = 0x00000000
+    REVZ2 = 0x00000000
+
+    def __init__(self, inputFile = None):
+        # Per-instance lists: as class attributes they were shared by every SNBFile
+        self.files = []
+        self.blocks = []
+        if inputFile is not None:
+            self.Parse(inputFile)
+        
+    def Parse(self, inputFile):
+        self.fileName = inputFile
+        
+        snbFile = open(self.fileName, "rb")
+        snbFile.seek(0)
+        
+        # Read header
+        vmbr = snbFile.read(44)
+        (self.magic, self.rev80, self.revA3, self.revZ1, 
+         self.fileCount, self.vfatSize, self.vfatCompressed, 
+         self.binStreamSize, self.plainStreamSizeUncompressed, 
+         self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)
+
+        # Read FAT
+        self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
+        self.ParseFile(self.vfat, self.fileCount)
+        
+        # Read tail 
+        snbFile.seek(-16, os.SEEK_END)
+        #plainStreamEnd = snbFile.tell()
+        tailblock = snbFile.read(16)
+        (self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
+        snbFile.seek(self.tailOffset)
+        self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
+        self.tailSizeUncompressed = len(self.vTailUncompressed)
+        self.ParseTail(self.vTailUncompressed, self.fileCount)
+        
+        # Uncompress file data
+        # Read files
+        binPos = 0
+        plainPos = 0
+        uncompressedData = None
+        for f in self.files:
+            if f.attr & 0x41000000 == 0x41000000: 
+                # Compressed Files
+                if uncompressedData == None:
+                    uncompressedData = ""
+                    for i in range(self.plainBlock):
+                        bzdc = bz2.BZ2Decompressor()
+                        if (i < self.plainBlock - 1):
+                            bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
+                        else:
+                            bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
+                        snbFile.seek(self.blocks[self.binBlock + i].Offset);
+                        try:
+                            data = snbFile.read(bSize)
+                            uncompressedData += bzdc.decompress(data)
+                        except EOFError, e:
+                            print e
+                f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
+                plainPos += f.fileSize
+            elif f.attr & 0x01000000 == 0x01000000:
+                # Binary Files
+                snbFile.seek(44 + self.vfatCompressed + binPos)
+                f.fileBody = snbFile.read(f.fileSize)
+                binPos += f.fileSize
+            else:
+                print f.attr, f.fileName
+                raise Exception("Invalid file")
+        snbFile.close()
+
+    def ParseFile(self, vfat, fileCount):
+        fileNames = vfat[fileCount*12:].split('\0');
+        for i in range(fileCount):
+            f = FileStream()
+            (f.attr, f.fileNameOffset, f.fileSize) = struct.unpack('>iii', vfat[i * 12 : (i+1)*12])
+            f.fileName = fileNames[i]
+            self.files.append(f)
+            
+    def ParseTail(self, vtail, fileCount):
+        self.binBlock = (self.binStreamSize + 0x8000 - 1) / 0x8000;
+        self.plainBlock = (self.plainStreamSizeUncompressed + 0x8000 - 1) / 0x8000;
+        for i in range(self.binBlock + self.plainBlock):
+            block = BlockData()
+            (block.Offset,) = struct.unpack('>i', vtail[i * 4 : (i+1) * 4])
+            self.blocks.append(block)
+        for i in range(fileCount):
+            (self.files[i].blockIndex, self.files[i].contentOffset) = struct.unpack('>ii', vtail[(self.binBlock + self.plainBlock) * 4 + i * 8 : (self.binBlock + self.plainBlock) * 4 + (i+1) * 8])
+
+    def IsValid(self):
+        if self.magic != SNBFile.MAGIC:
+            return False
+        if self.rev80 != SNBFile.REV80:
+            return False
+        if self.revA3 != SNBFile.REVA3:
+            return False
+        if self.revZ1 != SNBFile.REVZ1:
+            return False
+        if self.revZ2 != SNBFile.REVZ2:
+            return False
+        if self.vfatSize != len(self.vfat):
+            return False
+        if self.fileCount != len(self.files):
+            return False
+        if (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8 != self.tailSizeUncompressed:
+            return False
+        if self.tailMagic != SNBFile.MAGIC:
+            print self.tailMagic
+            return False
+        return True
+
+    def FromDir(self, tdir):
+        # Queue every file under tdir: .snbf/.snbc as plain text, all others as binary
+        for root, dirs, files in os.walk(tdir):
+            for name in files:
+                relName = os.path.relpath(os.path.join(root, name), tdir)
+                if os.path.splitext(name)[1] in [ ".snbf", ".snbc" ]:
+                    self.AppendPlain(relName, tdir)
+                else:
+                    self.AppendBinary(relName, tdir)
+
+    def AppendPlain(self, fileName, tdir):
+        f = FileStream()
+        f.attr = 0x41000000  # plain entry: Output() puts it in the bz2 stream
+        with open(os.path.join(tdir, fileName), 'rb') as src:
+            f.fileBody = src.read()
+        f.fileSize = len(f.fileBody)  # size taken from the bytes actually read
+        f.fileName = fileName
+        self.files.append(f)
+
+    def AppendBinary(self, fileName, tdir):
+        f = FileStream()
+        f.attr = 0x01000000  # binary entry: Output() stores it uncompressed
+        with open(os.path.join(tdir, fileName), 'rb') as src:
+            f.fileBody = src.read()
+        f.fileSize = len(f.fileBody)  # size taken from the bytes actually read
+        f.fileName = fileName
+        self.files.append(f)
+        
+    def Output(self, outputFile):
+
+        # Sort the files in the file buffer,
+        # as required by the SNB file format
+        self.files.sort(compareFileStream)
+
+        outputFile = open(outputFile, 'wb')
+        # File header part 1
+        vmbrp1 = struct.pack('>8siiii', SNBFile.MAGIC, SNBFile.REV80, SNBFile.REVA3, SNBFile.REVZ1, len(self.files))
+        
+        # Create VFAT & file stream
+        vfat = ''
+        fileNameTable = ''
+        plainStream = ''
+        binStream = ''
+        for f in self.files:
+            vfat += struct.pack('>iii', f.attr, len(fileNameTable), f.fileSize);
+            fileNameTable += (f.fileName + '\0')
+            
+            if f.attr & 0x41000000 == 0x41000000: 
+                # Plain Files
+                f.contentOffset = len(plainStream)
+                plainStream += f.fileBody
+            elif f.attr & 0x01000000 == 0x01000000:
+                # Binary Files
+                f.contentOffset = len(binStream)
+                binStream += f.fileBody
+            else:
+                print f.attr, f.fileName
+                raise Exception("Unknown file type")
+        vfatCompressed = zlib.compress(vfat+fileNameTable)
+        
+        # File header part 2
+        vmbrp2 = struct.pack('>iiiii', len(vfat+fileNameTable), len(vfatCompressed), len(binStream), len(plainStream), SNBFile.REVZ2)
+        # Write header
+        outputFile.write(vmbrp1 + vmbrp2)
+        # Write vfat 
+        outputFile.write(vfatCompressed)
+        
+        # Generate block information
+        binBlockOffset = 0x2C + len(vfatCompressed)
+        plainBlockOffset = binBlockOffset + len(binStream)
+        
+        binBlock = (len(binStream) + 0x8000 - 1) / 0x8000
+        plainBlock = (len(plainStream) + 0x8000 - 1) / 0x8000
+        
+        offset = 0
+        tailBlock = ''
+        for i in range(binBlock):
+            tailBlock += struct.pack('>i', binBlockOffset + offset)
+            offset += 0x8000;
+        tailRec = ''
+        for f in self.files:
+            t = 0
+            if f.IsBinary():
+                t = 0
+            else:
+                t = binBlock
+            tailRec += struct.pack('>ii', f.contentOffset / 0x8000 + t, f.contentOffset % 0x8000);
+            
+        # Write binary stream
+        outputFile.write(binStream)
+        
+        # Write plain stream
+        pos = 0
+        offset = 0
+        while pos < len(plainStream):
+            tailBlock += struct.pack('>i', plainBlockOffset + offset);
+            block = plainStream[pos:pos+0x8000];
+            compressed = bz2.compress(block)
+            outputFile.write(compressed)
+            offset += len(compressed)
+            pos += 0x8000
+        
+        # Write tail block
+        compressedTail = zlib.compress(tailBlock + tailRec)
+        outputFile.write(compressedTail)
+       
+        # Write tail pointer
+        veom = struct.pack('>ii', len(compressedTail), plainBlockOffset + offset)
+        outputFile.write(veom)
+        
+        # Write file end mark
+        outputFile.write(SNBFile.MAGIC);
+        
+        # Close
+        outputFile.close()
+        return 
+
+    def Dump(self):
+        print "File Name:\t", self.fileName
+        print "File Count:\t", self.fileCount
+        print "VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed)
+        print "Binary Stream Size:\t", self.binStreamSize
+        print "Plain Stream Uncompressed Size:\t", self.plainStreamSizeUncompressed
+        print "Binary Block Count:\t", self.binBlock
+        print "Plain Block Count:\t", self.plainBlock
+        for i in range(self.fileCount):
+            print "File ", i
+            f = self.files[i]
+            print "File Name: ", f.fileName
+            print "File Attr: ", f.attr
+            print "File Size: ", f.fileSize
+            print "Block Index: ", f.blockIndex
+            print "Content Offset: ", f.contentOffset
+            tempFile = open("/tmp/" + f.fileName, 'wb')
+            tempFile.write(f.fileBody)
+            tempFile.close()
+            
+def usage():
+    print "This unit test is for INTERNAL usage only!"
+    print "This unit test accept two parameters."
+    print "python snbfile.py <INPUTFILE> <DESTFILE>"
+    print "The input file will be extracted and write to dest file. "
+    print "Meta data of the file will be shown during this process." 
+
+def main():
+    if len(sys.argv) != 3:
+        usage()
+        sys.exit(0)
+    inputFile = sys.argv[1]
+    outputFile = sys.argv[2]
+
+    print "Input file: ", inputFile
+    print "Output file: ", outputFile
+    
+    snbFile = SNBFile(inputFile)
+    if snbFile.IsValid():
+        snbFile.Dump()
+        snbFile.Output(outputFile)
+    else:
+        print "The input file is invalid."
+        return 1
+    return 0
+
+if __name__ == "__main__":
+    """SNB file unit test"""
+    sys.exit(main())
diff --git a/src/calibre/ebooks/snb/snbml.py b/src/calibre/ebooks/snb/snbml.py
new file mode 100644
index 0000000000..e1956b5937
--- /dev/null
+++ b/src/calibre/ebooks/snb/snbml.py
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+
+__license__ = 'GPL 3'
+__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+Transform OEB content into SNB format
+'''
+
+import os
+import re
+
+from lxml import etree
+
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.stylizer import Stylizer
+
+def ProcessFileName(fileName):
+    # Flatten the path: directory separators become underscores
+    fileName = fileName.replace("/", "_").replace(os.sep, "_")
+    # Fold the HTML fragment marker ('#') into the name as well
+    fileName = fileName.replace("#", "_")
+    # Make it lower case
+    fileName = fileName.lower()
+    # Give .jpeg, .jpg, .gif and .svg names a .png extension
+    root, ext = os.path.splitext(fileName) 
+    if ext in [ '.jpeg', '.jpg', '.gif', '.svg' ]:
+        fileName = root + '.png'
+    return fileName
+    
+
+BLOCK_TAGS = [
+    'div',
+    'p',
+    'h1',
+    'h2',
+    'h3',
+    'h4',
+    'h5',
+    'h6',
+    'li',
+    'tr',
+]
+
+BLOCK_STYLES = [
+    'block',
+]
+
+SPACE_TAGS = [
+    'td',
+]
+
+CLIABRE_SNB_IMG_TAG = "<calibre_snb_temp_img>"
+
+class SNBMLizer(object):
+    '''Turn the XHTML of one OEB item into snbc trees keyed by sub-item id.'''
+    def __init__(self, log):
+        self.log = log
+        # Per-instance state: a class-level curText list was shared by all instances
+        self.curSubItem = ""
+        self.curText = [ ]
+
+    def extract_content(self, oeb_book, item, subitems, opts):
+        self.log.info('Converting XHTML to SNBC...')
+        self.oeb_book = oeb_book
+        self.opts = opts
+        self.item = item
+        self.subitems = subitems
+        return self.mlize();
+
+
+    def mlize(self):
+        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
+        content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
+        content = self.remove_newlines(content)
+        trees = { }
+        for subitem, subtitle in self.subitems:
+            snbcTree = etree.Element("snbc")
+            etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle
+            etree.SubElement(snbcTree, "body")
+            trees[subitem] = snbcTree
+
+        self.dump_text(trees, self.subitems, etree.fromstring(content), stylizer)
+        self.Output(trees)
+        return trees
+
+    def remove_newlines(self, text):
+        self.log.debug('\tRemove newlines for processing...')
+        text = text.replace('\r\n', ' ')
+        text = text.replace('\n', ' ')
+        text = text.replace('\r', ' ')
+
+        return text
+
+    def dump_text(self, trees, subitems, elem, stylizer, end=''):
+        '''
+        Recursively append the text of elem to self.curText.
+        @trees, @subitems: output trees and the (id, title) pairs that split them.
+        @elem, @stylizer: the element to process and the style lookup for it.
+        @end: The last two characters of the text from the previous element,
+              used to decide whether a blank line or space is still needed.
+        '''
+        if not isinstance(elem.tag, basestring) \
+           or namespace(elem.tag) != XHTML_NS:
+            return ['']
+
+        if elem.attrib.get('id') is not None and elem.attrib['id'] in [ href for href, title in subitems ]:
+            if self.curSubItem is not None and self.curSubItem != elem.attrib['id']:
+                self.Output(trees)
+                self.curSubItem = elem.attrib['id']
+                self.curText = [ ]
+
+        style = stylizer.style(elem)
+
+        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+           or style['visibility'] == 'hidden':
+            return ['']
+
+        tag = barename(elem.tag)
+        in_block = False
+
+        # Are we in a paragraph block?
+        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
+            in_block = True
+            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
+                self.curText.append(u'\n\n')
+
+        if tag in SPACE_TAGS:
+            if not end.endswith(u' ') and hasattr(elem, 'text') and elem.text:
+                self.curText.append(u' ')
+
+        if tag == 'img' and elem.attrib.get('src'):  # an <img> without src has nothing to reference
+            self.curText.append(u'%s%s' % (CLIABRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src'])))
+
+        # Process tags that contain text.
+        if hasattr(elem, 'text') and elem.text:
+            self.curText.append(elem.text)
+
+        for item in elem:
+            en = u''
+            if len(self.curText) >= 2:
+                en = self.curText[-1][-2:]
+            self.dump_text(trees, subitems, item, stylizer, en)
+
+        if in_block:
+            self.curText.append(u'\n\n')
+
+        if hasattr(elem, 'tail') and elem.tail:
+            self.curText.append(elem.tail)
+
+    def Output(self, trees):
+        if self.curSubItem == None or not self.curSubItem in trees:
+            return
+        for t in self.curText:
+            if len(t.strip(' \t\n\r')) != 0:
+                if t.find(CLIABRE_SNB_IMG_TAG) == 0:
+                    etree.SubElement(trees[self.curSubItem], "img").text = t[len(CLIABRE_SNB_IMG_TAG):]
+                else:
+                    etree.SubElement(trees[self.curSubItem], "text").text = etree.CDATA(unicode('' + t))