mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Support for the SNB format used by the Bambook e-book reader
This commit is contained in:
commit
88bc2991b2
BIN
resources/images/mimetypes/snb.png
Normal file
BIN
resources/images/mimetypes/snb.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.1 KiB |
@ -293,6 +293,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.rtf import get_metadata
|
from calibre.ebooks.metadata.rtf import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class SNBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read SNB metadata'
|
||||||
|
file_types = set(['snb'])
|
||||||
|
description = _('Read metadata from %s files') % 'SNB'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.snb import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
class TOPAZMetadataReader(MetadataReaderPlugin):
|
class TOPAZMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read Topaz metadata'
|
name = 'Read Topaz metadata'
|
||||||
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
|
|||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
from calibre.ebooks.lrf.input import LRFInput
|
from calibre.ebooks.lrf.input import LRFInput
|
||||||
from calibre.ebooks.chm.input import CHMInput
|
from calibre.ebooks.chm.input import CHMInput
|
||||||
|
from calibre.ebooks.snb.input import SNBInput
|
||||||
|
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
@ -434,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
|
|||||||
from calibre.ebooks.rtf.output import RTFOutput
|
from calibre.ebooks.rtf.output import RTFOutput
|
||||||
from calibre.ebooks.tcr.output import TCROutput
|
from calibre.ebooks.tcr.output import TCROutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
from calibre.ebooks.snb.output import SNBOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
@ -495,6 +508,7 @@ plugins += [
|
|||||||
TXTInput,
|
TXTInput,
|
||||||
LRFInput,
|
LRFInput,
|
||||||
CHMInput,
|
CHMInput,
|
||||||
|
SNBInput,
|
||||||
]
|
]
|
||||||
plugins += [
|
plugins += [
|
||||||
EPUBOutput,
|
EPUBOutput,
|
||||||
@ -510,6 +524,7 @@ plugins += [
|
|||||||
RTFOutput,
|
RTFOutput,
|
||||||
TCROutput,
|
TCROutput,
|
||||||
TXTOutput,
|
TXTOutput,
|
||||||
|
SNBOutput,
|
||||||
]
|
]
|
||||||
# Order here matters. The first matched device is the one used.
|
# Order here matters. The first matched device is the one used.
|
||||||
plugins += [
|
plugins += [
|
||||||
|
@ -647,11 +647,25 @@ class NookOutput(OutputProfile):
|
|||||||
fbase = 16
|
fbase = 16
|
||||||
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
||||||
|
|
||||||
|
class BambookOutput(OutputProfile):
|
||||||
|
|
||||||
|
name = 'Sanda Bambook'
|
||||||
|
short_name = 'bambook'
|
||||||
|
description = _('This profile is intended for the Sanda Bambook.')
|
||||||
|
|
||||||
|
# Screen size is a best guess
|
||||||
|
screen_size = (800, 600)
|
||||||
|
comic_screen_size = (700, 540)
|
||||||
|
dpi = 168.451
|
||||||
|
fbase = 12
|
||||||
|
fsizes = [10, 12, 14, 16]
|
||||||
|
|
||||||
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
||||||
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
|
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
|
||||||
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
|
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
|
||||||
iPadOutput, KoboReaderOutput,
|
iPadOutput, KoboReaderOutput,
|
||||||
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
||||||
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,]
|
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
|
||||||
|
BambookOutput, ]
|
||||||
|
|
||||||
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
||||||
|
@ -25,7 +25,7 @@ class DRMError(ValueError):
|
|||||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
||||||
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||||
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan']
|
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
|
||||||
|
|
||||||
class HTMLRenderer(object):
|
class HTMLRenderer(object):
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
|
|||||||
'html', 'htm', 'xhtml', 'xhtm',
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
||||||
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
||||||
'rb', 'imp', 'azw'
|
'rb', 'imp', 'azw', 'snb'
|
||||||
]
|
]
|
||||||
|
|
||||||
# The priorities for loading metadata from different file types
|
# The priorities for loading metadata from different file types
|
||||||
|
47
src/calibre/ebooks/metadata/snb.py
Executable file
47
src/calibre/ebooks/metadata/snb.py
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
'''Read meta information from SNB files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
|
||||||
|
import os
|
||||||
|
from StringIO import StringIO
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
def get_metadata(stream, extract_cover=True):
|
||||||
|
""" Return metadata as a L{MetaInfo} object """
|
||||||
|
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||||
|
snbFile = SNBFile()
|
||||||
|
|
||||||
|
try:
|
||||||
|
if not hasattr(stream, 'write'):
|
||||||
|
snbFile.Parse(StringIO(stream), True)
|
||||||
|
else:
|
||||||
|
stream.seek(0)
|
||||||
|
snbFile.Parse(stream, True)
|
||||||
|
|
||||||
|
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||||
|
|
||||||
|
if meta != None:
|
||||||
|
meta = etree.fromstring(meta)
|
||||||
|
mi.title = meta.find('.//head/name').text
|
||||||
|
mi.authors = [meta.find('.//head/author').text]
|
||||||
|
mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
|
||||||
|
mi.publisher = meta.find('.//head/publisher').text
|
||||||
|
|
||||||
|
if extract_cover:
|
||||||
|
cover = meta.find('.//head/cover')
|
||||||
|
if cover != None and cover.text != None:
|
||||||
|
root, ext = os.path.splitext(cover.text)
|
||||||
|
if ext == '.jpeg':
|
||||||
|
ext = '.jpg'
|
||||||
|
mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
return mi
|
9
src/calibre/ebooks/snb/__init__.py
Normal file
9
src/calibre/ebooks/snb/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Used for snb output
|
||||||
|
'''
|
||||||
|
|
103
src/calibre/ebooks/snb/input.py
Executable file
103
src/calibre/ebooks/snb/input.py
Executable file
@ -0,0 +1,103 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, uuid
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.ebooks.oeb.base import DirContainer
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
|
def html_encode(s):
|
||||||
|
return s.replace(u'&', u'&').replace(u'<', u'<').replace(u'>', u'>').replace(u'"', u'"').replace(u"'", u''').replace(u'\n', u'<br/>').replace(u' ', u' ')
|
||||||
|
|
||||||
|
class SNBInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'SNB Input'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
description = 'Convert SNB files to OEB'
|
||||||
|
file_types = set(['snb'])
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
accelerators):
|
||||||
|
log.debug("Parsing SNB file...")
|
||||||
|
snbFile = SNBFile()
|
||||||
|
try:
|
||||||
|
snbFile.Parse(stream)
|
||||||
|
except:
|
||||||
|
raise ValueError("Invalid SNB file")
|
||||||
|
if not snbFile.IsValid():
|
||||||
|
log.debug("Invaild SNB file")
|
||||||
|
raise ValueError("Invalid SNB file")
|
||||||
|
log.debug("Handle meta data ...")
|
||||||
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
|
oeb = create_oebbook(log, None, options, self,
|
||||||
|
encoding=options.input_encoding, populate=False)
|
||||||
|
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||||
|
if meta != None:
|
||||||
|
meta = etree.fromstring(meta)
|
||||||
|
oeb.metadata.add('title', meta.find('.//head/name').text)
|
||||||
|
oeb.metadata.add('creator', meta.find('.//head/author').text, attrib={'role':'aut'})
|
||||||
|
oeb.metadata.add('language', meta.find('.//head/language').text.lower().replace('_', '-'))
|
||||||
|
oeb.metadata.add('creator', meta.find('.//head/generator').text)
|
||||||
|
oeb.metadata.add('publisher', meta.find('.//head/publisher').text)
|
||||||
|
cover = meta.find('.//head/cover')
|
||||||
|
if cover != None and cover.text != None:
|
||||||
|
oeb.guide.add('cover', 'Cover', cover.text)
|
||||||
|
|
||||||
|
bookid = str(uuid.uuid4())
|
||||||
|
oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
|
||||||
|
for ident in oeb.metadata.identifier:
|
||||||
|
if 'id' in ident.attrib:
|
||||||
|
oeb.uid = oeb.metadata.identifier[0]
|
||||||
|
break
|
||||||
|
|
||||||
|
with TemporaryDirectory('_chm2oeb', keep=True) as tdir:
|
||||||
|
log.debug('Process TOC ...')
|
||||||
|
toc = snbFile.GetFileStream('snbf/toc.snbf')
|
||||||
|
oeb.container = DirContainer(tdir, log)
|
||||||
|
if toc != None:
|
||||||
|
toc = etree.fromstring(toc)
|
||||||
|
i = 1
|
||||||
|
for ch in toc.find('.//body'):
|
||||||
|
chapterName = ch.text
|
||||||
|
chapterSrc = ch.get('src')
|
||||||
|
fname = 'ch_%d.htm' % i
|
||||||
|
data = snbFile.GetFileStream('snbc/' + chapterSrc)
|
||||||
|
if data != None:
|
||||||
|
snbc = etree.fromstring(data)
|
||||||
|
outputFile = open(os.path.join(tdir, fname), 'wb')
|
||||||
|
lines = []
|
||||||
|
for line in snbc.find('.//body'):
|
||||||
|
if line.tag == 'text':
|
||||||
|
lines.append(u'<p>%s</p>' % html_encode(line.text))
|
||||||
|
elif line.tag == 'img':
|
||||||
|
lines.append(u'<p><img src="%s" /></p>' % html_encode(line.text))
|
||||||
|
outputFile.write((HTML_TEMPLATE % (chapterName, u'\n'.join(lines))).encode('utf-8', 'replace'))
|
||||||
|
outputFile.close()
|
||||||
|
oeb.toc.add(ch.text, fname)
|
||||||
|
id, href = oeb.manifest.generate(id='html',
|
||||||
|
href=ascii_filename(fname))
|
||||||
|
item = oeb.manifest.add(id, href, 'text/html')
|
||||||
|
item.html_input_href = fname
|
||||||
|
oeb.spine.add(item, True)
|
||||||
|
i = i + 1
|
||||||
|
imageFiles = snbFile.OutputImageFiles(tdir)
|
||||||
|
for f, m in imageFiles:
|
||||||
|
id, href = oeb.manifest.generate(id='image',
|
||||||
|
href=ascii_filename(f))
|
||||||
|
item = oeb.manifest.add(id, href, m)
|
||||||
|
item.html_input_href = f
|
||||||
|
|
||||||
|
return oeb
|
||||||
|
|
265
src/calibre/ebooks/snb/output.py
Normal file
265
src/calibre/ebooks/snb/output.py
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, string
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
|
||||||
|
|
||||||
|
class SNBOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'SNB Output'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
file_type = 'snb'
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
# OptionRecommendation(name='newline', recommended_value='system',
|
||||||
|
# level=OptionRecommendation.LOW,
|
||||||
|
# short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
|
||||||
|
# help=_('Type of newline to use. Options are %s. Default is \'system\'. '
|
||||||
|
# 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
|
||||||
|
# 'For Mac OS X use \'unix\'. \'system\' will default to the newline '
|
||||||
|
# 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
|
||||||
|
OptionRecommendation(name='snb_output_encoding', recommended_value='utf-8',
|
||||||
|
level=OptionRecommendation.LOW,
|
||||||
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
|
'The default is utf-8. Note: This option is not honored by all ' \
|
||||||
|
'formats.')),
|
||||||
|
# OptionRecommendation(name='inline_toc',
|
||||||
|
# recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
# help=_('Add Table of Contents to beginning of the book.')),
|
||||||
|
OptionRecommendation(name='snb_max_line_length',
|
||||||
|
recommended_value=0, level=OptionRecommendation.LOW,
|
||||||
|
help=_('The maximum number of characters per line. This splits on '
|
||||||
|
'the first space before the specified value. If no space is found '
|
||||||
|
'the line will be broken at the space after and will exceed the '
|
||||||
|
'specified value. Also, there is a minimum of 25 characters. '
|
||||||
|
'Use 0 to disable line splitting.')),
|
||||||
|
# OptionRecommendation(name='force_max_line_length',
|
||||||
|
# recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
# help=_('Force splitting on the max-line-length value when no space '
|
||||||
|
# 'is present. Also allows max-line-length to be below the minimum')),
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
|
self.opts = opts
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||||
|
try:
|
||||||
|
rasterizer = SVGRasterizer()
|
||||||
|
rasterizer(oeb_book, opts)
|
||||||
|
except Unavailable:
|
||||||
|
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
||||||
|
|
||||||
|
# Create temp dir
|
||||||
|
with TemporaryDirectory('_snb_output') as tdir:
|
||||||
|
# Create stub directories
|
||||||
|
snbfDir = os.path.join(tdir, 'snbf')
|
||||||
|
snbcDir = os.path.join(tdir, 'snbc')
|
||||||
|
snbiDir = os.path.join(tdir, 'snbc/images')
|
||||||
|
os.mkdir(snbfDir)
|
||||||
|
os.mkdir(snbcDir)
|
||||||
|
os.mkdir(snbiDir)
|
||||||
|
|
||||||
|
# Process Meta data
|
||||||
|
meta = oeb_book.metadata
|
||||||
|
if meta.title:
|
||||||
|
title = unicode(meta.title[0])
|
||||||
|
else:
|
||||||
|
title = ''
|
||||||
|
authors = [unicode(x) for x in meta.creator if x.role == 'aut']
|
||||||
|
if meta.publisher:
|
||||||
|
publishers = unicode(meta.publisher[0])
|
||||||
|
else:
|
||||||
|
publishers = ''
|
||||||
|
if meta.language:
|
||||||
|
lang = unicode(meta.language[0]).upper()
|
||||||
|
else:
|
||||||
|
lang = ''
|
||||||
|
if meta.description:
|
||||||
|
abstract = unicode(meta.description[0])
|
||||||
|
else:
|
||||||
|
abstract = ''
|
||||||
|
|
||||||
|
# Process Cover
|
||||||
|
g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
|
||||||
|
href = None
|
||||||
|
if 'titlepage' not in g:
|
||||||
|
if 'cover' in g:
|
||||||
|
href = g['cover'].href
|
||||||
|
|
||||||
|
# Output book info file
|
||||||
|
bookInfoTree = etree.Element("book-snbf", version="1.0")
|
||||||
|
headTree = etree.SubElement(bookInfoTree, "head")
|
||||||
|
etree.SubElement(headTree, "name").text = title
|
||||||
|
etree.SubElement(headTree, "author").text = ' '.join(authors)
|
||||||
|
etree.SubElement(headTree, "language").text = lang
|
||||||
|
etree.SubElement(headTree, "rights")
|
||||||
|
etree.SubElement(headTree, "publisher").text = publishers
|
||||||
|
etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__
|
||||||
|
etree.SubElement(headTree, "created")
|
||||||
|
etree.SubElement(headTree, "abstract").text = abstract
|
||||||
|
if href != None:
|
||||||
|
etree.SubElement(headTree, "cover").text = ProcessFileName(href)
|
||||||
|
else:
|
||||||
|
etree.SubElement(headTree, "cover")
|
||||||
|
bookInfoFile = open(os.path.join(snbfDir, 'book.snbf'), 'wb')
|
||||||
|
bookInfoFile.write(etree.tostring(bookInfoTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
bookInfoFile.close()
|
||||||
|
|
||||||
|
# Output TOC
|
||||||
|
tocInfoTree = etree.Element("toc-snbf")
|
||||||
|
tocHead = etree.SubElement(tocInfoTree, "head")
|
||||||
|
tocBody = etree.SubElement(tocInfoTree, "body")
|
||||||
|
outputFiles = { }
|
||||||
|
if oeb_book.toc.count() == 0:
|
||||||
|
log.warn('This SNB file has no Table of Contents. '
|
||||||
|
'Creating a default TOC')
|
||||||
|
first = iter(oeb_book.spine).next()
|
||||||
|
oeb_book.toc.add(_('Start Page'), first.href)
|
||||||
|
else:
|
||||||
|
first = iter(oeb_book.spine).next()
|
||||||
|
if oeb_book.toc[0].href != first.href:
|
||||||
|
# The pages before the fist item in toc will be stored as
|
||||||
|
# "Cover Pages".
|
||||||
|
# oeb_book.toc does not support "insert", so we generate
|
||||||
|
# the tocInfoTree directly instead of modifying the toc
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(first.href) + ".snbc")
|
||||||
|
ch.text = _('Cover Pages')
|
||||||
|
outputFiles[first.href] = []
|
||||||
|
outputFiles[first.href].append(("", _("Cover Pages")))
|
||||||
|
|
||||||
|
for tocitem in oeb_book.toc:
|
||||||
|
if tocitem.href.find('#') != -1:
|
||||||
|
item = string.split(tocitem.href, '#')
|
||||||
|
if len(item) != 2:
|
||||||
|
log.error('Error in TOC item: %s' % tocitem)
|
||||||
|
else:
|
||||||
|
if item[0] in outputFiles:
|
||||||
|
outputFiles[item[0]].append((item[1], tocitem.title))
|
||||||
|
else:
|
||||||
|
outputFiles[item[0]] = []
|
||||||
|
if not "" in outputFiles[item[0]]:
|
||||||
|
outputFiles[item[0]].append(("", tocitem.title + _(" (Preface)")))
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(item[0]) + ".snbc")
|
||||||
|
ch.text = tocitem.title + _(" (Preface)")
|
||||||
|
outputFiles[item[0]].append((item[1], tocitem.title))
|
||||||
|
else:
|
||||||
|
if tocitem.href in outputFiles:
|
||||||
|
outputFiles[tocitem.href].append(("", tocitem.title))
|
||||||
|
else:
|
||||||
|
outputFiles[tocitem.href] = []
|
||||||
|
outputFiles[tocitem.href].append(("", tocitem.title))
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
|
||||||
|
ch.text = tocitem.title
|
||||||
|
|
||||||
|
|
||||||
|
etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)
|
||||||
|
|
||||||
|
tocInfoFile = open(os.path.join(snbfDir, 'toc.snbf'), 'wb')
|
||||||
|
tocInfoFile.write(etree.tostring(tocInfoTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
tocInfoFile.close()
|
||||||
|
|
||||||
|
# Output Files
|
||||||
|
oldTree = None
|
||||||
|
mergeLast = False
|
||||||
|
lastName = None
|
||||||
|
for item in s:
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
|
||||||
|
if m.hrefs[item.href].media_type in OEB_DOCS:
|
||||||
|
if not item.href in outputFiles:
|
||||||
|
log.debug('File %s is unused in TOC. Continue in last chapter' % item.href)
|
||||||
|
mergeLast = True
|
||||||
|
else:
|
||||||
|
if oldTree != None and mergeLast:
|
||||||
|
log.debug('Output the modified chapter again: %s' % lastName)
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
mergeLast = False
|
||||||
|
|
||||||
|
log.debug('Converting %s to snbc...' % item.href)
|
||||||
|
snbwriter = SNBMLizer(log)
|
||||||
|
snbcTrees = None
|
||||||
|
if not mergeLast:
|
||||||
|
snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts)
|
||||||
|
for subName in snbcTrees:
|
||||||
|
postfix = ''
|
||||||
|
if subName != '':
|
||||||
|
postfix = '_' + subName
|
||||||
|
lastName = ProcessFileName(item.href + postfix + ".snbc")
|
||||||
|
oldTree = snbcTrees[subName]
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
else:
|
||||||
|
log.debug('Merge %s with last TOC item...' % item.href)
|
||||||
|
snbwriter.merge_content(oldTree, oeb_book, item, [('', _("Start"))], opts)
|
||||||
|
|
||||||
|
# Output the last one if needed
|
||||||
|
log.debug('Output the last modified chapter again: %s' % lastName)
|
||||||
|
if oldTree != None and mergeLast:
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
mergeLast = False
|
||||||
|
|
||||||
|
for item in m:
|
||||||
|
if m.hrefs[item.href].media_type in OEB_IMAGES:
|
||||||
|
log.debug('Converting image: %s ...' % item.href)
|
||||||
|
content = m.hrefs[item.href].data
|
||||||
|
# Convert & Resize image
|
||||||
|
self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href)))
|
||||||
|
|
||||||
|
# Package as SNB File
|
||||||
|
snbFile = SNBFile()
|
||||||
|
snbFile.FromDir(tdir)
|
||||||
|
snbFile.Output(output_path)
|
||||||
|
|
||||||
|
def HandleImage(self, imageData, imagePath):
|
||||||
|
from calibre.utils.magick import Image
|
||||||
|
img = Image()
|
||||||
|
img.load(imageData)
|
||||||
|
(x,y) = img.size
|
||||||
|
if self.opts:
|
||||||
|
SCREEN_Y, SCREEN_X = self.opts.output_profile.comic_screen_size
|
||||||
|
else:
|
||||||
|
SCREEN_X = 540
|
||||||
|
SCREEN_Y = 700
|
||||||
|
# Handle big image only
|
||||||
|
if x > SCREEN_X or y > SCREEN_Y:
|
||||||
|
xScale = float(x) / SCREEN_X
|
||||||
|
yScale = float(y) / SCREEN_Y
|
||||||
|
scale = max(xScale, yScale)
|
||||||
|
# TODO : intelligent image rotation
|
||||||
|
# img = img.rotate(90)
|
||||||
|
# x,y = y,x
|
||||||
|
img.size = (x / scale, y / scale)
|
||||||
|
img.save(imagePath)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
|
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||||
|
from calibre.customize.profiles import HanlinV3Output
|
||||||
|
class OptionValues(object):
|
||||||
|
pass
|
||||||
|
|
||||||
|
opts = OptionValues()
|
||||||
|
opts.output_profile = HanlinV3Output(None)
|
||||||
|
|
||||||
|
html_preprocessor = HTMLPreProcessor(None, None, opts)
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
oeb = OEBBook(default_log, html_preprocessor)
|
||||||
|
reader = OEBReader
|
||||||
|
reader()(oeb, '/tmp/bbb/processed/')
|
||||||
|
SNBOutput(None).convert(oeb, '/tmp/test.snb', None, None, default_log);
|
319
src/calibre/ebooks/snb/snbfile.py
Normal file
319
src/calibre/ebooks/snb/snbfile.py
Normal file
@ -0,0 +1,319 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, struct, zlib, bz2, os
|
||||||
|
from mimetypes import types_map
|
||||||
|
|
||||||
|
class FileStream:
|
||||||
|
def IsBinary(self):
|
||||||
|
return self.attr & 0x41000000 != 0x41000000
|
||||||
|
|
||||||
|
def compareFileStream(file1, file2):
|
||||||
|
return cmp(file1.fileName, file2.fileName)
|
||||||
|
|
||||||
|
class BlockData:
|
||||||
|
pass
|
||||||
|
|
||||||
|
class SNBFile:
|
||||||
|
|
||||||
|
MAGIC = 'SNBP000B'
|
||||||
|
REV80 = 0x00008000
|
||||||
|
REVA3 = 0x00A3A3A3
|
||||||
|
REVZ1 = 0x00000000
|
||||||
|
REVZ2 = 0x00000000
|
||||||
|
|
||||||
|
def __init__(self, inputFile = None):
|
||||||
|
self.files = []
|
||||||
|
self.blocks = []
|
||||||
|
|
||||||
|
if inputFile != None:
|
||||||
|
self.Open(inputFile)
|
||||||
|
|
||||||
|
def Open(self, inputFile):
|
||||||
|
self.fileName = inputFile
|
||||||
|
|
||||||
|
snbFile = open(self.fileName, "rb")
|
||||||
|
snbFile.seek(0)
|
||||||
|
self.Parse(snbFile)
|
||||||
|
snbFile.close()
|
||||||
|
|
||||||
|
def Parse(self, snbFile, metaOnly = False):
|
||||||
|
# Read header
|
||||||
|
vmbr = snbFile.read(44)
|
||||||
|
(self.magic, self.rev80, self.revA3, self.revZ1,
|
||||||
|
self.fileCount, self.vfatSize, self.vfatCompressed,
|
||||||
|
self.binStreamSize, self.plainStreamSizeUncompressed,
|
||||||
|
self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)
|
||||||
|
|
||||||
|
# Read FAT
|
||||||
|
self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
|
||||||
|
self.ParseFile(self.vfat, self.fileCount)
|
||||||
|
|
||||||
|
# Read tail
|
||||||
|
snbFile.seek(-16, os.SEEK_END)
|
||||||
|
#plainStreamEnd = snbFile.tell()
|
||||||
|
tailblock = snbFile.read(16)
|
||||||
|
(self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
|
||||||
|
snbFile.seek(self.tailOffset)
|
||||||
|
self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
|
||||||
|
self.tailSizeUncompressed = len(self.vTailUncompressed)
|
||||||
|
self.ParseTail(self.vTailUncompressed, self.fileCount)
|
||||||
|
|
||||||
|
# Uncompress file data
|
||||||
|
# Read files
|
||||||
|
binPos = 0
|
||||||
|
plainPos = 0
|
||||||
|
uncompressedData = None
|
||||||
|
for f in self.files:
|
||||||
|
if f.attr & 0x41000000 == 0x41000000:
|
||||||
|
# Compressed Files
|
||||||
|
if uncompressedData == None:
|
||||||
|
uncompressedData = ""
|
||||||
|
for i in range(self.plainBlock):
|
||||||
|
bzdc = bz2.BZ2Decompressor()
|
||||||
|
if (i < self.plainBlock - 1):
|
||||||
|
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
|
||||||
|
else:
|
||||||
|
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
|
||||||
|
snbFile.seek(self.blocks[self.binBlock + i].Offset);
|
||||||
|
try:
|
||||||
|
data = snbFile.read(bSize)
|
||||||
|
uncompressedData += bzdc.decompress(data)
|
||||||
|
except Exception, e:
|
||||||
|
print e
|
||||||
|
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
||||||
|
plainPos += f.fileSize
|
||||||
|
elif f.attr & 0x01000000 == 0x01000000:
|
||||||
|
# Binary Files
|
||||||
|
snbFile.seek(44 + self.vfatCompressed + binPos)
|
||||||
|
f.fileBody = snbFile.read(f.fileSize)
|
||||||
|
binPos += f.fileSize
|
||||||
|
else:
|
||||||
|
print f.attr, f.fileName
|
||||||
|
raise Exception("Invalid file")
|
||||||
|
|
||||||
|
def ParseFile(self, vfat, fileCount):
|
||||||
|
fileNames = vfat[fileCount*12:].split('\0');
|
||||||
|
for i in range(fileCount):
|
||||||
|
f = FileStream()
|
||||||
|
(f.attr, f.fileNameOffset, f.fileSize) = struct.unpack('>iii', vfat[i * 12 : (i+1)*12])
|
||||||
|
f.fileName = fileNames[i]
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def ParseTail(self, vtail, fileCount):
|
||||||
|
self.binBlock = (self.binStreamSize + 0x8000 - 1) / 0x8000;
|
||||||
|
self.plainBlock = (self.plainStreamSizeUncompressed + 0x8000 - 1) / 0x8000;
|
||||||
|
for i in range(self.binBlock + self.plainBlock):
|
||||||
|
block = BlockData()
|
||||||
|
(block.Offset,) = struct.unpack('>i', vtail[i * 4 : (i+1) * 4])
|
||||||
|
self.blocks.append(block)
|
||||||
|
for i in range(fileCount):
|
||||||
|
(self.files[i].blockIndex, self.files[i].contentOffset) = struct.unpack('>ii', vtail[(self.binBlock + self.plainBlock) * 4 + i * 8 : (self.binBlock + self.plainBlock) * 4 + (i+1) * 8])
|
||||||
|
|
||||||
|
def IsValid(self):
|
||||||
|
if self.magic != SNBFile.MAGIC:
|
||||||
|
return False
|
||||||
|
if self.rev80 != SNBFile.REV80:
|
||||||
|
return False
|
||||||
|
if self.revA3 != SNBFile.REVA3:
|
||||||
|
return False
|
||||||
|
if self.revZ1 != SNBFile.REVZ1:
|
||||||
|
return False
|
||||||
|
if self.revZ2 != SNBFile.REVZ2:
|
||||||
|
return False
|
||||||
|
if self.vfatSize != len(self.vfat):
|
||||||
|
return False
|
||||||
|
if self.fileCount != len(self.files):
|
||||||
|
return False
|
||||||
|
if (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8 != self.tailSizeUncompressed:
|
||||||
|
return False
|
||||||
|
if self.tailMagic != SNBFile.MAGIC:
|
||||||
|
print self.tailMagic
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def FromDir(self, tdir):
|
||||||
|
for root, dirs, files in os.walk(tdir):
|
||||||
|
for name in files:
|
||||||
|
p, ext = os.path.splitext(name)
|
||||||
|
if ext in [ ".snbf", ".snbc" ]:
|
||||||
|
self.AppendPlain(os.path.relpath(os.path.join(root, name), tdir), tdir)
|
||||||
|
else:
|
||||||
|
self.AppendBinary(os.path.relpath(os.path.join(root, name), tdir), tdir)
|
||||||
|
|
||||||
|
def AppendPlain(self, fileName, tdir):
|
||||||
|
f = FileStream()
|
||||||
|
f.attr = 0x41000000
|
||||||
|
f.fileSize = os.path.getsize(os.path.join(tdir,fileName))
|
||||||
|
f.fileBody = open(os.path.join(tdir,fileName), 'rb').read()
|
||||||
|
f.fileName = fileName.replace(os.sep, '/')
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def AppendBinary(self, fileName, tdir):
|
||||||
|
f = FileStream()
|
||||||
|
f.attr = 0x01000000
|
||||||
|
f.fileSize = os.path.getsize(os.path.join(tdir,fileName))
|
||||||
|
f.fileBody = open(os.path.join(tdir,fileName), 'rb').read()
|
||||||
|
f.fileName = fileName.replace(os.sep, '/')
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def GetFileStream(self, fileName):
|
||||||
|
for file in self.files:
|
||||||
|
if file.fileName == fileName:
|
||||||
|
return file.fileBody
|
||||||
|
return None
|
||||||
|
|
||||||
|
def OutputImageFiles(self, path):
|
||||||
|
fileNames = []
|
||||||
|
for f in self.files:
|
||||||
|
fname = os.path.basename(f.fileName)
|
||||||
|
root, ext = os.path.splitext(fname)
|
||||||
|
if ext in [ '.jpeg', '.jpg', '.gif', '.svg', '.png' ]:
|
||||||
|
file = open(os.path.join(path, fname), 'wb')
|
||||||
|
file.write(f.fileBody)
|
||||||
|
file.close()
|
||||||
|
fileNames.append((fname, types_map[ext]))
|
||||||
|
return fileNames
|
||||||
|
|
||||||
|
    def Output(self, outputFile):
        """Serialize the archive to *outputFile* in SNB container format.

        Layout written: header part 1, header part 2, zlib-compressed
        VFAT + name table, raw binary stream, bz2-compressed 32 KB plain
        blocks, zlib-compressed tail (block offsets + per-file records),
        tail pointer, and a trailing magic marker.
        """

        # Sort the files in file buffer,
        # required by the SNB file format
        self.files.sort(compareFileStream)

        # NOTE: the parameter is rebound from a path string to a file object.
        outputFile = open(outputFile, 'wb')
        # File header part 1: magic + format revisions + member count,
        # all big-endian.
        vmbrp1 = struct.pack('>8siiii', SNBFile.MAGIC, SNBFile.REV80, SNBFile.REVA3, SNBFile.REVZ1, len(self.files))

        # Create VFAT & file stream.  The VFAT holds one (attr, name offset,
        # size) record per member; names are concatenated NUL-terminated.
        vfat = ''
        fileNameTable = ''
        plainStream = ''
        binStream = ''
        for f in self.files:
            vfat += struct.pack('>iii', f.attr, len(fileNameTable), f.fileSize);
            fileNameTable += (f.fileName + '\0')

            if f.attr & 0x41000000 == 0x41000000:
                # Plain Files: concatenated, bz2-compressed in blocks below.
                f.contentOffset = len(plainStream)
                plainStream += f.fileBody
            elif f.attr & 0x01000000 == 0x01000000:
                # Binary Files: stored verbatim.
                f.contentOffset = len(binStream)
                binStream += f.fileBody
            else:
                print f.attr, f.fileName
                raise Exception("Unknown file type")
        vfatCompressed = zlib.compress(vfat+fileNameTable)

        # File header part 2: uncompressed/compressed VFAT sizes and
        # stream sizes, closed by the REVZ2 revision mark.
        vmbrp2 = struct.pack('>iiiii', len(vfat+fileNameTable), len(vfatCompressed), len(binStream), len(plainStream), SNBFile.REVZ2)
        # Write header
        outputFile.write(vmbrp1 + vmbrp2)
        # Write vfat
        outputFile.write(vfatCompressed)

        # Generate block information.  0x2C is the fixed header size; the
        # binary stream follows the compressed VFAT directly.
        binBlockOffset = 0x2C + len(vfatCompressed)
        plainBlockOffset = binBlockOffset + len(binStream)

        # Number of 32 KB (0x8000) blocks needed for the binary stream,
        # rounded up.
        binBlock = (len(binStream) + 0x8000 - 1) / 0x8000
        #plainBlock = (len(plainStream) + 0x8000 - 1) / 0x8000

        offset = 0
        tailBlock = ''
        for i in range(binBlock):
            tailBlock += struct.pack('>i', binBlockOffset + offset)
            offset += 0x8000;
        tailRec = ''
        for f in self.files:
            # Plain files are addressed after the binary blocks, hence the
            # binBlock index bias for non-binary members.
            t = 0
            if f.IsBinary():
                t = 0
            else:
                t = binBlock
            tailRec += struct.pack('>ii', f.contentOffset / 0x8000 + t, f.contentOffset % 0x8000);

        # Write binary stream
        outputFile.write(binStream)

        # Write plain stream, bz2-compressing each 32 KB slice separately so
        # readers can seek by block; record each block's absolute offset.
        pos = 0
        offset = 0
        while pos < len(plainStream):
            tailBlock += struct.pack('>i', plainBlockOffset + offset);
            block = plainStream[pos:pos+0x8000];
            compressed = bz2.compress(block)
            outputFile.write(compressed)
            offset += len(compressed)
            pos += 0x8000

        # Write tail block (block offset table + per-file records)
        compressedTail = zlib.compress(tailBlock + tailRec)
        outputFile.write(compressedTail)

        # Write tail pointer: compressed tail length and its file offset.
        veom = struct.pack('>ii', len(compressedTail), plainBlockOffset + offset)
        outputFile.write(veom)

        # Write file end mark
        outputFile.write(SNBFile.MAGIC);

        # Close
        outputFile.close()
        return
|
||||||
|
|
||||||
|
    def Dump(self):
        """Print archive metadata and extract every member for inspection.

        Debug helper: prints header fields and per-file records, then
        writes each member's body to /tmp.
        """
        if self.fileName:
            print "File Name:\t", self.fileName
        print "File Count:\t", self.fileCount
        print "VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed)
        print "Binary Stream Size:\t", self.binStreamSize
        print "Plain Stream Uncompressed Size:\t", self.plainStreamSizeUncompressed
        print "Binary Block Count:\t", self.binBlock
        print "Plain Block Count:\t", self.plainBlock
        for i in range(self.fileCount):
            print "File ", i
            f = self.files[i]
            print "File Name: ", f.fileName
            print "File Attr: ", f.attr
            print "File Size: ", f.fileSize
            print "Block Index: ", f.blockIndex
            print "Content Offset: ", f.contentOffset
            # NOTE(review): hard-coded /tmp is Unix-only and f.fileName may
            # contain path separators — fine for an internal debug aid only.
            tempFile = open("/tmp/" + f.fileName, 'wb')
            tempFile.write(f.fileBody)
            tempFile.close()
|
||||||
|
|
||||||
|
def usage():
    """Print usage instructions for the manual unit test below."""
    print "This unit test is for INTERNAL usage only!"
    print "This unit test accept two parameters."
    print "python snbfile.py <INPUTFILE> <DESTFILE>"
    print "The input file will be extracted and write to dest file. "
    print "Meta data of the file will be shown during this process."
|
||||||
|
|
||||||
|
def main():
    """Manual test driver: validate an SNB file, dump it, and rewrite it.

    Expects exactly two command-line arguments (input and output paths).
    Returns 0 on success, 1 when the input file fails validation.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(0)
    inputFile = sys.argv[1]
    outputFile = sys.argv[2]

    print "Input file: ", inputFile
    print "Output file: ", outputFile

    snbFile = SNBFile(inputFile)
    if snbFile.IsValid():
        snbFile.Dump()
        snbFile.Output(outputFile)
    else:
        print "The input file is invalid."
        return 1
    return 0
|
||||||
|
|
||||||
|
# Manual test entry point; see usage() for the expected arguments.
if __name__ == "__main__":
    """SNB file unit test"""
    sys.exit(main())
|
263
src/calibre/ebooks/snb/snbml.py
Normal file
263
src/calibre/ebooks/snb/snbml.py
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Transform OEB content into SNB format
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
def ProcessFileName(fileName):
    """Normalise an OEB href into a flat SNB member name.

    Path separators and ``#`` fragments become underscores, the result
    is lower-cased, and any image extension is rewritten to ``.jpg``
    (SNB stores all images as JPEG).
    """
    # Flatten the path and neutralise HTML bookmark fragments.
    flat = fileName.replace("/", "_").replace(os.sep, "_").replace("#", "_")
    # Member names are case-insensitive on the device: lower-case them.
    flat = flat.lower()
    # All image formats are converted to JPEG downstream.
    base, ext = os.path.splitext(flat)
    if ext in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
        return base + '.jpg'
    return flat
|
||||||
|
|
||||||
|
|
||||||
|
# XHTML tags whose content starts a new paragraph block in SNB output.
BLOCK_TAGS = [
    'div',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'li',
    'tr',
]

# CSS 'display' values treated the same as BLOCK_TAGS.
BLOCK_STYLES = [
    'block',
]

# Tags whose content is separated by a single space instead of a block break.
SPACE_TAGS = [
    'td',
]

# Inline sentinel markers used during text extraction; they survive the
# text clean-up pass and are translated into <img>/<title>/<text> elements
# when the final SNBC tree is assembled.  The '$$' names are chosen to be
# extremely unlikely to appear in real book content.
CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"
|
||||||
|
|
||||||
|
class SNBMLizer(object):
    """Convert OEB (XHTML) content into SNBC XML trees for the SNB format.

    The conversion is a two-stage pipeline: dump_text() flattens the XHTML
    into plain text with inline sentinel markers (images, bookmarks,
    preformatted lines), cleanup_text() normalises whitespace and line
    lengths, and mlize() re-parses the marked-up text into one <snbc>
    etree per sub-item.
    """

    # Id of the sub-item currently being emitted by dump_text().
    # NOTE(review): class-level attribute, so state is shared across
    # instances — presumably each conversion uses a fresh instance; verify.
    curSubItem = ""
    # curText = [ ]

    def __init__(self, log):
        # Calibre logger used for progress/debug messages.
        self.log = log

    def extract_content(self, oeb_book, item, subitems, opts):
        """Convert one OEB *item* into SNBC trees.

        :param oeb_book: the OEB book object the item belongs to
        :param item: the spine item to convert
        :param subitems: list of (href-fragment, title) pairs splitting the
            item into SNB sub-sections
        :param opts: conversion options
        :return: dict mapping sub-item id -> <snbc> etree (see mlize)
        """
        self.log.info('Converting XHTML to SNBC...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.item = item
        self.subitems = subitems
        return self.mlize();

    def merge_content(self, old_tree, oeb_book, item, subitems, opts):
        """Convert *item* and append its body elements into *old_tree*.

        Used when several OEB items map onto one SNB section.
        """
        newTrees = self.extract_content(oeb_book, item, subitems, opts)
        body = old_tree.find(".//body")
        if body != None:
            for subName in newTrees:
                newbody = newTrees[subName].find(".//body")
                for entity in newbody:
                    body.append(entity)

    def mlize(self):
        """Run the extraction pipeline; return {subitem: <snbc> etree}."""
        output = [ u'' ]
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
        # content = self.remove_newlines(content)
        # One empty <snbc> skeleton per sub-item; filled from the marked-up
        # text below.
        trees = { }
        for subitem, subtitle in self.subitems:
            snbcTree = etree.Element("snbc")
            etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle
            etree.SubElement(snbcTree, "body")
            trees[subitem] = snbcTree
        # Seed the output with a bookmark marker for the default ('') subitem.
        output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))
        output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0]
        output = self.cleanup_text(u''.join(output))

        # Walk the cleaned text line by line, routing each line into the
        # tree of the sub-item selected by the most recent bookmark marker.
        subitem = ''
        for line in output.splitlines():
            if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
                line = line.strip(u' \t\n\r\u3000')
            else:
                # Preformatted line: keep verbatim (no strip, no indent).
                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
                continue
            if len(line) != 0:
                if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                    # Image marker: prefix with the item's directory so the
                    # name matches the flattened file name in the archive.
                    prefix = ProcessFileName(os.path.dirname(self.item.href))
                    if prefix != '':
                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                            prefix + '_' + line[len(CALIBRE_SNB_IMG_TAG):]
                    else:
                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                            line[len(CALIBRE_SNB_IMG_TAG):]
                elif line.find(CALIBRE_SNB_BM_TAG) == 0:
                    # Bookmark marker: switch target sub-item.
                    subitem = line[len(CALIBRE_SNB_BM_TAG):]
                else:
                    # Ordinary paragraph; U+3000 pair is the conventional
                    # CJK two-character paragraph indent.
                    etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                        etree.CDATA(unicode(u'\u3000\u3000' + line))
        return trees

    def remove_newlines(self, text):
        """Collapse all newline variants in *text* to single spaces."""
        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')

        return text

    def cleanup_text(self, text):
        """Normalise whitespace and wrap long lines in the extracted text."""
        self.log.debug('\tClean up text...')
        # Replace bad characters.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')
        text = text.replace(u'\xa9', '(C)')

        # Replace tabs, vertical tags and form feeds with single space.
        # NOTE(review): str.replace takes literal strings, so '\t+' only
        # matches a tab followed by a '+'; a regex was probably intended.
        text = text.replace('\t+', ' ')
        text = text.replace('\v+', ' ')
        text = text.replace('\f+', ' ')

        # Single line paragraph.
        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)

        # Remove multiple spaces.
        #text = re.sub('[ ]{2,}', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

        # Replace spaces at the beginning and end of lines
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        # Hard-wrap lines to the configured maximum, preferring to break at
        # the last space before the limit, then the first space after it.
        if self.opts.snb_max_line_length:
            max_length = self.opts.snb_max_line_length
            if self.opts.max_line_length < 25:# and not self.opts.force_max_line_length:
                max_length = 25
            short_lines = []
            lines = text.splitlines()
            for line in lines:
                while len(line) > max_length:
                    space = line.rfind(' ', 0, max_length)
                    if space != -1:
                        # Space was found.
                        short_lines.append(line[:space])
                        line = line[space + 1:]
                    else:
                        # Space was not found.
                        if False and self.opts.force_max_line_length:
                            # Force breaking at max_length.
                            short_lines.append(line[:max_length])
                            line = line[max_length:]
                        else:
                            # Look for the first space after max_length.
                            space = line.find(' ', max_length, len(line))
                            if space != -1:
                                # Space was found.
                                short_lines.append(line[:space])
                                line = line[space + 1:]
                            else:
                                # No space was found cannot break line.
                                short_lines.append(line)
                                line = ''
                # Add the text that was less than max_length to the list
                short_lines.append(line)
            text = '\n'.join(short_lines)

        return text

    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
        """Recursively flatten *elem* into marker-annotated plain text.

        :param subitems: (href, title) pairs; matching element ids emit
            bookmark markers
        :param elem: current XHTML element
        :param stylizer: CSS stylizer for visibility/display decisions
        :param end: last two characters already emitted (separator control)
        :param pre: True inside a <pre> subtree
        :param li: pending list-item prefix ('- ') to attach to text
        :return: (list of text fragments, remaining li prefix)
        NOTE(review): the early-return paths return a bare list [''] while
        the normal path returns a tuple — callers index [0], which works
        only because [''][0] is the empty string; fragile but relied upon.
        """

        # Skip non-element nodes (comments, PIs) and foreign namespaces.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return ['']

        text = ['']
        style = stylizer.style(elem)

        # Emit a bookmark marker when this element's id starts a sub-item.
        if elem.attrib.get('id') != None and elem.attrib['id'] in [ href for href, title in subitems ]:
            if self.curSubItem != None and self.curSubItem != elem.attrib['id']:
                self.curSubItem = elem.attrib['id']
                text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem))

        # Honour CSS hiding.
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return ['']

        tag = barename(elem.tag)
        in_block = False

        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            in_block = True
            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
                text.append(u'\n\n')

        # Table cells separate with a single space.
        # NOTE(review): endswith('u ') looks like a typo for endswith(u' ') —
        # as written it tests for the two characters 'u' and space.
        if tag in SPACE_TAGS:
            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text:
                text.append(u' ')

        # Images become sentinel markers, resolved to member names in mlize().
        if tag == 'img':
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src'])))

        if tag == 'br':
            text.append(u'\n\n')

        if tag == 'li':
            li = '- '

        pre = (tag == 'pre' or pre)
        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            if pre:
                # Preformatted: mark every continuation line so cleanup/mlize
                # leave it untouched.
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join((li + elem.text).splitlines()))
            else:
                text.append(li + elem.text)
            li = ''

        # Recurse into children, passing the trailing two characters emitted
        # so far for separator suppression.
        for item in elem:
            en = u''
            if len(text) >= 2:
                en = text[-1][-2:]
            t = self.dump_text(subitems, item, stylizer, en, pre, li)[0]
            text += t

        if in_block:
            text.append(u'\n\n')

        # Tail text (text following this element's end tag).
        if hasattr(elem, 'tail') and elem.tail:
            if pre:
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.tail.splitlines()))
            else:
                text.append(li + elem.tail)
            li = ''

        return text, li
|
@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
|
|||||||
(_('Topaz books'), ['tpz','azw1']),
|
(_('Topaz books'), ['tpz','azw1']),
|
||||||
(_('Text books'), ['txt', 'rtf']),
|
(_('Text books'), ['txt', 'rtf']),
|
||||||
(_('PDF Books'), ['pdf']),
|
(_('PDF Books'), ['pdf']),
|
||||||
|
(_('SNB Books'), ['snb']),
|
||||||
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
||||||
(_('Archives'), ['zip', 'rar']),
|
(_('Archives'), ['zip', 'rar']),
|
||||||
]
|
]
|
||||||
|
35
src/calibre/gui2/convert/snb_output.py
Normal file
35
src/calibre/gui2/convert/snb_output.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.gui2.convert.snb_output_ui import Ui_Form
|
||||||
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
|
newline_model = None
|
||||||
|
|
||||||
|
class PluginWidget(Widget, Ui_Form):
    """GUI configuration widget for the SNB output conversion plugin.

    Currently exposes no options (the option list passed to Widget is
    empty); the commented-out scaffolding below is kept as a template for
    newline/line-length options that may be enabled later.
    """

    # Title shown in the conversion dialog's plugin list.
    TITLE = _('SNB Output')
    HELP = _('Options specific to')+' SNB '+_('output')
    # Key under which the widget's settings are committed.
    COMMIT_NAME = 'snb_output'
    ICON = I('mimetypes/snb.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        # Empty option list: no SNB-specific options are surfaced yet.
        Widget.__init__(self, parent,
                [])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)

        # default = self.opt_newline.currentText()

        # global newline_model
        # if newline_model is None:
        #     newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys())
        # self.newline_model = newline_model
        # self.opt_newline.setModel(self.newline_model)

        # default_index = self.opt_newline.findText(default)
        # system_index = self.opt_newline.findText('system')
        # self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0)
|
74
src/calibre/gui2/convert/snb_output.ui
Normal file
74
src/calibre/gui2/convert/snb_output.ui
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ui version="4.0">
|
||||||
|
<class>Form</class>
|
||||||
|
<widget class="QWidget" name="Form">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>0</x>
|
||||||
|
<y>0</y>
|
||||||
|
<width>400</width>
|
||||||
|
<height>300</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="windowTitle">
|
||||||
|
<string>Form</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
|
<!-- <item row="0" column="0"> -->
|
||||||
|
<!-- <widget class="QLabel" name="label"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Line ending style:</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="buddy"> -->
|
||||||
|
<!-- <cstring>opt_newline</cstring> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="0" column="1"> -->
|
||||||
|
<!-- <widget class="QComboBox" name="opt_newline"/> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="4" column="0"> -->
|
||||||
|
<!-- <spacer name="verticalSpacer"> -->
|
||||||
|
<!-- <property name="orientation"> -->
|
||||||
|
<!-- <enum>Qt::Vertical</enum> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="sizeHint" stdset="0"> -->
|
||||||
|
<!-- <size> -->
|
||||||
|
<!-- <width>20</width> -->
|
||||||
|
<!-- <height>246</height> -->
|
||||||
|
<!-- </size> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </spacer> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="3" column="0" colspan="2"> -->
|
||||||
|
<!-- <widget class="QCheckBox" name="opt_inline_toc"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Inline TOC</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="1" column="1"> -->
|
||||||
|
<!-- <widget class="QSpinBox" name="opt_max_line_length"/> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="1" column="0"> -->
|
||||||
|
<!-- <widget class="QLabel" name="label_2"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Maximum line length:</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="buddy"> -->
|
||||||
|
<!-- <cstring>opt_max_line_length</cstring> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="2" column="0" colspan="2"> -->
|
||||||
|
<!-- <widget class="QCheckBox" name="opt_force_max_line_length"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>Force maximum line length</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
<resources/>
|
||||||
|
<connections/>
|
||||||
|
</ui>
|
@ -20,9 +20,9 @@ What formats does |app| support conversion to/from?
|
|||||||
|app| supports the conversion of many input formats to many output formats.
|
|app| supports the conversion of many input formats to many output formats.
|
||||||
It can convert every input format in the following list, to every output format.
|
It can convert every input format in the following list, to every output format.
|
||||||
|
|
||||||
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TCR, TXT
|
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT
|
||||||
|
|
||||||
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TCR, TXT
|
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, SNB, TCR, TXT
|
||||||
|
|
||||||
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
|
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user