Support for the SNB format used by the Bambook e-book reader

This commit is contained in:
Kovid Goyal 2010-10-17 10:17:59 -06:00
commit 88bc2991b2
15 changed files with 1150 additions and 5 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.1 KiB

View File

@ -292,6 +292,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype): def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rtf import get_metadata from calibre.ebooks.metadata.rtf import get_metadata
return get_metadata(stream) return get_metadata(stream)
class SNBMetadataReader(MetadataReaderPlugin):
    # Metadata reader plugin registered for files with the 'snb' extension.

    name = 'Read SNB metadata'
    file_types = set(['snb'])
    description = _('Read metadata from %s files') % 'SNB'
    author = 'Li Fanxi'

    def get_metadata(self, stream, ftype):
        # The format-specific reader is imported at call time, and only the
        # stream is forwarded; `ftype` is not used here.
        from calibre.ebooks.metadata.snb import get_metadata
        return get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin): class TOPAZMetadataReader(MetadataReaderPlugin):
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput from calibre.ebooks.chm.input import CHMInput
from calibre.ebooks.snb.input import SNBInput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output from calibre.ebooks.fb2.output import FB2Output
@ -434,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
from calibre.ebooks.rtf.output import RTFOutput from calibre.ebooks.rtf.output import RTFOutput
from calibre.ebooks.tcr.output import TCROutput from calibre.ebooks.tcr.output import TCROutput
from calibre.ebooks.txt.output import TXTOutput from calibre.ebooks.txt.output import TXTOutput
from calibre.ebooks.snb.output import SNBOutput
from calibre.customize.profiles import input_profiles, output_profiles from calibre.customize.profiles import input_profiles, output_profiles
@ -495,6 +508,7 @@ plugins += [
TXTInput, TXTInput,
LRFInput, LRFInput,
CHMInput, CHMInput,
SNBInput,
] ]
plugins += [ plugins += [
EPUBOutput, EPUBOutput,
@ -510,6 +524,7 @@ plugins += [
RTFOutput, RTFOutput,
TCROutput, TCROutput,
TXTOutput, TXTOutput,
SNBOutput,
] ]
# Order here matters. The first matched device is the one used. # Order here matters. The first matched device is the one used.
plugins += [ plugins += [

View File

@ -647,11 +647,25 @@ class NookOutput(OutputProfile):
fbase = 16 fbase = 16
fsizes = [12, 12, 14, 16, 18, 20, 22, 24] fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
class BambookOutput(OutputProfile):
    # Output profile describing the Sanda Bambook reader.

    name = 'Sanda Bambook'
    short_name = 'bambook'
    description = _('This profile is intended for the Sanda Bambook.')

    # Screen size is a best guess
    screen_size = (800, 600)
    # NOTE(review): the SNB output plugin in this commit unpacks this tuple as
    # (SCREEN_Y, SCREEN_X) -- confirm the intended axis order.
    comic_screen_size = (700, 540)
    dpi = 168.451
    fbase = 12
    fsizes = [10, 12, 14, 16]
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output, output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output, SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput, HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
iPadOutput, KoboReaderOutput, iPadOutput, KoboReaderOutput,
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput, SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,] IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
BambookOutput, ]
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower())) output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan'] 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
class HTMLRenderer(object): class HTMLRenderer(object):

View File

@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm', 'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'fb2', 'pdf', 'prc', 'odt', 'rtf', 'fb2', 'pdf', 'prc', 'odt',
'epub', 'lit', 'lrx', 'lrf', 'mobi', 'epub', 'lit', 'lrx', 'lrf', 'mobi',
'rb', 'imp', 'azw' 'rb', 'imp', 'azw', 'snb'
] ]
# The priorities for loading metadata from different file types # The priorities for loading metadata from different file types

View File

@ -0,0 +1,47 @@
'''Read meta information from SNB files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
import os
from StringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.snb.snbfile import SNBFile
from lxml import etree
def get_metadata(stream, extract_cover=True):
    """Return metadata read from an SNB file as a L{MetaInformation} object.

    :param stream: either a file-like object, or (when it has no ``write``
        attribute) the raw file content, which is wrapped in a ``StringIO``.
    :param extract_cover: when true, also try to load the cover image named
        by ``<head>/<cover>`` in ``snbf/book.snbf``.
    :return: a ``MetaInformation`` instance. Fields that are missing or empty
        in the book keep their 'Unknown' defaults. Any error while reading is
        printed as a traceback and whatever was collected so far is returned.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    def head_text(tree, tag):
        # Text of <head>/<tag>, or None when the element is absent or empty.
        node = tree.find('.//head/' + tag)
        if node is None or not node.text:
            return None
        return node.text

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            # Each field is handled on its own, so one missing element no
            # longer discards the fields (and the cover) that follow it.
            title = head_text(meta, 'name')
            if title:
                mi.title = title
            author = head_text(meta, 'author')
            if author:
                mi.authors = [author]
            language = head_text(meta, 'language')
            if language:
                mi.language = language.lower().replace('_', '-')
            publisher = head_text(meta, 'publisher')
            if publisher:
                mi.publisher = publisher

            if extract_cover:
                cover = head_text(meta, 'cover')
                if cover:
                    root, ext = os.path.splitext(cover)
                    if ext == '.jpeg':
                        ext = '.jpg'
                    data = snbFile.GetFileStream('snbc/images/' + cover)
                    # Only report a cover when the image is really present.
                    if data is not None:
                        mi.cover_data = (ext[-3:], data)

    except Exception:
        # Best effort: metadata reading must not abort the caller.
        import traceback
        traceback.print_exc()

    return mi

View File

@ -0,0 +1,9 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
'''
Used for snb output
'''

103
src/calibre/ebooks/snb/input.py Executable file
View File

@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import os, uuid
from calibre.customize.conversion import InputFormatPlugin
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
from lxml import etree
# Skeleton of each generated chapter file; the two %s slots receive the
# chapter title and the already-rendered body markup, in that order.
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
def html_encode(s):
    """Escape *s* for embedding in the generated chapter HTML.

    Besides the five XML special characters, newlines become ``<br/>`` and
    every space becomes ``&nbsp;`` so the original spacing is preserved.

    :param s: unicode text, or ``None`` (the text of an empty XML element).
    :return: the escaped unicode string; ``u''`` when *s* is ``None``.
    """
    if s is None:
        # lxml reports the text of an empty element as None.
        return u''
    # Order matters: '&' must be escaped first, and '<br/>' must be inserted
    # only after '<' and '>' have been escaped.
    replacements = (
        (u'&', u'&amp;'),
        (u'<', u'&lt;'),
        (u'>', u'&gt;'),
        (u'"', u'&quot;'),
        (u"'", u'&apos;'),
        (u'\n', u'<br/>'),
        (u' ', u'&nbsp;'),
    )
    for raw, escaped in replacements:
        s = s.replace(raw, escaped)
    return s
class SNBInput(InputFormatPlugin):
    """Input plugin that converts an SNB container into an OEB book."""

    name = 'SNB Input'
    author = 'Li Fanxi'
    description = 'Convert SNB files to OEB'
    file_types = set(['snb'])

    options = set([
    ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Parse *stream* as an SNB file and build an OEB book from it.

        Metadata comes from ``snbf/book.snbf``, the chapter list from
        ``snbf/toc.snbf``; every chapter is rendered to a ``ch_<n>.htm`` file
        in a kept temporary directory, and the images are extracted next to
        them.

        :raises ValueError: when the stream cannot be parsed or fails
            ``SNBFile.IsValid``.
        :return: the populated OEB book object.
        """
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer reported as an invalid file.
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invaild SNB file")
            raise ValueError("Invalid SNB file")

        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options, self,
                encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            def head_text(tag):
                # Text of <head>/<tag>, or None if absent or empty.
                node = meta.find('.//head/' + tag)
                if node is None or not node.text:
                    return None
                return node.text

            # A missing element is skipped instead of aborting the whole
            # conversion with an AttributeError.
            title = head_text('name')
            oeb.metadata.add('title', title if title else _('Unknown'))
            author = head_text('author')
            if author:
                oeb.metadata.add('creator', author, attrib={'role':'aut'})
            language = head_text('language')
            if language:
                oeb.metadata.add('language', language.lower().replace('_', '-'))
            generator = head_text('generator')
            if generator:
                oeb.metadata.add('creator', generator)
            publisher = head_text('publisher')
            if publisher:
                oeb.metadata.add('publisher', publisher)
            cover = head_text('cover')
            if cover:
                oeb.guide.add('cover', 'Cover', cover)

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): this takes the first identifier, not `ident`;
                # kept as in the original -- confirm that is intended.
                oeb.uid = oeb.metadata.identifier[0]
                break

        # The directory is kept (keep=True) because the returned book's
        # container points into it.
        with TemporaryDirectory('_chm2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                tocBody = toc.find('.//body')
                chapters = tocBody if tocBody is not None else []
                i = 1
                for ch in chapters:
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    if chapterSrc is None:
                        # A chapter entry without a source cannot be loaded.
                        continue
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = etree.fromstring(data)
                    snbcBody = snbc.find('.//body')
                    lines = []
                    for line in (snbcBody if snbcBody is not None else []):
                        # Empty elements have text None; treat it as ''.
                        if line.tag == 'text':
                            lines.append(u'<p>%s</p>' % html_encode(line.text or u''))
                        elif line.tag == 'img':
                            lines.append(u'<p><img src="%s" /></p>' % html_encode(line.text or u''))
                    # The title lands inside <title>, so markup characters in
                    # it must be escaped to keep the document well-formed.
                    safeTitle = (chapterName or u'').replace(u'&', u'&amp;') \
                        .replace(u'<', u'&lt;').replace(u'>', u'&gt;')
                    with open(os.path.join(tdir, fname), 'wb') as outputFile:
                        outputFile.write((HTML_TEMPLATE % (safeTitle, u'\n'.join(lines))).encode('utf-8', 'replace'))
                    oeb.toc.add(ch.text, fname)
                    item_id, href = oeb.manifest.generate(id='html',
                        href=ascii_filename(fname))
                    item = oeb.manifest.add(item_id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    item_id, href = oeb.manifest.generate(id='image',
                        href=ascii_filename(f))
                    item = oeb.manifest.add(item_id, href, m)
                    item.html_input_href = f

        return oeb

View File

@ -0,0 +1,265 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import os, string
from lxml import etree
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
from calibre.ptempfile import TemporaryDirectory
from calibre.constants import __appname__, __version__
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
class SNBOutput(OutputFormatPlugin):
    """Output plugin that writes an OEB book as an SNB container."""

    name = 'SNB Output'
    author = 'Li Fanxi'
    file_type = 'snb'

    options = set([
        # OptionRecommendation(name='newline', recommended_value='system',
        #     level=OptionRecommendation.LOW,
        #     short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
        #     help=_('Type of newline to use. Options are %s. Default is \'system\'. '
        #         'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
        #         'For Mac OS X use \'unix\'. \'system\' will default to the newline '
        #         'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
        OptionRecommendation(name='snb_output_encoding', recommended_value='utf-8',
            level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the output document. '
            'The default is utf-8. Note: This option is not honored by all '
            'formats.')),
        # OptionRecommendation(name='inline_toc',
        #     recommended_value=False, level=OptionRecommendation.LOW,
        #     help=_('Add Table of Contents to beginning of the book.')),
        OptionRecommendation(name='snb_max_line_length',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_('The maximum number of characters per line. This splits on '
            'the first space before the specified value. If no space is found '
            'the line will be broken at the space after and will exceed the '
            'specified value. Also, there is a minimum of 25 characters. '
            'Use 0 to disable line splitting.')),
        # OptionRecommendation(name='force_max_line_length',
        #     recommended_value=False, level=OptionRecommendation.LOW,
        #     help=_('Force splitting on the max-line-length value when no space '
        #     'is present. Also allows max-line-length to be below the minimum')),
    ])

    def _write_tree(self, tree, path):
        # Serialize an lxml tree to *path* as pretty-printed UTF-8, closing
        # the file even if serialization or the write fails.
        with open(path, 'wb') as out:
            out.write(etree.tostring(tree, pretty_print=True, encoding='utf-8'))

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Write *oeb_book* to *output_path* as an SNB file.

        The book is staged in a temporary directory (``snbf/book.snbf``,
        ``snbf/toc.snbf``, one ``.snbc`` file per TOC entry under ``snbc/``
        and converted images under ``snbc/images/``), then packed with
        ``SNBFile``. Spine documents that no TOC entry points at are merged
        into the chapter that precedes them.

        :param oeb_book: the OEB book to convert.
        :param output_path: path of the SNB file to create.
        :param input_plugin: not used by this method.
        :param opts: conversion options; stored on ``self.opts``.
        :param log: logger used for progress and warnings.
        """
        self.opts = opts
        # Imported up front so OEB_IMAGES is bound even if the spine loop
        # below never runs.
        from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        try:
            rasterizer = SVGRasterizer()
            rasterizer(oeb_book, opts)
        except Unavailable:
            # `self.log` is never assigned in this class; use the logger
            # that was passed in.
            log.warn('SVG rasterizer unavailable, SVG will not be converted')

        # Create temp dir
        with TemporaryDirectory('_snb_output') as tdir:
            # Create stub directories
            snbfDir = os.path.join(tdir, 'snbf')
            snbcDir = os.path.join(tdir, 'snbc')
            snbiDir = os.path.join(tdir, 'snbc/images')
            os.mkdir(snbfDir)
            os.mkdir(snbcDir)
            os.mkdir(snbiDir)

            # Process Meta data
            meta = oeb_book.metadata
            title = unicode(meta.title[0]) if meta.title else ''
            authors = [unicode(x) for x in meta.creator if x.role == 'aut']
            publishers = unicode(meta.publisher[0]) if meta.publisher else ''
            lang = unicode(meta.language[0]).upper() if meta.language else ''
            abstract = unicode(meta.description[0]) if meta.description else ''

            # Process Cover
            g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
            href = None
            if 'titlepage' not in g and 'cover' in g:
                href = g['cover'].href

            # Output book info file
            bookInfoTree = etree.Element("book-snbf", version="1.0")
            headTree = etree.SubElement(bookInfoTree, "head")
            etree.SubElement(headTree, "name").text = title
            etree.SubElement(headTree, "author").text = ' '.join(authors)
            etree.SubElement(headTree, "language").text = lang
            etree.SubElement(headTree, "rights")
            etree.SubElement(headTree, "publisher").text = publishers
            etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__
            etree.SubElement(headTree, "created")
            etree.SubElement(headTree, "abstract").text = abstract
            coverElement = etree.SubElement(headTree, "cover")
            if href is not None:
                coverElement.text = ProcessFileName(href)
            self._write_tree(bookInfoTree, os.path.join(snbfDir, 'book.snbf'))

            # Output TOC
            tocInfoTree = etree.Element("toc-snbf")
            tocHead = etree.SubElement(tocInfoTree, "head")
            tocBody = etree.SubElement(tocInfoTree, "body")
            # Maps a document href to its list of (fragment id, title) pairs;
            # the fragment id '' stands for the start of the document.
            outputFiles = { }
            first = next(iter(oeb_book.spine))
            if oeb_book.toc.count() == 0:
                log.warn('This SNB file has no Table of Contents. '
                    'Creating a default TOC')
                oeb_book.toc.add(_('Start Page'), first.href)
            elif oeb_book.toc[0].href != first.href:
                # The pages before the fist item in toc will be stored as
                # "Cover Pages".
                # oeb_book.toc does not support "insert", so we generate
                # the tocInfoTree directly instead of modifying the toc
                ch = etree.SubElement(tocBody, "chapter")
                ch.set("src", ProcessFileName(first.href) + ".snbc")
                ch.text = _('Cover Pages')
                outputFiles[first.href] = [("", _("Cover Pages"))]

            for tocitem in oeb_book.toc:
                if tocitem.href.find('#') != -1:
                    item = tocitem.href.split('#')
                    if len(item) != 2:
                        log.error('Error in TOC item: %s' % tocitem)
                    else:
                        if item[0] in outputFiles:
                            outputFiles[item[0]].append((item[1], tocitem.title))
                        else:
                            # First reference into this document: the text in
                            # front of the anchor becomes a "(Preface)" chapter.
                            outputFiles[item[0]] = [("", tocitem.title + _(" (Preface)"))]
                            ch = etree.SubElement(tocBody, "chapter")
                            ch.set("src", ProcessFileName(item[0]) + ".snbc")
                            ch.text = tocitem.title + _(" (Preface)")
                            outputFiles[item[0]].append((item[1], tocitem.title))
                else:
                    if tocitem.href in outputFiles:
                        outputFiles[tocitem.href].append(("", tocitem.title))
                    else:
                        outputFiles[tocitem.href] = [("", tocitem.title)]
                ch = etree.SubElement(tocBody, "chapter")
                ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
                ch.text = tocitem.title

            etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)
            self._write_tree(tocInfoTree, os.path.join(snbfDir, 'toc.snbf'))

            # Output Files
            oldTree = None      # tree of the most recently written chapter
            mergeLast = False   # True while appending to that chapter
            lastName = None     # file name of that chapter
            for item in s:
                if m.hrefs[item.href].media_type not in OEB_DOCS:
                    continue
                if item.href not in outputFiles:
                    log.debug('File %s is unused in TOC. Continue in last chapter' % item.href)
                    mergeLast = True
                else:
                    if oldTree is not None and mergeLast:
                        # The previous chapter grew through merging; rewrite it.
                        log.debug('Output the modified chapter again: %s' % lastName)
                        self._write_tree(oldTree, os.path.join(snbcDir, lastName))
                    mergeLast = False

                log.debug('Converting %s to snbc...' % item.href)
                snbwriter = SNBMLizer(log)
                if not mergeLast:
                    snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts)
                    for subName in snbcTrees:
                        postfix = ''
                        if subName != '':
                            postfix = '_' + subName
                        lastName = ProcessFileName(item.href + postfix + ".snbc")
                        oldTree = snbcTrees[subName]
                        self._write_tree(oldTree, os.path.join(snbcDir, lastName))
                else:
                    log.debug('Merge %s with last TOC item...' % item.href)
                    snbwriter.merge_content(oldTree, oeb_book, item, [('', _("Start"))], opts)

            # Output the last one if needed
            if oldTree is not None and mergeLast:
                log.debug('Output the last modified chapter again: %s' % lastName)
                self._write_tree(oldTree, os.path.join(snbcDir, lastName))
                mergeLast = False

            for item in m:
                if m.hrefs[item.href].media_type in OEB_IMAGES:
                    log.debug('Converting image: %s ...' % item.href)
                    content = m.hrefs[item.href].data
                    # Convert & Resize image
                    self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href)))

            # Package as SNB File
            snbFile = SNBFile()
            snbFile.FromDir(tdir)
            snbFile.Output(output_path)

    def HandleImage(self, imageData, imagePath):
        """Load *imageData*, shrink it to fit the screen, save to *imagePath*.

        The limits come from the output profile's ``comic_screen_size`` when
        options are available, otherwise 540 x 700 is used. Images that
        already fit are saved without resizing.
        """
        from calibre.utils.magick import Image
        img = Image()
        img.load(imageData)
        (x,y) = img.size
        # convert() may not have run yet, so do not assume self.opts exists.
        opts = getattr(self, 'opts', None)
        if opts:
            SCREEN_Y, SCREEN_X = opts.output_profile.comic_screen_size
        else:
            SCREEN_X = 540
            SCREEN_Y = 700
        # Handle big image only
        if x > SCREEN_X or y > SCREEN_Y:
            xScale = float(x) / SCREEN_X
            yScale = float(y) / SCREEN_Y
            # Use the larger ratio so both dimensions end up within limits.
            scale = max(xScale, yScale)
            # TODO : intelligent image rotation
            #     img = img.rotate(90)
            #     x,y = y,x
            img.size = (x / scale, y / scale)
        img.save(imagePath)
if __name__ == '__main__':
    # Ad-hoc manual test: read an already-processed OEB directory and write
    # it out as /tmp/test.snb. The paths below are hard-coded.
    from calibre.ebooks.oeb.reader import OEBReader
    from calibre.ebooks.oeb.base import OEBBook
    from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
    from calibre.customize.profiles import HanlinV3Output
    class OptionValues(object):
        pass
    opts = OptionValues()
    opts.output_profile = HanlinV3Output(None)
    html_preprocessor = HTMLPreProcessor(None, None, opts)
    from calibre.utils.logging import default_log
    oeb = OEBBook(default_log, html_preprocessor)
    reader = OEBReader
    reader()(oeb, '/tmp/bbb/processed/')
    # NOTE(review): `opts` is built above, but None is passed as the `opts`
    # argument here -- confirm whether `opts` was meant to be passed.
    SNBOutput(None).convert(oeb, '/tmp/test.snb', None, None, default_log);

View File

@ -0,0 +1,319 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import sys, struct, zlib, bz2, os
from mimetypes import types_map
class FileStream:
    """One entry of an SNB archive; attributes are attached by its users."""

    def IsBinary(self):
        """Return True unless every bit of 0x41000000 is set in ``attr``."""
        plain_mask = 0x41000000
        masked = self.attr & plain_mask
        return masked != plain_mask
def compareFileStream(file1, file2):
    """Three-way comparison of two entries by ``fileName`` (-1, 0 or 1)."""
    left, right = file1.fileName, file2.fileName
    return (left > right) - (left < right)
class BlockData:
    # Bare attribute holder; SNBFile.ParseTail stores an `Offset` value on
    # each instance.
    pass
class SNBFile:
    """Reader and writer for the SNB container format.

    Layout as handled by this class: a 44-byte header, a zlib-compressed
    file table (VFAT), the raw binary stream, the plain stream stored as
    independently bz2-compressed 0x8000-byte blocks, a zlib-compressed tail
    (block offsets plus one record per file) and a 16-byte trailer ending
    with the magic string.
    """

    MAGIC = b'SNBP000B'
    REV80 = 0x00008000
    REVA3 = 0x00A3A3A3
    REVZ1 = 0x00000000
    REVZ2 = 0x00000000

    # Private layout constants (values taken from the original literals).
    _HEADER_SIZE = 44            # size of the '>8siiiiiiiii' header (0x2C)
    _BLOCK_SIZE = 0x8000         # uncompressed size of one stream block
    _PLAIN_ATTR = 0x41000000     # attr bits of bz2-compressed ("plain") files
    _BINARY_ATTR = 0x01000000    # attr bits of files stored as-is
    _IMAGE_MIME = {
        '.jpeg': 'image/jpeg',
        '.jpg': 'image/jpeg',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.png': 'image/png',
    }

    def __init__(self, inputFile = None):
        """Create an empty archive, or load *inputFile* when it is given."""
        self.files = []
        self.blocks = []

        if inputFile is not None:
            self.Open(inputFile)

    def Open(self, inputFile):
        """Parse the SNB file at path *inputFile*."""
        self.fileName = inputFile

        # The handle is closed even when parsing raises.
        with open(self.fileName, "rb") as snbFile:
            snbFile.seek(0)
            self.Parse(snbFile)

    def Parse(self, snbFile, metaOnly = False):
        """Read header, file table, tail and all file bodies from *snbFile*.

        :param snbFile: seekable binary file object positioned at the header.
        :param metaOnly: accepted for callers; currently not used, every
            file body is loaded regardless.
        :raises Exception: ("Invalid file") for an entry whose attr is
            neither plain nor binary; ``struct``/``zlib`` errors propagate.
        """
        # Read header
        vmbr = snbFile.read(self._HEADER_SIZE)
        (self.magic, self.rev80, self.revA3, self.revZ1,
         self.fileCount, self.vfatSize, self.vfatCompressed,
         self.binStreamSize, self.plainStreamSizeUncompressed,
         self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)

        # Read FAT
        self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
        self.ParseFile(self.vfat, self.fileCount)

        # Read tail
        snbFile.seek(-16, os.SEEK_END)
        tailblock = snbFile.read(16)
        (self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
        snbFile.seek(self.tailOffset)
        self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
        self.tailSizeUncompressed = len(self.vTailUncompressed)
        self.ParseTail(self.vTailUncompressed, self.fileCount)

        # Read files
        binPos = 0
        plainPos = 0
        uncompressedData = None
        for f in self.files:
            if f.attr & self._PLAIN_ATTR == self._PLAIN_ATTR:
                # Compressed files: the whole plain stream is decompressed
                # once, on first use, then sliced per file.
                if uncompressedData is None:
                    uncompressedData = self._read_plain_stream(snbFile)
                f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
                plainPos += f.fileSize
            elif f.attr & self._BINARY_ATTR == self._BINARY_ATTR:
                # Binary files follow the compressed VFAT back to back.
                snbFile.seek(self._HEADER_SIZE + self.vfatCompressed + binPos)
                f.fileBody = snbFile.read(f.fileSize)
                binPos += f.fileSize
            else:
                print('%s %s' % (f.attr, f.fileName))
                raise Exception("Invalid file")

    def _read_plain_stream(self, snbFile):
        # Decompress every bz2 block of the plain stream and return the
        # concatenation. A block that fails is reported and skipped, as in
        # the original best-effort behaviour.
        chunks = []
        for i in range(self.plainBlock):
            start = self.blocks[self.binBlock + i].Offset
            if i < self.plainBlock - 1:
                end = self.blocks[self.binBlock + i + 1].Offset
            else:
                # The last block runs up to the start of the tail.
                end = self.tailOffset
            snbFile.seek(start)
            try:
                data = snbFile.read(end - start)
                chunks.append(bz2.BZ2Decompressor().decompress(data))
            except Exception as e:
                print(e)
        return b''.join(chunks)

    def ParseFile(self, vfat, fileCount):
        """Append one FileStream per record of the decompressed *vfat*.

        The table holds *fileCount* 12-byte records (attr, name offset,
        size) followed by the NUL-separated file names in the same order.
        """
        fileNames = vfat[fileCount*12:].split(b'\0')
        for i in range(fileCount):
            f = FileStream()
            (f.attr, f.fileNameOffset, f.fileSize) = struct.unpack('>iii', vfat[i * 12 : (i+1)*12])
            f.fileName = fileNames[i]
            self.files.append(f)

    def ParseTail(self, vtail, fileCount):
        """Read block offsets and per-file records from the tail *vtail*."""
        self.binBlock = (self.binStreamSize + self._BLOCK_SIZE - 1) // self._BLOCK_SIZE
        self.plainBlock = (self.plainStreamSizeUncompressed + self._BLOCK_SIZE - 1) // self._BLOCK_SIZE
        blockCount = self.binBlock + self.plainBlock
        for i in range(blockCount):
            block = BlockData()
            (block.Offset,) = struct.unpack('>i', vtail[i * 4 : (i+1) * 4])
            self.blocks.append(block)
        # The per-file (block index, offset in block) pairs follow the
        # block offset table.
        base = blockCount * 4
        for i in range(fileCount):
            (self.files[i].blockIndex, self.files[i].contentOffset) = \
                struct.unpack('>ii', vtail[base + i * 8 : base + (i+1) * 8])

    def IsValid(self):
        """Check magic strings, reserved fields and size bookkeeping.

        Only meaningful after ``Parse``. Returns True when every check
        passes; a bad tail magic is printed before returning False.
        """
        if self.magic != SNBFile.MAGIC:
            return False
        if self.rev80 != SNBFile.REV80:
            return False
        if self.revA3 != SNBFile.REVA3:
            return False
        if self.revZ1 != SNBFile.REVZ1:
            return False
        if self.revZ2 != SNBFile.REVZ2:
            return False
        if self.vfatSize != len(self.vfat):
            return False
        if self.fileCount != len(self.files):
            return False
        if (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8 != self.tailSizeUncompressed:
            return False
        if self.tailMagic != SNBFile.MAGIC:
            print(self.tailMagic)
            return False
        return True

    def FromDir(self, tdir):
        """Add every file below *tdir*; .snbf/.snbc as plain, rest binary."""
        for root, dirs, files in os.walk(tdir):
            for name in files:
                p, ext = os.path.splitext(name)
                relative = os.path.relpath(os.path.join(root, name), tdir)
                if ext in [ ".snbf", ".snbc" ]:
                    self.AppendPlain(relative, tdir)
                else:
                    self.AppendBinary(relative, tdir)

    def _append(self, fileName, tdir, attr):
        # Shared body of AppendPlain/AppendBinary: load the file and record
        # it under its '/'-separated relative name.
        f = FileStream()
        f.attr = attr
        with open(os.path.join(tdir, fileName), 'rb') as source:
            f.fileBody = source.read()
        f.fileSize = len(f.fileBody)
        f.fileName = fileName.replace(os.sep, '/')
        self.files.append(f)

    def AppendPlain(self, fileName, tdir):
        """Add *fileName* (relative to *tdir*) to the compressed stream."""
        self._append(fileName, tdir, self._PLAIN_ATTR)

    def AppendBinary(self, fileName, tdir):
        """Add *fileName* (relative to *tdir*) to the binary stream."""
        self._append(fileName, tdir, self._BINARY_ATTR)

    def GetFileStream(self, fileName):
        """Return the body of the entry named *fileName*, or None."""
        for entry in self.files:
            if entry.fileName == fileName:
                return entry.fileBody
        return None

    def OutputImageFiles(self, path):
        """Write every image entry into directory *path* (flattened).

        :return: list of ``(file name, mime type)`` for the written files.
        """
        fileNames = []
        for f in self.files:
            fname = os.path.basename(f.fileName)
            root, ext = os.path.splitext(fname)
            # Match extensions case-insensitively.
            ext = ext.lower()
            if ext in self._IMAGE_MIME:
                with open(os.path.join(path, fname), 'wb') as out:
                    out.write(f.fileBody)
                # types_map may lack some of these extensions (e.g. '.svg'),
                # so fall back to the built-in table instead of raising.
                fileNames.append((fname, types_map.get(ext, self._IMAGE_MIME[ext])))
        return fileNames

    def Output(self, outputFile):
        """Write the archive to path *outputFile*.

        The whole image is assembled in memory first, so an error in the
        entries does not leave a truncated file behind.

        :raises Exception: ("Unknown file type") for an entry whose attr is
            neither plain nor binary.
        """
        # Sort the files in file buffer,
        # required by the SNB file format
        self.files.sort(key=lambda entry: entry.fileName)

        blockSize = self._BLOCK_SIZE

        # Create VFAT & file streams
        records = []
        names = []
        plainParts = []
        binParts = []
        nameTableSize = 0
        plainSize = 0
        binSize = 0
        for f in self.files:
            name = f.fileName
            if not isinstance(name, bytes):
                # A unicode name must be encoded before it is mixed with the
                # packed binary records.
                name = name.encode('utf-8')
            records.append(struct.pack('>iii', f.attr, nameTableSize, f.fileSize))
            names.append(name + b'\0')
            nameTableSize += len(name) + 1
            if f.attr & self._PLAIN_ATTR == self._PLAIN_ATTR:
                # Plain Files
                f.contentOffset = plainSize
                plainParts.append(f.fileBody)
                plainSize += len(f.fileBody)
            elif f.attr & self._BINARY_ATTR == self._BINARY_ATTR:
                # Binary Files
                f.contentOffset = binSize
                binParts.append(f.fileBody)
                binSize += len(f.fileBody)
            else:
                print('%s %s' % (f.attr, f.fileName))
                raise Exception("Unknown file type")
        vfat = b''.join(records) + b''.join(names)
        vfatCompressed = zlib.compress(vfat)
        plainStream = b''.join(plainParts)
        binStream = b''.join(binParts)

        # File header
        header = struct.pack('>8siiiiiiiii', SNBFile.MAGIC, SNBFile.REV80,
            SNBFile.REVA3, SNBFile.REVZ1, len(self.files),
            len(vfat), len(vfatCompressed), len(binStream),
            len(plainStream), SNBFile.REVZ2)

        # Generate block information
        binBlockOffset = self._HEADER_SIZE + len(vfatCompressed)
        plainBlockOffset = binBlockOffset + len(binStream)
        binBlock = (len(binStream) + blockSize - 1) // blockSize

        # Binary blocks are uncompressed, so they sit at fixed intervals.
        blockOffsets = [struct.pack('>i', binBlockOffset + i * blockSize)
                        for i in range(binBlock)]

        # Plain blocks are compressed one by one; each offset depends on
        # the compressed size of the blocks before it.
        compressedBlocks = []
        compressedSize = 0
        for pos in range(0, len(plainStream), blockSize):
            blockOffsets.append(struct.pack('>i', plainBlockOffset + compressedSize))
            compressed = bz2.compress(plainStream[pos:pos+blockSize])
            compressedBlocks.append(compressed)
            compressedSize += len(compressed)

        tailRecords = []
        for f in self.files:
            # Block indexes of plain files are counted after all binary blocks.
            if f.IsBinary():
                t = 0
            else:
                t = binBlock
            tailRecords.append(struct.pack('>ii', f.contentOffset // blockSize + t, f.contentOffset % blockSize))
        compressedTail = zlib.compress(b''.join(blockOffsets) + b''.join(tailRecords))

        with open(outputFile, 'wb') as out:
            # Write header
            out.write(header)
            # Write vfat
            out.write(vfatCompressed)
            # Write binary stream
            out.write(binStream)
            # Write plain stream
            for compressed in compressedBlocks:
                out.write(compressed)
            # Write tail block
            out.write(compressedTail)
            # Write tail pointer
            out.write(struct.pack('>ii', len(compressedTail), plainBlockOffset + compressedSize))
            # Write file end mark
            out.write(SNBFile.MAGIC)
        return

    def Dump(self):
        """Print the parsed structure and extract every file below /tmp/."""
        # fileName only exists when the archive was loaded through Open().
        fileName = getattr(self, 'fileName', None)
        if fileName:
            print("File Name:\t%s" % fileName)
        print("File Count:\t%s" % self.fileCount)
        print("VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed))
        print("Binary Stream Size:\t%s" % self.binStreamSize)
        print("Plain Stream Uncompressed Size:\t%s" % self.plainStreamSizeUncompressed)
        print("Binary Block Count:\t%s" % self.binBlock)
        print("Plain Block Count:\t%s" % self.plainBlock)
        for i in range(self.fileCount):
            print("File  %s" % i)
            f = self.files[i]
            print("File Name:  %s" % f.fileName)
            print("File Attr:  %s" % f.attr)
            print("File Size:  %s" % f.fileSize)
            print("Block Index:  %s" % f.blockIndex)
            print("Content Offset:  %s" % f.contentOffset)
            target = "/tmp/" + f.fileName
            # Entry names contain sub-directories (e.g. 'snbf/...'), which
            # must exist before the file can be created.
            parent = os.path.dirname(target)
            if not os.path.isdir(parent):
                os.makedirs(parent)
            with open(target, 'wb') as tempFile:
                tempFile.write(f.fileBody)
def usage():
    """Print the command-line help of the SNB file self-test."""
    help_lines = (
        "This unit test is for INTERNAL usage only!",
        "This unit test accept two parameters.",
        "python snbfile.py <INPUTFILE> <DESTFILE>",
        "The input file will be extracted and write to dest file. ",
        "Meta data of the file will be shown during this process.",
    )
    for text in help_lines:
        print(text)
def main():
    """Dump an SNB file and re-write it; return a process exit status.

    Expects exactly two command-line arguments (input and output path);
    otherwise prints the usage text and exits with status 0. Returns 0 on
    success and 1 when the input file is not valid.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(0)
    inputFile, outputFile = sys.argv[1], sys.argv[2]
    print("Input file:  %s" % inputFile)
    print("Output file:  %s" % outputFile)
    snbFile = SNBFile(inputFile)
    if not snbFile.IsValid():
        print("The input file is invalid.")
        return 1
    snbFile.Dump()
    snbFile.Output(outputFile)
    return 0
if __name__ == "__main__":
    # Script entry point: run main() and use its return value as the
    # process exit status.
    """SNB file unit test"""
    sys.exit(main())

View File

@ -0,0 +1,263 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
'''
Transform OEB content into SNB format
'''
import os
import re
from lxml import etree
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
from calibre.ebooks.oeb.stylizer import Stylizer
def ProcessFileName(fileName):
    """Map an OEB href to the flat, lower-case file name used inside SNB.

    Path separators and '#' become '_', the result is lower-cased, and any
    of the known image extensions is replaced by '.jpg'.
    """
    flattened = fileName
    # Flatten the path, then the bookmark marker of HTML hrefs.
    for separator in ("/", os.sep, "#"):
        flattened = flattened.replace(separator, "_")
    flattened = flattened.lower()
    # All images are stored as jpg.
    stem, suffix = os.path.splitext(flattened)
    if suffix in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
        return stem + '.jpg'
    return flattened
# Element names listed as block-level for the conversion.
# NOTE(review): presumably consumed by SNBMLizer.dump_text -- confirm there.
BLOCK_TAGS = [
    'div',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'li',
    'tr',
]
# CSS display values listed as block-level.
BLOCK_STYLES = [
    'block',
]
# Element names listed for spacing treatment (usage not visible here).
SPACE_TAGS = [
    'td',
]
# Marker strings placed at the start of a line of the intermediate text;
# SNBMLizer.mlize checks for them with find(...) == 0 to recognise image
# references, bookmark (sub-item) switches and preformatted lines.
CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"
class SNBMLizer(object):
curSubItem = ""
# curText = [ ]
def __init__(self, log):
    # Keep the logger used by the conversion methods of this class.
    self.log = log
def extract_content(self, oeb_book, item, subitems, opts):
    """Store the conversion inputs on the instance and run ``mlize``.

    Returns whatever ``mlize`` returns for *item*.
    """
    self.log.info('Converting XHTML to SNBC...')
    self.oeb_book, self.opts = oeb_book, opts
    self.item, self.subitems = item, subitems
    trees = self.mlize()
    return trees
def merge_content(self, old_tree, oeb_book, item, subitems, opts):
    """Convert *item* and append its body entries to *old_tree*'s body.

    *item* is converted with ``extract_content``; the children of every
    resulting tree's ``body`` are moved to the end of the ``body`` of
    *old_tree*. Nothing happens when *old_tree* has no ``body``. A new tree
    without a ``body`` is skipped.
    """
    newTrees = self.extract_content(oeb_book, item, subitems, opts)
    body = old_tree.find(".//body")
    if body is None:
        return
    for subName in newTrees:
        newbody = newTrees[subName].find(".//body")
        if newbody is None:
            continue
        # Iterate over a snapshot: appending an element to `body` removes
        # it from `newbody`, which must not disturb the iteration.
        for entity in list(newbody):
            body.append(entity)
def mlize(self):
    # Convert self.item into one <snbc> tree per entry of self.subitems and
    # return them as a dict keyed by sub-item id.
    output = [ u'' ]
    stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
    content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
    # content = self.remove_newlines(content)
    trees = { }
    # Prepare an empty tree (head/title plus body) for every sub-item.
    for subitem, subtitle in self.subitems:
        snbcTree = etree.Element("snbc")
        etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle
        etree.SubElement(snbcTree, "body")
        trees[subitem] = snbcTree
    # Start the text with a bookmark marker for the '' sub-item.
    output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))
    # NOTE(review): only element [0] of dump_text's result is used; the
    # full return shape of dump_text is not visible here -- confirm.
    output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0]
    output = self.cleanup_text(u''.join(output))

    # Distribute the text line by line; bookmark marker lines switch the
    # tree that receives the following lines.
    subitem = ''
    for line in output.splitlines():
        if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
            line = line.strip(u' \t\n\r\u3000')
        else:
            # Preformatted line: stored without stripping or indentation.
            etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
            continue
        if len(line) != 0:
            if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                # Image names are prefixed with the flattened directory of
                # the current document.
                prefix = ProcessFileName(os.path.dirname(self.item.href))
                if prefix != '':
                    etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                        prefix + '_' + line[len(CALIBRE_SNB_IMG_TAG):]
                else:
                    etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                        line[len(CALIBRE_SNB_IMG_TAG):]
            elif line.find(CALIBRE_SNB_BM_TAG) == 0:
                subitem = line[len(CALIBRE_SNB_BM_TAG):]
            else:
                # Ordinary paragraph: indented with two U+3000 characters.
                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                    etree.CDATA(unicode(u'\u3000\u3000' + line))
    return trees
def remove_newlines(self, text):
    """Return *text* with every line break replaced by a single space."""
    self.log.debug('\tRemove newlines for processing...')
    # '\r\n' comes first so a Windows line break yields one space, not two.
    for newline in ('\r\n', '\n', '\r'):
        text = text.replace(newline, ' ')
    return text
def cleanup_text(self, text):
    """Normalise the intermediate text and optionally wrap long lines.

    Replaces a few special characters, collapses excessive blank lines
    (or removes paragraph spacing when ``opts.remove_paragraph_spacing`` is
    set), strips spaces at line ends and, when ``opts.snb_max_line_length``
    is non-zero, breaks lines at spaces. The effective limit is never below
    25 characters.

    :param text: unicode text produced by ``dump_text``.
    :return: the cleaned text.
    """
    self.log.debug('\tClean up text...')
    # Replace bad characters.
    text = text.replace(u'\xc2', '')
    text = text.replace(u'\xa0', ' ')
    text = text.replace(u'\xa9', '(C)')

    # Replace tabs, vertical tags and form feeds with single space.
    # NOTE(review): str.replace is literal, so these only match the
    # two-character sequences '\t+', '\v+' and '\f+'. Kept unchanged
    # because a regex version would also alter preformatted lines.
    text = text.replace('\t+', ' ')
    text = text.replace('\v+', ' ')
    text = text.replace('\f+', ' ')

    # Single line paragraph.
    text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)

    # Remove multiple spaces.
    #text = re.sub('[ ]{2,}', ' ', text)

    # Remove excessive newlines.
    text = re.sub('\n[ ]+\n', '\n\n', text)
    if self.opts.remove_paragraph_spacing:
        text = re.sub('\n{2,}', '\n', text)
        text = re.sub('(?imu)^(?=.)', '\t', text)
    else:
        text = re.sub('\n{3,}', '\n\n', text)

    # Replace spaces at the beginning and end of lines
    text = re.sub('(?imu)^[ ]+', '', text)
    text = re.sub('(?imu)[ ]+$', '', text)

    max_length = self.opts.snb_max_line_length
    if max_length:
        # The minimum is checked against the SNB option itself; the
        # original read `max_line_length`, which is not an SNB option.
        if max_length < 25:
            max_length = 25
        short_lines = []
        for line in text.splitlines():
            while len(line) > max_length:
                # Prefer the last space before the limit.
                space = line.rfind(' ', 0, max_length)
                if space == -1:
                    # Otherwise the first space after it.
                    space = line.find(' ', max_length, len(line))
                if space != -1:
                    short_lines.append(line[:space])
                    line = line[space + 1:]
                else:
                    # No space was found cannot break line.
                    short_lines.append(line)
                    line = ''
            # Add the text that was less than max_length to the list
            short_lines.append(line)
        text = '\n'.join(short_lines)

    return text
def dump_text(self, subitems, elem, stylizer, end='', pre=False, li=''):
    """Recursively flatten *elem* and its descendants into text fragments.

    Structure is encoded with marker lines that the caller parses later:
    CALIBRE_SNB_BM_TAG starts a new sub-item, CALIBRE_SNB_IMG_TAG names an
    image and CALIBRE_SNB_PRE_TAG prefixes preformatted lines.

    :param subitems: iterable of ``(href, title)`` pairs; an element whose
        ``id`` equals one of the hrefs may start a new sub-item.
    :param elem: the element to dump.
    :param stylizer: object whose ``style(elem)`` result is indexed for
        ``'display'`` and ``'visibility'``.
    :param end: the last characters emitted before this element; used to
        avoid inserting a duplicate paragraph break or space.
    :param pre: True when an enclosing element is a ``pre`` tag.
    :param li: list-item prefix still waiting to be put in front of the
        next piece of text.
    :return: ``(fragments, li)`` where ``fragments`` is a list of text
        pieces and ``li`` is the prefix still pending. Every exit path
        returns this shape; elements that are skipped contribute an empty
        fragment list.
    """
    if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
        # Comments, processing instructions and non-XHTML elements.
        return [], li
    text = ['']
    style = stylizer.style(elem)
    elem_id = elem.attrib.get('id')
    if elem_id is not None and any(elem_id == href for href, title in subitems):
        if self.curSubItem is not None and self.curSubItem != elem_id:
            self.curSubItem = elem_id
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem))
    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
        # Invisible element: nothing is emitted for it. self.curSubItem
        # may already have been advanced above.
        return [], li
    tag = barename(elem.tag)
    in_block = False
    # Are we in a paragraph block?
    if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
        in_block = True
        if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
            text.append(u'\n\n')
    if tag in SPACE_TAGS:
        # Only add a separating space when the preceding text does not
        # already end with one.
        if not end.endswith(u' ') and hasattr(elem, 'text') and elem.text:
            text.append(u' ')
    if tag == 'img':
        src = elem.attrib.get('src')
        # An <img> without a src attribute has nothing to reference; skip
        # it instead of aborting with a KeyError.
        if src is not None:
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(src)))
    if tag == 'br':
        text.append(u'\n\n')
    if tag == 'li':
        li = '- '
    pre = (tag == 'pre' or pre)
    # Process tags that contain text.
    if hasattr(elem, 'text') and elem.text:
        if pre:
            # join() only puts the marker between lines, so the first line
            # carries no CALIBRE_SNB_PRE_TAG.
            text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join((li + elem.text).splitlines()))
        else:
            text.append(li + elem.text)
        li = ''
    for item in elem:
        en = u''
        if len(text) >= 2:
            en = text[-1][-2:]
        # NOTE(review): only the fragments are used; the li value returned
        # by the child is discarded, so a pending prefix is handed to every
        # child until this element's own text/tail consumes it. Confirm
        # whether the prefix was meant to be propagated back.
        t = self.dump_text(subitems, item, stylizer, en, pre, li)[0]
        text += t
    if in_block:
        text.append(u'\n\n')
    if hasattr(elem, 'tail') and elem.tail:
        if pre:
            text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join(elem.tail.splitlines()))
        else:
            text.append(li + elem.tail)
        li = ''
    return text, li

View File

@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
(_('Topaz books'), ['tpz','azw1']), (_('Topaz books'), ['tpz','azw1']),
(_('Text books'), ['txt', 'rtf']), (_('Text books'), ['txt', 'rtf']),
(_('PDF Books'), ['pdf']), (_('PDF Books'), ['pdf']),
(_('SNB Books'), ['snb']),
(_('Comics'), ['cbz', 'cbr', 'cbc']), (_('Comics'), ['cbz', 'cbr', 'cbc']),
(_('Archives'), ['zip', 'rar']), (_('Archives'), ['zip', 'rar']),
] ]

View File

@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.gui2.convert.snb_output_ui import Ui_Form
from calibre.gui2.convert import Widget
# Module-level placeholder: no executable code in this module reads or
# rebinds it.
newline_model = None
class PluginWidget(Widget, Ui_Form):
    """Conversion-dialog page for the SNB output format.

    The class attributes supply the page title, help text, the name the
    settings are committed under and the page icon.
    """

    TITLE = _('SNB Output')
    HELP = _('Options specific to')+' SNB '+_('output')
    COMMIT_NAME = 'snb_output'
    ICON = I('mimetypes/snb.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        """Build the page and initialise its options.

        An empty list is handed to the base ``Widget`` (presumably the
        option names this page manages -- confirm against ``Widget``).
        """
        no_options = []
        Widget.__init__(self, parent, no_options)
        self.db = db
        self.book_id = book_id
        self.initialize_options(get_option, get_help, db, book_id)

View File

@ -0,0 +1,74 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Form</class>
<widget class="QWidget" name="Form">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle">
<string>Form</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<!-- Every item below is commented out, so this grid layout is currently empty and the form shows no option widgets. The disabled items are: opt_newline with its label, a vertical spacer, opt_inline_toc, opt_max_line_length with its label, and opt_force_max_line_length. -->
<!-- <item row="0" column="0"> -->
<!-- <widget class="QLabel" name="label"> -->
<!-- <property name="text"> -->
<!-- <string>&amp;Line ending style:</string> -->
<!-- </property> -->
<!-- <property name="buddy"> -->
<!-- <cstring>opt_newline</cstring> -->
<!-- </property> -->
<!-- </widget> -->
<!-- </item> -->
<!-- <item row="0" column="1"> -->
<!-- <widget class="QComboBox" name="opt_newline"/> -->
<!-- </item> -->
<!-- <item row="4" column="0"> -->
<!-- <spacer name="verticalSpacer"> -->
<!-- <property name="orientation"> -->
<!-- <enum>Qt::Vertical</enum> -->
<!-- </property> -->
<!-- <property name="sizeHint" stdset="0"> -->
<!-- <size> -->
<!-- <width>20</width> -->
<!-- <height>246</height> -->
<!-- </size> -->
<!-- </property> -->
<!-- </spacer> -->
<!-- </item> -->
<!-- <item row="3" column="0" colspan="2"> -->
<!-- <widget class="QCheckBox" name="opt_inline_toc"> -->
<!-- <property name="text"> -->
<!-- <string>&amp;Inline TOC</string> -->
<!-- </property> -->
<!-- </widget> -->
<!-- </item> -->
<!-- <item row="1" column="1"> -->
<!-- <widget class="QSpinBox" name="opt_max_line_length"/> -->
<!-- </item> -->
<!-- <item row="1" column="0"> -->
<!-- <widget class="QLabel" name="label_2"> -->
<!-- <property name="text"> -->
<!-- <string>&amp;Maximum line length:</string> -->
<!-- </property> -->
<!-- <property name="buddy"> -->
<!-- <cstring>opt_max_line_length</cstring> -->
<!-- </property> -->
<!-- </widget> -->
<!-- </item> -->
<!-- <item row="2" column="0" colspan="2"> -->
<!-- <widget class="QCheckBox" name="opt_force_max_line_length"> -->
<!-- <property name="text"> -->
<!-- <string>Force maximum line length</string> -->
<!-- </property> -->
<!-- </widget> -->
<!-- </item> -->
</layout>
</widget>
<resources/>
<connections/>
</ui>

View File

@ -20,9 +20,9 @@ What formats does |app| support conversion to/from?
|app| supports the conversion of many input formats to many output formats. |app| supports the conversion of many input formats to many output formats.
It can convert every input format in the following list, to every output format. It can convert every input format in the following list, to every output format.
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TCR, TXT *Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TCR, TXT *Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, SNB, TCR, TXT
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers