class SNBMetadataReader(MetadataReaderPlugin):
    # Metadata reader plugin registered for files with the ``snb`` extension.

    author = 'Li Fanxi'
    name = 'Read SNB metadata'
    description = _('Read metadata from %s files') % 'SNB'
    file_types = set(['snb'])

    def get_metadata(self, stream, ftype):
        """Return the metadata read from the SNB file in *stream*.

        *ftype* is accepted for interface compatibility and is not used.
        The SNB parser is imported lazily, inside the call.
        """
        from calibre.ebooks.metadata import snb as snb_metadata
        return snb_metadata.get_metadata(stream)
class BambookOutput(OutputProfile):
    # Output profile for the Sanda Bambook reader.

    # Identification
    short_name = 'bambook'
    name = 'Sanda Bambook'
    description = _('This profile is intended for the Sanda Bambook.')

    # Display geometry.  Screen size is a best guess.
    # NOTE(review): SNBOutput.HandleImage unpacks comic_screen_size as
    # (height, width); confirm the tuple order against the other profiles.
    dpi = 168.451
    screen_size = (800, 600)
    comic_screen_size = (700, 540)

    # Font base size and the available font sizes
    fbase = 12
    fsizes = [10, 12, 14, 16]
[ 'html', 'htm', 'xhtml', 'xhtm', 'rtf', 'fb2', 'pdf', 'prc', 'odt', 'epub', 'lit', 'lrx', 'lrf', 'mobi', - 'rb', 'imp', 'azw' + 'rb', 'imp', 'azw', 'snb' ] # The priorities for loading metadata from different file types diff --git a/src/calibre/ebooks/metadata/snb.py b/src/calibre/ebooks/metadata/snb.py new file mode 100755 index 0000000000..2a330b19e6 --- /dev/null +++ b/src/calibre/ebooks/metadata/snb.py @@ -0,0 +1,47 @@ +'''Read meta information from SNB files''' + +from __future__ import with_statement + +__license__ = 'GPL v3' +__copyright__ = '2010, Li Fanxi ' + +import os +from StringIO import StringIO +from calibre.ebooks.metadata import MetaInformation +from calibre.ebooks.snb.snbfile import SNBFile +from lxml import etree + +def get_metadata(stream, extract_cover=True): + """ Return metadata as a L{MetaInfo} object """ + mi = MetaInformation(_('Unknown'), [_('Unknown')]) + snbFile = SNBFile() + + try: + if not hasattr(stream, 'write'): + snbFile.Parse(StringIO(stream), True) + else: + stream.seek(0) + snbFile.Parse(stream, True) + + meta = snbFile.GetFileStream('snbf/book.snbf') + + if meta != None: + meta = etree.fromstring(meta) + mi.title = meta.find('.//head/name').text + mi.authors = [meta.find('.//head/author').text] + mi.language = meta.find('.//head/language').text.lower().replace('_', '-') + mi.publisher = meta.find('.//head/publisher').text + + if extract_cover: + cover = meta.find('.//head/cover') + if cover != None and cover.text != None: + root, ext = os.path.splitext(cover.text) + if ext == '.jpeg': + ext = '.jpg' + mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text)) + + except Exception: + import traceback + traceback.print_exc() + + return mi diff --git a/src/calibre/ebooks/snb/__init__.py b/src/calibre/ebooks/snb/__init__.py new file mode 100644 index 0000000000..d83022b362 --- /dev/null +++ b/src/calibre/ebooks/snb/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__copyright__ = 
# Skeleton of a generated chapter page: first the chapter title, then the
# already-escaped body markup.
# NOTE(review): the tags were reconstructed from a copy of the source whose
# markup had been stripped - confirm against the original file.
HTML_TEMPLATE = u'<html><head><title>%s</title></head><body>\n%s\n</body></html>'

# Ordered (raw, escaped) pairs.  The ampersand must come first so that the
# ampersands introduced by the later entities are not escaped again.
_HTML_ESCAPES = (
    (u'&', u'&amp;'),
    (u'<', u'&lt;'),
    (u'>', u'&gt;'),
    (u'"', u'&quot;'),
    (u"'", u'&apos;'),
    (u'\n', u'<br/>'),
    (u' ', u'&nbsp;'),
)


def html_encode(s):
    """Escape the text *s* so it can be embedded in an HTML document.

    Markup characters and quotes become character entities, newlines become
    ``<br/>`` and spaces become ``&nbsp;`` so the original spacing survives
    rendering.

    :param s: unicode text, or None (the text of an empty XML element).
    :return: the escaped unicode string; ``u''`` when *s* is None.
    """
    if s is None:
        return u''
    for raw, escaped in _HTML_ESCAPES:
        s = s.replace(raw, escaped)
    return s
open(os.path.join(tdir, fname), 'wb') + lines = [] + for line in snbc.find('.//body'): + if line.tag == 'text': + lines.append(u'

%s

' % html_encode(line.text)) + elif line.tag == 'img': + lines.append(u'

' % html_encode(line.text)) + outputFile.write((HTML_TEMPLATE % (chapterName, u'\n'.join(lines))).encode('utf-8', 'replace')) + outputFile.close() + oeb.toc.add(ch.text, fname) + id, href = oeb.manifest.generate(id='html', + href=ascii_filename(fname)) + item = oeb.manifest.add(id, href, 'text/html') + item.html_input_href = fname + oeb.spine.add(item, True) + i = i + 1 + imageFiles = snbFile.OutputImageFiles(tdir) + for f, m in imageFiles: + id, href = oeb.manifest.generate(id='image', + href=ascii_filename(f)) + item = oeb.manifest.add(id, href, m) + item.html_input_href = f + + return oeb + diff --git a/src/calibre/ebooks/snb/output.py b/src/calibre/ebooks/snb/output.py new file mode 100644 index 0000000000..73a726bd26 --- /dev/null +++ b/src/calibre/ebooks/snb/output.py @@ -0,0 +1,265 @@ +# -*- coding: utf-8 -*- + +__license__ = 'GPL 3' +__copyright__ = '2010, Li Fanxi ' +__docformat__ = 'restructuredtext en' + +import os, string + +from lxml import etree +from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation +from calibre.ptempfile import TemporaryDirectory +from calibre.constants import __appname__, __version__ +from calibre.ebooks.snb.snbfile import SNBFile +from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName + +class SNBOutput(OutputFormatPlugin): + + name = 'SNB Output' + author = 'Li Fanxi' + file_type = 'snb' + + options = set([ + # OptionRecommendation(name='newline', recommended_value='system', + # level=OptionRecommendation.LOW, + # short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(), + # help=_('Type of newline to use. Options are %s. Default is \'system\'. ' + # 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. ' + # 'For Mac OS X use \'unix\'. 
\'system\' will default to the newline ' + # 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())), + OptionRecommendation(name='snb_output_encoding', recommended_value='utf-8', + level=OptionRecommendation.LOW, + help=_('Specify the character encoding of the output document. ' \ + 'The default is utf-8. Note: This option is not honored by all ' \ + 'formats.')), + # OptionRecommendation(name='inline_toc', + # recommended_value=False, level=OptionRecommendation.LOW, + # help=_('Add Table of Contents to beginning of the book.')), + OptionRecommendation(name='snb_max_line_length', + recommended_value=0, level=OptionRecommendation.LOW, + help=_('The maximum number of characters per line. This splits on ' + 'the first space before the specified value. If no space is found ' + 'the line will be broken at the space after and will exceed the ' + 'specified value. Also, there is a minimum of 25 characters. ' + 'Use 0 to disable line splitting.')), + # OptionRecommendation(name='force_max_line_length', + # recommended_value=False, level=OptionRecommendation.LOW, + # help=_('Force splitting on the max-line-length value when no space ' + # 'is present. 
Also allows max-line-length to be below the minimum')), + ]) + + def convert(self, oeb_book, output_path, input_plugin, opts, log): + self.opts = opts + from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable + try: + rasterizer = SVGRasterizer() + rasterizer(oeb_book, opts) + except Unavailable: + self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + + # Create temp dir + with TemporaryDirectory('_snb_output') as tdir: + # Create stub directories + snbfDir = os.path.join(tdir, 'snbf') + snbcDir = os.path.join(tdir, 'snbc') + snbiDir = os.path.join(tdir, 'snbc/images') + os.mkdir(snbfDir) + os.mkdir(snbcDir) + os.mkdir(snbiDir) + + # Process Meta data + meta = oeb_book.metadata + if meta.title: + title = unicode(meta.title[0]) + else: + title = '' + authors = [unicode(x) for x in meta.creator if x.role == 'aut'] + if meta.publisher: + publishers = unicode(meta.publisher[0]) + else: + publishers = '' + if meta.language: + lang = unicode(meta.language[0]).upper() + else: + lang = '' + if meta.description: + abstract = unicode(meta.description[0]) + else: + abstract = '' + + # Process Cover + g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine + href = None + if 'titlepage' not in g: + if 'cover' in g: + href = g['cover'].href + + # Output book info file + bookInfoTree = etree.Element("book-snbf", version="1.0") + headTree = etree.SubElement(bookInfoTree, "head") + etree.SubElement(headTree, "name").text = title + etree.SubElement(headTree, "author").text = ' '.join(authors) + etree.SubElement(headTree, "language").text = lang + etree.SubElement(headTree, "rights") + etree.SubElement(headTree, "publisher").text = publishers + etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__ + etree.SubElement(headTree, "created") + etree.SubElement(headTree, "abstract").text = abstract + if href != None: + etree.SubElement(headTree, "cover").text = ProcessFileName(href) + else: + etree.SubElement(headTree, 
"cover") + bookInfoFile = open(os.path.join(snbfDir, 'book.snbf'), 'wb') + bookInfoFile.write(etree.tostring(bookInfoTree, pretty_print=True, encoding='utf-8')) + bookInfoFile.close() + + # Output TOC + tocInfoTree = etree.Element("toc-snbf") + tocHead = etree.SubElement(tocInfoTree, "head") + tocBody = etree.SubElement(tocInfoTree, "body") + outputFiles = { } + if oeb_book.toc.count() == 0: + log.warn('This SNB file has no Table of Contents. ' + 'Creating a default TOC') + first = iter(oeb_book.spine).next() + oeb_book.toc.add(_('Start Page'), first.href) + else: + first = iter(oeb_book.spine).next() + if oeb_book.toc[0].href != first.href: + # The pages before the fist item in toc will be stored as + # "Cover Pages". + # oeb_book.toc does not support "insert", so we generate + # the tocInfoTree directly instead of modifying the toc + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(first.href) + ".snbc") + ch.text = _('Cover Pages') + outputFiles[first.href] = [] + outputFiles[first.href].append(("", _("Cover Pages"))) + + for tocitem in oeb_book.toc: + if tocitem.href.find('#') != -1: + item = string.split(tocitem.href, '#') + if len(item) != 2: + log.error('Error in TOC item: %s' % tocitem) + else: + if item[0] in outputFiles: + outputFiles[item[0]].append((item[1], tocitem.title)) + else: + outputFiles[item[0]] = [] + if not "" in outputFiles[item[0]]: + outputFiles[item[0]].append(("", tocitem.title + _(" (Preface)"))) + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(item[0]) + ".snbc") + ch.text = tocitem.title + _(" (Preface)") + outputFiles[item[0]].append((item[1], tocitem.title)) + else: + if tocitem.href in outputFiles: + outputFiles[tocitem.href].append(("", tocitem.title)) + else: + outputFiles[tocitem.href] = [] + outputFiles[tocitem.href].append(("", tocitem.title)) + ch = etree.SubElement(tocBody, "chapter") + ch.set("src", ProcessFileName(tocitem.href) + ".snbc") + ch.text = tocitem.title + + + 
etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody) + + tocInfoFile = open(os.path.join(snbfDir, 'toc.snbf'), 'wb') + tocInfoFile.write(etree.tostring(tocInfoTree, pretty_print=True, encoding='utf-8')) + tocInfoFile.close() + + # Output Files + oldTree = None + mergeLast = False + lastName = None + for item in s: + from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES + if m.hrefs[item.href].media_type in OEB_DOCS: + if not item.href in outputFiles: + log.debug('File %s is unused in TOC. Continue in last chapter' % item.href) + mergeLast = True + else: + if oldTree != None and mergeLast: + log.debug('Output the modified chapter again: %s' % lastName) + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + mergeLast = False + + log.debug('Converting %s to snbc...' % item.href) + snbwriter = SNBMLizer(log) + snbcTrees = None + if not mergeLast: + snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts) + for subName in snbcTrees: + postfix = '' + if subName != '': + postfix = '_' + subName + lastName = ProcessFileName(item.href + postfix + ".snbc") + oldTree = snbcTrees[subName] + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + else: + log.debug('Merge %s with last TOC item...' % item.href) + snbwriter.merge_content(oldTree, oeb_book, item, [('', _("Start"))], opts) + + # Output the last one if needed + log.debug('Output the last modified chapter again: %s' % lastName) + if oldTree != None and mergeLast: + outputFile = open(os.path.join(snbcDir, lastName), 'wb') + outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8')) + outputFile.close() + mergeLast = False + + for item in m: + if m.hrefs[item.href].media_type in OEB_IMAGES: + log.debug('Converting image: %s ...' 
# -*- coding: utf-8 -*-

__license__   = 'GPL 3'
__copyright__ = '2010, Li Fanxi '
__docformat__ = 'restructuredtext en'

import bz2
import os
import struct
import sys
import zlib
from mimetypes import types_map

# Attribute flags stored for every archive member.
_ATTR_PLAIN = 0x41000000    # member lives in the bz2-compressed plain stream
_ATTR_BINARY = 0x01000000   # member lives in the uncompressed binary stream
# Both streams are addressed in blocks of this many bytes.
_BLOCK_SIZE = 0x8000
# Size of the fixed header: 8 byte magic followed by nine 32 bit integers.
_HEADER_SIZE = 44
_IMAGE_EXTENSIONS = ('.jpeg', '.jpg', '.gif', '.svg', '.png')


def _name_to_bytes(name):
    """Return the member name *name* as the byte string stored on disk."""
    if isinstance(name, bytes):
        return name
    # NOTE(review): the on-disk name encoding is assumed to be UTF-8.
    return name.encode('utf-8')


def _name_from_bytes(raw):
    """Return the on-disk member name *raw* as a native string."""
    if isinstance(raw, str):
        # Python 2: byte strings already are the native string type.
        return raw
    return raw.decode('utf-8', 'replace')


class FileStream(object):
    """One archive member: ``attr``, ``fileName``, ``fileSize``, ``fileBody``."""

    def IsBinary(self):
        """Return True unless the member carries the plain-text attribute."""
        return self.attr & _ATTR_PLAIN != _ATTR_PLAIN


def compareFileStream(file1, file2):
    """Three-way comparison of two members by file name (-1, 0 or 1)."""
    return (file1.fileName > file2.fileName) - (file1.fileName < file2.fileName)


class BlockData(object):
    """Entry of the block table; ``Offset`` is the block's file position."""
    pass


class SNBFile(object):
    """Reader and writer for SNB container files.

    Layout, as produced by :meth:`Output` and consumed by :meth:`Parse`:
    a 44 byte header, a zlib-compressed file table (fixed-size records
    followed by NUL-terminated names), the raw binary stream, the plain
    stream as independently bz2-compressed 0x8000 byte blocks, a
    zlib-compressed tail (block offsets plus a per-file block index and
    offset), and finally a 16 byte trailer holding the tail size, the tail
    offset and the magic string.
    """

    MAGIC = b'SNBP000B'
    REV80 = 0x00008000
    REVA3 = 0x00A3A3A3
    REVZ1 = 0x00000000
    REVZ2 = 0x00000000

    def __init__(self, inputFile=None):
        """Create an empty archive, or load *inputFile* when it is given."""
        self.files = []
        self.blocks = []

        if inputFile is not None:
            self.Open(inputFile)

    def Open(self, inputFile):
        """Parse the SNB file stored at the path *inputFile*."""
        self.fileName = inputFile
        # The handle is released even when parsing fails.
        with open(self.fileName, 'rb') as snbFile:
            self.Parse(snbFile)

    def Parse(self, snbFile, metaOnly=False):
        """Load the archive from the seekable binary stream *snbFile*.

        Previously loaded members are discarded.  *metaOnly* is accepted
        for interface compatibility and is not used.

        :raises Exception: when a member has an unknown attribute.  Errors
            from ``struct`` and ``zlib`` propagate for malformed input.
        """
        self.files = []
        self.blocks = []

        # All offsets below are absolute, so start from a known position.
        snbFile.seek(0)

        # Read header
        vmbr = snbFile.read(_HEADER_SIZE)
        (self.magic, self.rev80, self.revA3, self.revZ1,
         self.fileCount, self.vfatSize, self.vfatCompressed,
         self.binStreamSize, self.plainStreamSizeUncompressed,
         self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)

        # Read FAT
        self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
        self.ParseFile(self.vfat, self.fileCount)

        # Read tail
        snbFile.seek(-16, os.SEEK_END)
        tailblock = snbFile.read(16)
        (self.tailSize, self.tailOffset, self.tailMagic) = \
            struct.unpack('>ii8s', tailblock)
        snbFile.seek(self.tailOffset)
        self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
        self.tailSizeUncompressed = len(self.vTailUncompressed)
        self.ParseTail(self.vTailUncompressed, self.fileCount)

        # Read the member bodies.  Members of each stream are stored back to
        # back in file-table order, so a running position is enough.
        binPos = 0
        plainPos = 0
        uncompressedData = None
        for f in self.files:
            if f.attr & _ATTR_PLAIN == _ATTR_PLAIN:
                if uncompressedData is None:
                    # Decompress the plain stream once, on first use.
                    uncompressedData = self._ReadPlainStream(snbFile)
                f.fileBody = uncompressedData[plainPos:plainPos + f.fileSize]
                plainPos += f.fileSize
            elif f.attr & _ATTR_BINARY == _ATTR_BINARY:
                snbFile.seek(_HEADER_SIZE + self.vfatCompressed + binPos)
                f.fileBody = snbFile.read(f.fileSize)
                binPos += f.fileSize
            else:
                print('%s %s' % (f.attr, f.fileName))
                raise Exception("Invalid file")

    def _ReadPlainStream(self, snbFile):
        """Return the concatenation of all decompressed plain blocks.

        A block that cannot be decompressed is reported and skipped
        (best effort, as before).
        """
        chunks = []
        firstPlain = self.binBlock
        for i in range(self.plainBlock):
            start = self.blocks[firstPlain + i].Offset
            if i < self.plainBlock - 1:
                end = self.blocks[firstPlain + i + 1].Offset
            else:
                # The last block runs up to the start of the tail.
                end = self.tailOffset
            snbFile.seek(start)
            try:
                data = snbFile.read(end - start)
                chunks.append(bz2.BZ2Decompressor().decompress(data))
            except (IOError, OSError, ValueError, EOFError) as e:
                print(e)
        return b''.join(chunks)

    def ParseFile(self, vfat, fileCount):
        """Read *fileCount* member records from the decompressed table *vfat*."""
        fileNames = vfat[fileCount * 12:].split(b'\0')
        for i in range(fileCount):
            f = FileStream()
            (f.attr, f.fileNameOffset, f.fileSize) = \
                struct.unpack_from('>iii', vfat, i * 12)
            f.fileName = _name_from_bytes(fileNames[i])
            self.files.append(f)

    def ParseTail(self, vtail, fileCount):
        """Read the block table and per-file positions from *vtail*."""
        self.binBlock = (self.binStreamSize + _BLOCK_SIZE - 1) // _BLOCK_SIZE
        self.plainBlock = \
            (self.plainStreamSizeUncompressed + _BLOCK_SIZE - 1) // _BLOCK_SIZE
        blockCount = self.binBlock + self.plainBlock
        for i in range(blockCount):
            block = BlockData()
            (block.Offset,) = struct.unpack_from('>i', vtail, i * 4)
            self.blocks.append(block)
        for i in range(fileCount):
            (self.files[i].blockIndex, self.files[i].contentOffset) = \
                struct.unpack_from('>ii', vtail, blockCount * 4 + i * 8)

    def IsValid(self):
        """Return True when the parsed header, tables and trailer agree."""
        if self.magic != SNBFile.MAGIC:
            return False
        if self.rev80 != SNBFile.REV80:
            return False
        if self.revA3 != SNBFile.REVA3:
            return False
        if self.revZ1 != SNBFile.REVZ1:
            return False
        if self.revZ2 != SNBFile.REVZ2:
            return False
        if self.vfatSize != len(self.vfat):
            return False
        if self.fileCount != len(self.files):
            return False
        expectedTail = (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8
        if expectedTail != self.tailSizeUncompressed:
            return False
        if self.tailMagic != SNBFile.MAGIC:
            print(self.tailMagic)
            return False
        return True

    def FromDir(self, tdir):
        """Add every file below the directory *tdir* to the archive.

        ``.snbf`` and ``.snbc`` files become plain members, everything else
        becomes a binary member.
        """
        for root, dirs, files in os.walk(tdir):
            for name in files:
                relName = os.path.relpath(os.path.join(root, name), tdir)
                if os.path.splitext(name)[1] in ('.snbf', '.snbc'):
                    self.AppendPlain(relName, tdir)
                else:
                    self.AppendBinary(relName, tdir)

    def _Append(self, fileName, tdir, attr):
        """Load ``tdir/fileName`` and register it with the attribute *attr*."""
        f = FileStream()
        f.attr = attr
        with open(os.path.join(tdir, fileName), 'rb') as src:
            f.fileBody = src.read()
        f.fileSize = len(f.fileBody)
        # Member names always use forward slashes.
        f.fileName = fileName.replace(os.sep, '/')
        self.files.append(f)

    def AppendPlain(self, fileName, tdir):
        """Add ``tdir/fileName`` as a member of the compressed plain stream."""
        self._Append(fileName, tdir, _ATTR_PLAIN)

    def AppendBinary(self, fileName, tdir):
        """Add ``tdir/fileName`` as a member of the binary stream."""
        self._Append(fileName, tdir, _ATTR_BINARY)

    def GetFileStream(self, fileName):
        """Return the body of the member *fileName*, or None if absent."""
        for member in self.files:
            if member.fileName == fileName:
                return member.fileBody
        return None

    def OutputImageFiles(self, path):
        """Write every image member into the directory *path*.

        Members are stored flat, under their base name.

        :return: list of ``(file name, MIME type)`` for the written files.
        """
        fileNames = []
        for f in self.files:
            fname = os.path.basename(f.fileName)
            ext = os.path.splitext(fname)[1]
            if ext in _IMAGE_EXTENSIONS:
                with open(os.path.join(path, fname), 'wb') as dest:
                    dest.write(f.fileBody)
                fileNames.append((fname, types_map[ext]))
        return fileNames

    def Output(self, outputFile):
        """Write the archive to the path *outputFile*.

        ``self.files`` is sorted by name in place, as required by the SNB
        file format.  The output file is only created once every member
        has been validated.

        :raises Exception: when a member has an unknown attribute.
        """
        self.files.sort(key=lambda member: member.fileName)

        # Create VFAT & file streams.  Parts are collected in lists and
        # joined once to avoid repeated string concatenation.
        vfatRecords = []
        nameTable = []
        nameTableSize = 0
        plainParts = []
        plainSize = 0
        binParts = []
        binSize = 0
        for f in self.files:
            vfatRecords.append(
                struct.pack('>iii', f.attr, nameTableSize, f.fileSize))
            rawName = _name_to_bytes(f.fileName) + b'\0'
            nameTable.append(rawName)
            nameTableSize += len(rawName)

            if f.attr & _ATTR_PLAIN == _ATTR_PLAIN:
                # Plain Files
                f.contentOffset = plainSize
                plainParts.append(f.fileBody)
                plainSize += len(f.fileBody)
            elif f.attr & _ATTR_BINARY == _ATTR_BINARY:
                # Binary Files
                f.contentOffset = binSize
                binParts.append(f.fileBody)
                binSize += len(f.fileBody)
            else:
                print('%s %s' % (f.attr, f.fileName))
                raise Exception("Unknown file type")

        vfat = b''.join(vfatRecords) + b''.join(nameTable)
        vfatCompressed = zlib.compress(vfat)
        plainStream = b''.join(plainParts)
        binStream = b''.join(binParts)

        # File header
        header = struct.pack('>8siiii', SNBFile.MAGIC, SNBFile.REV80,
                             SNBFile.REVA3, SNBFile.REVZ1, len(self.files))
        header += struct.pack('>iiiii', len(vfat), len(vfatCompressed),
                              len(binStream), len(plainStream), SNBFile.REVZ2)

        # Generate block information
        binBlockOffset = _HEADER_SIZE + len(vfatCompressed)
        plainBlockOffset = binBlockOffset + len(binStream)
        binBlock = (len(binStream) + _BLOCK_SIZE - 1) // _BLOCK_SIZE

        # Binary blocks are stored uncompressed, so their offsets are regular.
        blockTable = [struct.pack('>i', binBlockOffset + i * _BLOCK_SIZE)
                      for i in range(binBlock)]

        fileTable = []
        for f in self.files:
            # Plain blocks are numbered after all binary blocks.
            firstBlock = 0 if f.IsBinary() else binBlock
            fileTable.append(struct.pack(
                '>ii',
                f.contentOffset // _BLOCK_SIZE + firstBlock,
                f.contentOffset % _BLOCK_SIZE))

        # Compress the plain stream block by block; the offset of each block
        # depends on the compressed size of its predecessors.
        compressedBlocks = []
        offset = 0
        for pos in range(0, len(plainStream), _BLOCK_SIZE):
            blockTable.append(struct.pack('>i', plainBlockOffset + offset))
            compressed = bz2.compress(plainStream[pos:pos + _BLOCK_SIZE])
            compressedBlocks.append(compressed)
            offset += len(compressed)

        compressedTail = zlib.compress(b''.join(blockTable) + b''.join(fileTable))

        with open(outputFile, 'wb') as out:
            out.write(header)
            out.write(vfatCompressed)
            out.write(binStream)
            for compressed in compressedBlocks:
                out.write(compressed)
            out.write(compressedTail)
            # Tail pointer: size of the compressed tail and its offset.
            out.write(struct.pack('>ii', len(compressedTail),
                                  plainBlockOffset + offset))
            # File end mark
            out.write(SNBFile.MAGIC)

    def Dump(self):
        """Print the parsed structure and extract every member to ``/tmp``.

        Debugging aid; only meaningful after a successful parse.  Members
        are extracted under their base name only, so a name stored in the
        archive can neither point outside ``/tmp`` nor require a missing
        sub-directory.
        """
        # ``fileName`` only exists when the archive was loaded with Open().
        if getattr(self, 'fileName', None):
            print("File Name:\t%s" % (self.fileName,))
        print("File Count:\t%s" % (self.fileCount,))
        print("VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed))
        print("Binary Stream Size:\t%s" % (self.binStreamSize,))
        print("Plain Stream Uncompressed Size:\t%s" % (self.plainStreamSizeUncompressed,))
        print("Binary Block Count:\t%s" % (self.binBlock,))
        print("Plain Block Count:\t%s" % (self.plainBlock,))
        for i in range(self.fileCount):
            print("File  %s" % (i,))
            f = self.files[i]
            print("File Name:  %s" % (f.fileName,))
            print("File Attr:  %s" % (f.attr,))
            print("File Size:  %s" % (f.fileSize,))
            print("Block Index:  %s" % (f.blockIndex,))
            print("Content Offset:  %s" % (f.contentOffset,))
            target = os.path.join("/tmp", os.path.basename(f.fileName))
            with open(target, 'wb') as tempFile:
                tempFile.write(f.fileBody)


def usage():
    """Print the command line help of the self-test."""
    print("This unit test is for INTERNAL usage only!")
    print("This unit test accept two parameters.")
    print("python snbfile.py <INPUTFILE> <DESTFILE>")
    print("The input file will be extracted and write to dest file. ")
    print("Meta data of the file will be shown during this process.")


def main():
    """Re-pack the SNB file named by argv[1] into argv[2].

    :return: 0 on success, 1 when the input file is not a valid SNB file.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(0)
    inputFile = sys.argv[1]
    outputFile = sys.argv[2]

    print("Input file:  %s" % (inputFile,))
    print("Output file:  %s" % (outputFile,))

    snbFile = SNBFile(inputFile)
    if snbFile.IsValid():
        snbFile.Dump()
        snbFile.Output(outputFile)
    else:
        print("The input file is invalid.")
        return 1
    return 0


if __name__ == "__main__":
    """SNB file unit test"""
    sys.exit(main())
# Tags (and CSS display values) that start a new paragraph in the output.
BLOCK_TAGS = [
    'div',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'li',
    'tr',
]

BLOCK_STYLES = [
    'block',
]

# Tags whose text is separated from the preceding text by a single space.
SPACE_TAGS = [
    'td',
]

# In-band markers written into the intermediate text by dump_text() and
# turned back into structure by mlize(): an image reference, a bookmark
# (switch to another sub item) and a pre-formatted line respectively.
CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"


class SNBMLizer(object):
    """Turn one XHTML item of an OEB book into ``snbc`` element trees.

    The item is flattened to marked-up plain text (dump_text), normalised
    (cleanup_text) and then split line by line into one ``snbc`` tree per
    sub item (mlize).
    """

    # id of the bookmark most recently emitted by dump_text().
    curSubItem = ""

    def __init__(self, log):
        self.log = log

    def extract_content(self, oeb_book, item, subitems, opts):
        """Store the conversion context on the instance and run mlize().

        ``subitems`` is an iterable of ``(id, title)`` pairs. Returns the
        dict produced by mlize(), keyed by sub item id.
        """
        self.log.info('Converting XHTML to SNBC...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.item = item
        self.subitems = subitems
        return self.mlize()

    def merge_content(self, old_tree, oeb_book, item, subitems, opts):
        """Convert ``item`` and append the result to ``old_tree``'s body.

        Does nothing with the converted nodes when ``old_tree`` has no
        ``body`` element. Returns None.
        """
        new_trees = self.extract_content(oeb_book, item, subitems, opts)
        body = old_tree.find(".//body")
        if body is None:
            return
        for sub_name in new_trees:
            new_body = new_trees[sub_name].find(".//body")
            # Iterate over a snapshot: the children are re-parented while
            # we walk them.
            for entity in list(new_body):
                body.append(entity)

    def _append_node(self, trees, subitem, tag, value):
        # Add <tag>value</tag> to the body of the tree for ``subitem``.
        etree.SubElement(trees[subitem].find(".//body"), tag).text = value

    def mlize(self):
        """Build and return ``{sub item id: snbc tree}`` for self.item."""
        output = [u'']
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book,
                self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')),
                encoding=unicode))

        # One empty <snbc><head><title/></head><body/></snbc> per sub item.
        trees = {}
        for subitem, subtitle in self.subitems:
            snbc_tree = etree.Element("snbc")
            etree.SubElement(etree.SubElement(snbc_tree, "head"), "title").text = subtitle
            etree.SubElement(snbc_tree, "body")
            trees[subitem] = snbc_tree

        # Text found before the first bookmark goes to the '' sub item.
        output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))
        output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0]
        output = self.cleanup_text(u''.join(output))

        subitem = ''
        for line in output.splitlines():
            if line.startswith(CALIBRE_SNB_PRE_TAG):
                # Pre-formatted lines keep their whitespace untouched.
                self._append_node(trees, subitem, "text",
                        etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):]))
                continue
            line = line.strip(u' \t\n\r\u3000')
            if not line:
                continue
            if line.startswith(CALIBRE_SNB_IMG_TAG):
                image = line[len(CALIBRE_SNB_IMG_TAG):]
                prefix = ProcessFileName(os.path.dirname(self.item.href))
                if prefix != '':
                    image = prefix + '_' + image
                self._append_node(trees, subitem, "img", image)
            elif line.startswith(CALIBRE_SNB_BM_TAG):
                subitem = line[len(CALIBRE_SNB_BM_TAG):]
            else:
                # Paragraphs are indented with two ideographic spaces.
                self._append_node(trees, subitem, "text",
                        etree.CDATA(unicode(u'\u3000\u3000' + line)))
        return trees

    def remove_newlines(self, text):
        """Return ``text`` with every CRLF, LF and CR replaced by a space."""
        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')

        return text

    def cleanup_text(self, text):
        """Normalise characters, whitespace and line length of ``text``.

        Reads ``self.opts.remove_paragraph_spacing`` and
        ``self.opts.snb_max_line_length`` (0/None disables wrapping; values
        below 25 are raised to 25). Returns the cleaned text.
        """
        self.log.debug('\tClean up text...')
        # Replace bad characters. U+00C2 is only dropped when it precedes
        # NBSP or the copyright sign, i.e. when it is the lead byte of
        # mis-decoded UTF-8; a genuine "A circumflex" letter is preserved.
        text = re.sub(u'\xc2(?=[\xa0\xa9])', u'', text)
        text = text.replace(u'\xa0', ' ')
        text = text.replace(u'\xa9', '(C)')

        # Replace runs of tabs, vertical tabs and form feeds with a single
        # space. This has to be a regex: str.replace('\t+', ...) would only
        # match a tab followed by a literal plus sign.
        text = re.sub(u'[\t\v\f]+', u' ', text)

        # Single line paragraph: join lines separated by one newline. The
        # text always uses '\n', so os.linesep must not be used here.
        text = re.sub('(?<=.)\n(?=.)', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

        # Remove spaces at the beginning and end of lines.
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        if self.opts.snb_max_line_length:
            max_length = max(25, self.opts.snb_max_line_length)
            text = '\n'.join(self._wrap_lines(text.splitlines(), max_length))

        return text

    def _wrap_lines(self, lines, max_length):
        # Break every line longer than max_length at a space: preferably the
        # last one before max_length, otherwise the first one after it.
        wrapped = []
        for line in lines:
            while len(line) > max_length:
                cut = line.rfind(' ', 0, max_length)
                if cut == -1:
                    cut = line.find(' ', max_length)
                if cut == -1:
                    # No space at all, the line cannot be broken.
                    wrapped.append(line)
                    line = ''
                else:
                    wrapped.append(line[:cut])
                    line = line[cut + 1:]
            # Whatever is left is no longer than max_length.
            wrapped.append(line)
        return wrapped

    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
        """Recursively flatten ``elem`` into a list of text chunks.

        ``end`` holds the last characters emitted before this element,
        ``pre`` is True inside a <pre> element and ``li`` is a bullet prefix
        still waiting to be written. Returns ``(chunks, li)`` on every
        path; the returned ``li`` is '' once the bullet has been written.
        """
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return [''], li

        text = ['']
        style = stylizer.style(elem)

        elem_id = elem.attrib.get('id')
        if elem_id is not None and elem_id in [href for href, title in subitems]:
            if self.curSubItem is not None and self.curSubItem != elem_id:
                self.curSubItem = elem_id
                text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem))

        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return [''], li

        tag = barename(elem.tag)
        in_block = False

        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            in_block = True
            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
                text.append(u'\n\n')

        if tag in SPACE_TAGS:
            if not end.endswith(u' ') and hasattr(elem, 'text') and elem.text:
                text.append(u' ')

        if tag == 'img':
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src'])))

        if tag == 'br':
            text.append(u'\n\n')

        if tag == 'li':
            li = '- '

        pre = (tag == 'pre' or pre)
        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            if pre:
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join((li + elem.text).splitlines()))
            else:
                text.append(li + elem.text)
            li = ''

        for item in elem:
            en = u''
            if len(text) >= 2:
                en = text[-1][-2:]
            child_text, child_li = self.dump_text(subitems, item, stylizer, en, pre, li)
            text += child_text
            # A child that wrote the pending bullet hands back ''; forget it
            # here too so the same list item is not bulleted twice. A bullet
            # a child left unused is never taken over from it.
            if not child_li:
                li = ''

        if in_block:
            text.append(u'\n\n')

        if hasattr(elem, 'tail') and elem.tail:
            if pre:
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG).join(elem.tail.splitlines()))
            else:
                text.append(li + elem.tail)
            li = ''

        return text, li
newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys()) + # self.newline_model = newline_model + # self.opt_newline.setModel(self.newline_model) + + # default_index = self.opt_newline.findText(default) + # system_index = self.opt_newline.findText('system') + # self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0) diff --git a/src/calibre/gui2/convert/snb_output.ui b/src/calibre/gui2/convert/snb_output.ui new file mode 100644 index 0000000000..a5ff8ce7ef --- /dev/null +++ b/src/calibre/gui2/convert/snb_output.ui @@ -0,0 +1,74 @@ + + + Form + + + + 0 + 0 + 400 + 300 + + + + Form + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst index 359cc4755f..220e7ff9e4 100644 --- a/src/calibre/manual/faq.rst +++ b/src/calibre/manual/faq.rst @@ -20,9 +20,9 @@ What formats does |app| support conversion to/from? |app| supports the conversion of many input formats to many output formats. It can convert every input format in the following list, to every output format. -*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TCR, TXT +*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT -*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TCR, TXT +*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, SNB, TCR, TXT ** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers