mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Support for the SNB format used by the Bambook e-book reader
This commit is contained in:
commit
88bc2991b2
BIN
resources/images/mimetypes/snb.png
Normal file
BIN
resources/images/mimetypes/snb.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 6.1 KiB |
@ -293,6 +293,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.rtf import get_metadata
|
from calibre.ebooks.metadata.rtf import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class SNBMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
|
name = 'Read SNB metadata'
|
||||||
|
file_types = set(['snb'])
|
||||||
|
description = _('Read metadata from %s files') % 'SNB'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
|
||||||
|
def get_metadata(self, stream, ftype):
|
||||||
|
from calibre.ebooks.metadata.snb import get_metadata
|
||||||
|
return get_metadata(stream)
|
||||||
|
|
||||||
class TOPAZMetadataReader(MetadataReaderPlugin):
|
class TOPAZMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read Topaz metadata'
|
name = 'Read Topaz metadata'
|
||||||
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
|
|||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
from calibre.ebooks.lrf.input import LRFInput
|
from calibre.ebooks.lrf.input import LRFInput
|
||||||
from calibre.ebooks.chm.input import CHMInput
|
from calibre.ebooks.chm.input import CHMInput
|
||||||
|
from calibre.ebooks.snb.input import SNBInput
|
||||||
|
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
@ -434,6 +446,7 @@ from calibre.ebooks.rb.output import RBOutput
|
|||||||
from calibre.ebooks.rtf.output import RTFOutput
|
from calibre.ebooks.rtf.output import RTFOutput
|
||||||
from calibre.ebooks.tcr.output import TCROutput
|
from calibre.ebooks.tcr.output import TCROutput
|
||||||
from calibre.ebooks.txt.output import TXTOutput
|
from calibre.ebooks.txt.output import TXTOutput
|
||||||
|
from calibre.ebooks.snb.output import SNBOutput
|
||||||
|
|
||||||
from calibre.customize.profiles import input_profiles, output_profiles
|
from calibre.customize.profiles import input_profiles, output_profiles
|
||||||
|
|
||||||
@ -495,6 +508,7 @@ plugins += [
|
|||||||
TXTInput,
|
TXTInput,
|
||||||
LRFInput,
|
LRFInput,
|
||||||
CHMInput,
|
CHMInput,
|
||||||
|
SNBInput,
|
||||||
]
|
]
|
||||||
plugins += [
|
plugins += [
|
||||||
EPUBOutput,
|
EPUBOutput,
|
||||||
@ -510,6 +524,7 @@ plugins += [
|
|||||||
RTFOutput,
|
RTFOutput,
|
||||||
TCROutput,
|
TCROutput,
|
||||||
TXTOutput,
|
TXTOutput,
|
||||||
|
SNBOutput,
|
||||||
]
|
]
|
||||||
# Order here matters. The first matched device is the one used.
|
# Order here matters. The first matched device is the one used.
|
||||||
plugins += [
|
plugins += [
|
||||||
|
@ -647,11 +647,25 @@ class NookOutput(OutputProfile):
|
|||||||
fbase = 16
|
fbase = 16
|
||||||
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
fsizes = [12, 12, 14, 16, 18, 20, 22, 24]
|
||||||
|
|
||||||
|
class BambookOutput(OutputProfile):
|
||||||
|
|
||||||
|
name = 'Sanda Bambook'
|
||||||
|
short_name = 'bambook'
|
||||||
|
description = _('This profile is intended for the Sanda Bambook.')
|
||||||
|
|
||||||
|
# Screen size is a best guess
|
||||||
|
screen_size = (800, 600)
|
||||||
|
comic_screen_size = (700, 540)
|
||||||
|
dpi = 168.451
|
||||||
|
fbase = 12
|
||||||
|
fsizes = [10, 12, 14, 16]
|
||||||
|
|
||||||
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
output_profiles = [OutputProfile, SonyReaderOutput, SonyReader300Output,
|
||||||
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
|
SonyReader900Output, MSReaderOutput, MobipocketOutput, HanlinV3Output,
|
||||||
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
|
HanlinV5Output, CybookG3Output, CybookOpusOutput, KindleOutput,
|
||||||
iPadOutput, KoboReaderOutput,
|
iPadOutput, KoboReaderOutput,
|
||||||
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
SonyReaderLandscapeOutput, KindleDXOutput, IlliadOutput,
|
||||||
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,]
|
IRexDR1000Output, IRexDR800Output, JetBook5Output, NookOutput,
|
||||||
|
BambookOutput, ]
|
||||||
|
|
||||||
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
output_profiles.sort(cmp=lambda x,y:cmp(x.name.lower(), y.name.lower()))
|
||||||
|
@ -25,7 +25,7 @@ class DRMError(ValueError):
|
|||||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
||||||
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||||
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan']
|
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
|
||||||
|
|
||||||
class HTMLRenderer(object):
|
class HTMLRenderer(object):
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
|
|||||||
'html', 'htm', 'xhtml', 'xhtm',
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
||||||
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
||||||
'rb', 'imp', 'azw'
|
'rb', 'imp', 'azw', 'snb'
|
||||||
]
|
]
|
||||||
|
|
||||||
# The priorities for loading metadata from different file types
|
# The priorities for loading metadata from different file types
|
||||||
|
47
src/calibre/ebooks/metadata/snb.py
Executable file
47
src/calibre/ebooks/metadata/snb.py
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
'''Read meta information from SNB files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
|
||||||
|
import os
|
||||||
|
from StringIO import StringIO
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
def get_metadata(stream, extract_cover=True):
|
||||||
|
""" Return metadata as a L{MetaInfo} object """
|
||||||
|
mi = MetaInformation(_('Unknown'), [_('Unknown')])
|
||||||
|
snbFile = SNBFile()
|
||||||
|
|
||||||
|
try:
|
||||||
|
if not hasattr(stream, 'write'):
|
||||||
|
snbFile.Parse(StringIO(stream), True)
|
||||||
|
else:
|
||||||
|
stream.seek(0)
|
||||||
|
snbFile.Parse(stream, True)
|
||||||
|
|
||||||
|
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||||
|
|
||||||
|
if meta != None:
|
||||||
|
meta = etree.fromstring(meta)
|
||||||
|
mi.title = meta.find('.//head/name').text
|
||||||
|
mi.authors = [meta.find('.//head/author').text]
|
||||||
|
mi.language = meta.find('.//head/language').text.lower().replace('_', '-')
|
||||||
|
mi.publisher = meta.find('.//head/publisher').text
|
||||||
|
|
||||||
|
if extract_cover:
|
||||||
|
cover = meta.find('.//head/cover')
|
||||||
|
if cover != None and cover.text != None:
|
||||||
|
root, ext = os.path.splitext(cover.text)
|
||||||
|
if ext == '.jpeg':
|
||||||
|
ext = '.jpg'
|
||||||
|
mi.cover_data = (ext[-3:], snbFile.GetFileStream('snbc/images/' + cover.text))
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
|
||||||
|
return mi
|
9
src/calibre/ebooks/snb/__init__.py
Normal file
9
src/calibre/ebooks/snb/__init__.py
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Used for snb output
|
||||||
|
'''
|
||||||
|
|
103
src/calibre/ebooks/snb/input.py
Executable file
103
src/calibre/ebooks/snb/input.py
Executable file
@ -0,0 +1,103 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, uuid
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin
|
||||||
|
from calibre.ebooks.oeb.base import DirContainer
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
|
def html_encode(s):
|
||||||
|
return s.replace(u'&', u'&').replace(u'<', u'<').replace(u'>', u'>').replace(u'"', u'"').replace(u"'", u''').replace(u'\n', u'<br/>').replace(u' ', u' ')
|
||||||
|
|
||||||
|
class SNBInput(InputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'SNB Input'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
description = 'Convert SNB files to OEB'
|
||||||
|
file_types = set(['snb'])
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, stream, options, file_ext, log,
|
||||||
|
accelerators):
|
||||||
|
log.debug("Parsing SNB file...")
|
||||||
|
snbFile = SNBFile()
|
||||||
|
try:
|
||||||
|
snbFile.Parse(stream)
|
||||||
|
except:
|
||||||
|
raise ValueError("Invalid SNB file")
|
||||||
|
if not snbFile.IsValid():
|
||||||
|
log.debug("Invaild SNB file")
|
||||||
|
raise ValueError("Invalid SNB file")
|
||||||
|
log.debug("Handle meta data ...")
|
||||||
|
from calibre.ebooks.conversion.plumber import create_oebbook
|
||||||
|
oeb = create_oebbook(log, None, options, self,
|
||||||
|
encoding=options.input_encoding, populate=False)
|
||||||
|
meta = snbFile.GetFileStream('snbf/book.snbf')
|
||||||
|
if meta != None:
|
||||||
|
meta = etree.fromstring(meta)
|
||||||
|
oeb.metadata.add('title', meta.find('.//head/name').text)
|
||||||
|
oeb.metadata.add('creator', meta.find('.//head/author').text, attrib={'role':'aut'})
|
||||||
|
oeb.metadata.add('language', meta.find('.//head/language').text.lower().replace('_', '-'))
|
||||||
|
oeb.metadata.add('creator', meta.find('.//head/generator').text)
|
||||||
|
oeb.metadata.add('publisher', meta.find('.//head/publisher').text)
|
||||||
|
cover = meta.find('.//head/cover')
|
||||||
|
if cover != None and cover.text != None:
|
||||||
|
oeb.guide.add('cover', 'Cover', cover.text)
|
||||||
|
|
||||||
|
bookid = str(uuid.uuid4())
|
||||||
|
oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
|
||||||
|
for ident in oeb.metadata.identifier:
|
||||||
|
if 'id' in ident.attrib:
|
||||||
|
oeb.uid = oeb.metadata.identifier[0]
|
||||||
|
break
|
||||||
|
|
||||||
|
with TemporaryDirectory('_chm2oeb', keep=True) as tdir:
|
||||||
|
log.debug('Process TOC ...')
|
||||||
|
toc = snbFile.GetFileStream('snbf/toc.snbf')
|
||||||
|
oeb.container = DirContainer(tdir, log)
|
||||||
|
if toc != None:
|
||||||
|
toc = etree.fromstring(toc)
|
||||||
|
i = 1
|
||||||
|
for ch in toc.find('.//body'):
|
||||||
|
chapterName = ch.text
|
||||||
|
chapterSrc = ch.get('src')
|
||||||
|
fname = 'ch_%d.htm' % i
|
||||||
|
data = snbFile.GetFileStream('snbc/' + chapterSrc)
|
||||||
|
if data != None:
|
||||||
|
snbc = etree.fromstring(data)
|
||||||
|
outputFile = open(os.path.join(tdir, fname), 'wb')
|
||||||
|
lines = []
|
||||||
|
for line in snbc.find('.//body'):
|
||||||
|
if line.tag == 'text':
|
||||||
|
lines.append(u'<p>%s</p>' % html_encode(line.text))
|
||||||
|
elif line.tag == 'img':
|
||||||
|
lines.append(u'<p><img src="%s" /></p>' % html_encode(line.text))
|
||||||
|
outputFile.write((HTML_TEMPLATE % (chapterName, u'\n'.join(lines))).encode('utf-8', 'replace'))
|
||||||
|
outputFile.close()
|
||||||
|
oeb.toc.add(ch.text, fname)
|
||||||
|
id, href = oeb.manifest.generate(id='html',
|
||||||
|
href=ascii_filename(fname))
|
||||||
|
item = oeb.manifest.add(id, href, 'text/html')
|
||||||
|
item.html_input_href = fname
|
||||||
|
oeb.spine.add(item, True)
|
||||||
|
i = i + 1
|
||||||
|
imageFiles = snbFile.OutputImageFiles(tdir)
|
||||||
|
for f, m in imageFiles:
|
||||||
|
id, href = oeb.manifest.generate(id='image',
|
||||||
|
href=ascii_filename(f))
|
||||||
|
item = oeb.manifest.add(id, href, m)
|
||||||
|
item.html_input_href = f
|
||||||
|
|
||||||
|
return oeb
|
||||||
|
|
265
src/calibre/ebooks/snb/output.py
Normal file
265
src/calibre/ebooks/snb/output.py
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, string
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
|
||||||
|
|
||||||
|
class SNBOutput(OutputFormatPlugin):
|
||||||
|
|
||||||
|
name = 'SNB Output'
|
||||||
|
author = 'Li Fanxi'
|
||||||
|
file_type = 'snb'
|
||||||
|
|
||||||
|
options = set([
|
||||||
|
# OptionRecommendation(name='newline', recommended_value='system',
|
||||||
|
# level=OptionRecommendation.LOW,
|
||||||
|
# short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
|
||||||
|
# help=_('Type of newline to use. Options are %s. Default is \'system\'. '
|
||||||
|
# 'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
|
||||||
|
# 'For Mac OS X use \'unix\'. \'system\' will default to the newline '
|
||||||
|
# 'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
|
||||||
|
OptionRecommendation(name='snb_output_encoding', recommended_value='utf-8',
|
||||||
|
level=OptionRecommendation.LOW,
|
||||||
|
help=_('Specify the character encoding of the output document. ' \
|
||||||
|
'The default is utf-8. Note: This option is not honored by all ' \
|
||||||
|
'formats.')),
|
||||||
|
# OptionRecommendation(name='inline_toc',
|
||||||
|
# recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
# help=_('Add Table of Contents to beginning of the book.')),
|
||||||
|
OptionRecommendation(name='snb_max_line_length',
|
||||||
|
recommended_value=0, level=OptionRecommendation.LOW,
|
||||||
|
help=_('The maximum number of characters per line. This splits on '
|
||||||
|
'the first space before the specified value. If no space is found '
|
||||||
|
'the line will be broken at the space after and will exceed the '
|
||||||
|
'specified value. Also, there is a minimum of 25 characters. '
|
||||||
|
'Use 0 to disable line splitting.')),
|
||||||
|
# OptionRecommendation(name='force_max_line_length',
|
||||||
|
# recommended_value=False, level=OptionRecommendation.LOW,
|
||||||
|
# help=_('Force splitting on the max-line-length value when no space '
|
||||||
|
# 'is present. Also allows max-line-length to be below the minimum')),
|
||||||
|
])
|
||||||
|
|
||||||
|
def convert(self, oeb_book, output_path, input_plugin, opts, log):
|
||||||
|
self.opts = opts
|
||||||
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
||||||
|
try:
|
||||||
|
rasterizer = SVGRasterizer()
|
||||||
|
rasterizer(oeb_book, opts)
|
||||||
|
except Unavailable:
|
||||||
|
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
||||||
|
|
||||||
|
# Create temp dir
|
||||||
|
with TemporaryDirectory('_snb_output') as tdir:
|
||||||
|
# Create stub directories
|
||||||
|
snbfDir = os.path.join(tdir, 'snbf')
|
||||||
|
snbcDir = os.path.join(tdir, 'snbc')
|
||||||
|
snbiDir = os.path.join(tdir, 'snbc/images')
|
||||||
|
os.mkdir(snbfDir)
|
||||||
|
os.mkdir(snbcDir)
|
||||||
|
os.mkdir(snbiDir)
|
||||||
|
|
||||||
|
# Process Meta data
|
||||||
|
meta = oeb_book.metadata
|
||||||
|
if meta.title:
|
||||||
|
title = unicode(meta.title[0])
|
||||||
|
else:
|
||||||
|
title = ''
|
||||||
|
authors = [unicode(x) for x in meta.creator if x.role == 'aut']
|
||||||
|
if meta.publisher:
|
||||||
|
publishers = unicode(meta.publisher[0])
|
||||||
|
else:
|
||||||
|
publishers = ''
|
||||||
|
if meta.language:
|
||||||
|
lang = unicode(meta.language[0]).upper()
|
||||||
|
else:
|
||||||
|
lang = ''
|
||||||
|
if meta.description:
|
||||||
|
abstract = unicode(meta.description[0])
|
||||||
|
else:
|
||||||
|
abstract = ''
|
||||||
|
|
||||||
|
# Process Cover
|
||||||
|
g, m, s = oeb_book.guide, oeb_book.manifest, oeb_book.spine
|
||||||
|
href = None
|
||||||
|
if 'titlepage' not in g:
|
||||||
|
if 'cover' in g:
|
||||||
|
href = g['cover'].href
|
||||||
|
|
||||||
|
# Output book info file
|
||||||
|
bookInfoTree = etree.Element("book-snbf", version="1.0")
|
||||||
|
headTree = etree.SubElement(bookInfoTree, "head")
|
||||||
|
etree.SubElement(headTree, "name").text = title
|
||||||
|
etree.SubElement(headTree, "author").text = ' '.join(authors)
|
||||||
|
etree.SubElement(headTree, "language").text = lang
|
||||||
|
etree.SubElement(headTree, "rights")
|
||||||
|
etree.SubElement(headTree, "publisher").text = publishers
|
||||||
|
etree.SubElement(headTree, "generator").text = __appname__ + ' ' + __version__
|
||||||
|
etree.SubElement(headTree, "created")
|
||||||
|
etree.SubElement(headTree, "abstract").text = abstract
|
||||||
|
if href != None:
|
||||||
|
etree.SubElement(headTree, "cover").text = ProcessFileName(href)
|
||||||
|
else:
|
||||||
|
etree.SubElement(headTree, "cover")
|
||||||
|
bookInfoFile = open(os.path.join(snbfDir, 'book.snbf'), 'wb')
|
||||||
|
bookInfoFile.write(etree.tostring(bookInfoTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
bookInfoFile.close()
|
||||||
|
|
||||||
|
# Output TOC
|
||||||
|
tocInfoTree = etree.Element("toc-snbf")
|
||||||
|
tocHead = etree.SubElement(tocInfoTree, "head")
|
||||||
|
tocBody = etree.SubElement(tocInfoTree, "body")
|
||||||
|
outputFiles = { }
|
||||||
|
if oeb_book.toc.count() == 0:
|
||||||
|
log.warn('This SNB file has no Table of Contents. '
|
||||||
|
'Creating a default TOC')
|
||||||
|
first = iter(oeb_book.spine).next()
|
||||||
|
oeb_book.toc.add(_('Start Page'), first.href)
|
||||||
|
else:
|
||||||
|
first = iter(oeb_book.spine).next()
|
||||||
|
if oeb_book.toc[0].href != first.href:
|
||||||
|
# The pages before the fist item in toc will be stored as
|
||||||
|
# "Cover Pages".
|
||||||
|
# oeb_book.toc does not support "insert", so we generate
|
||||||
|
# the tocInfoTree directly instead of modifying the toc
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(first.href) + ".snbc")
|
||||||
|
ch.text = _('Cover Pages')
|
||||||
|
outputFiles[first.href] = []
|
||||||
|
outputFiles[first.href].append(("", _("Cover Pages")))
|
||||||
|
|
||||||
|
for tocitem in oeb_book.toc:
|
||||||
|
if tocitem.href.find('#') != -1:
|
||||||
|
item = string.split(tocitem.href, '#')
|
||||||
|
if len(item) != 2:
|
||||||
|
log.error('Error in TOC item: %s' % tocitem)
|
||||||
|
else:
|
||||||
|
if item[0] in outputFiles:
|
||||||
|
outputFiles[item[0]].append((item[1], tocitem.title))
|
||||||
|
else:
|
||||||
|
outputFiles[item[0]] = []
|
||||||
|
if not "" in outputFiles[item[0]]:
|
||||||
|
outputFiles[item[0]].append(("", tocitem.title + _(" (Preface)")))
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(item[0]) + ".snbc")
|
||||||
|
ch.text = tocitem.title + _(" (Preface)")
|
||||||
|
outputFiles[item[0]].append((item[1], tocitem.title))
|
||||||
|
else:
|
||||||
|
if tocitem.href in outputFiles:
|
||||||
|
outputFiles[tocitem.href].append(("", tocitem.title))
|
||||||
|
else:
|
||||||
|
outputFiles[tocitem.href] = []
|
||||||
|
outputFiles[tocitem.href].append(("", tocitem.title))
|
||||||
|
ch = etree.SubElement(tocBody, "chapter")
|
||||||
|
ch.set("src", ProcessFileName(tocitem.href) + ".snbc")
|
||||||
|
ch.text = tocitem.title
|
||||||
|
|
||||||
|
|
||||||
|
etree.SubElement(tocHead, "chapters").text = '%d' % len(tocBody)
|
||||||
|
|
||||||
|
tocInfoFile = open(os.path.join(snbfDir, 'toc.snbf'), 'wb')
|
||||||
|
tocInfoFile.write(etree.tostring(tocInfoTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
tocInfoFile.close()
|
||||||
|
|
||||||
|
# Output Files
|
||||||
|
oldTree = None
|
||||||
|
mergeLast = False
|
||||||
|
lastName = None
|
||||||
|
for item in s:
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS, OEB_IMAGES
|
||||||
|
if m.hrefs[item.href].media_type in OEB_DOCS:
|
||||||
|
if not item.href in outputFiles:
|
||||||
|
log.debug('File %s is unused in TOC. Continue in last chapter' % item.href)
|
||||||
|
mergeLast = True
|
||||||
|
else:
|
||||||
|
if oldTree != None and mergeLast:
|
||||||
|
log.debug('Output the modified chapter again: %s' % lastName)
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
mergeLast = False
|
||||||
|
|
||||||
|
log.debug('Converting %s to snbc...' % item.href)
|
||||||
|
snbwriter = SNBMLizer(log)
|
||||||
|
snbcTrees = None
|
||||||
|
if not mergeLast:
|
||||||
|
snbcTrees = snbwriter.extract_content(oeb_book, item, outputFiles[item.href], opts)
|
||||||
|
for subName in snbcTrees:
|
||||||
|
postfix = ''
|
||||||
|
if subName != '':
|
||||||
|
postfix = '_' + subName
|
||||||
|
lastName = ProcessFileName(item.href + postfix + ".snbc")
|
||||||
|
oldTree = snbcTrees[subName]
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
else:
|
||||||
|
log.debug('Merge %s with last TOC item...' % item.href)
|
||||||
|
snbwriter.merge_content(oldTree, oeb_book, item, [('', _("Start"))], opts)
|
||||||
|
|
||||||
|
# Output the last one if needed
|
||||||
|
log.debug('Output the last modified chapter again: %s' % lastName)
|
||||||
|
if oldTree != None and mergeLast:
|
||||||
|
outputFile = open(os.path.join(snbcDir, lastName), 'wb')
|
||||||
|
outputFile.write(etree.tostring(oldTree, pretty_print=True, encoding='utf-8'))
|
||||||
|
outputFile.close()
|
||||||
|
mergeLast = False
|
||||||
|
|
||||||
|
for item in m:
|
||||||
|
if m.hrefs[item.href].media_type in OEB_IMAGES:
|
||||||
|
log.debug('Converting image: %s ...' % item.href)
|
||||||
|
content = m.hrefs[item.href].data
|
||||||
|
# Convert & Resize image
|
||||||
|
self.HandleImage(content, os.path.join(snbiDir, ProcessFileName(item.href)))
|
||||||
|
|
||||||
|
# Package as SNB File
|
||||||
|
snbFile = SNBFile()
|
||||||
|
snbFile.FromDir(tdir)
|
||||||
|
snbFile.Output(output_path)
|
||||||
|
|
||||||
|
def HandleImage(self, imageData, imagePath):
|
||||||
|
from calibre.utils.magick import Image
|
||||||
|
img = Image()
|
||||||
|
img.load(imageData)
|
||||||
|
(x,y) = img.size
|
||||||
|
if self.opts:
|
||||||
|
SCREEN_Y, SCREEN_X = self.opts.output_profile.comic_screen_size
|
||||||
|
else:
|
||||||
|
SCREEN_X = 540
|
||||||
|
SCREEN_Y = 700
|
||||||
|
# Handle big image only
|
||||||
|
if x > SCREEN_X or y > SCREEN_Y:
|
||||||
|
xScale = float(x) / SCREEN_X
|
||||||
|
yScale = float(y) / SCREEN_Y
|
||||||
|
scale = max(xScale, yScale)
|
||||||
|
# TODO : intelligent image rotation
|
||||||
|
# img = img.rotate(90)
|
||||||
|
# x,y = y,x
|
||||||
|
img.size = (x / scale, y / scale)
|
||||||
|
img.save(imagePath)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from calibre.ebooks.oeb.reader import OEBReader
|
||||||
|
from calibre.ebooks.oeb.base import OEBBook
|
||||||
|
from calibre.ebooks.conversion.preprocess import HTMLPreProcessor
|
||||||
|
from calibre.customize.profiles import HanlinV3Output
|
||||||
|
class OptionValues(object):
|
||||||
|
pass
|
||||||
|
|
||||||
|
opts = OptionValues()
|
||||||
|
opts.output_profile = HanlinV3Output(None)
|
||||||
|
|
||||||
|
html_preprocessor = HTMLPreProcessor(None, None, opts)
|
||||||
|
from calibre.utils.logging import default_log
|
||||||
|
oeb = OEBBook(default_log, html_preprocessor)
|
||||||
|
reader = OEBReader
|
||||||
|
reader()(oeb, '/tmp/bbb/processed/')
|
||||||
|
SNBOutput(None).convert(oeb, '/tmp/test.snb', None, None, default_log);
|
319
src/calibre/ebooks/snb/snbfile.py
Normal file
319
src/calibre/ebooks/snb/snbfile.py
Normal file
@ -0,0 +1,319 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, struct, zlib, bz2, os
|
||||||
|
from mimetypes import types_map
|
||||||
|
|
||||||
|
class FileStream:
|
||||||
|
def IsBinary(self):
|
||||||
|
return self.attr & 0x41000000 != 0x41000000
|
||||||
|
|
||||||
|
def compareFileStream(file1, file2):
|
||||||
|
return cmp(file1.fileName, file2.fileName)
|
||||||
|
|
||||||
|
class BlockData:
|
||||||
|
pass
|
||||||
|
|
||||||
|
class SNBFile:
|
||||||
|
|
||||||
|
MAGIC = 'SNBP000B'
|
||||||
|
REV80 = 0x00008000
|
||||||
|
REVA3 = 0x00A3A3A3
|
||||||
|
REVZ1 = 0x00000000
|
||||||
|
REVZ2 = 0x00000000
|
||||||
|
|
||||||
|
def __init__(self, inputFile = None):
|
||||||
|
self.files = []
|
||||||
|
self.blocks = []
|
||||||
|
|
||||||
|
if inputFile != None:
|
||||||
|
self.Open(inputFile)
|
||||||
|
|
||||||
|
def Open(self, inputFile):
|
||||||
|
self.fileName = inputFile
|
||||||
|
|
||||||
|
snbFile = open(self.fileName, "rb")
|
||||||
|
snbFile.seek(0)
|
||||||
|
self.Parse(snbFile)
|
||||||
|
snbFile.close()
|
||||||
|
|
||||||
|
def Parse(self, snbFile, metaOnly = False):
|
||||||
|
# Read header
|
||||||
|
vmbr = snbFile.read(44)
|
||||||
|
(self.magic, self.rev80, self.revA3, self.revZ1,
|
||||||
|
self.fileCount, self.vfatSize, self.vfatCompressed,
|
||||||
|
self.binStreamSize, self.plainStreamSizeUncompressed,
|
||||||
|
self.revZ2) = struct.unpack('>8siiiiiiiii', vmbr)
|
||||||
|
|
||||||
|
# Read FAT
|
||||||
|
self.vfat = zlib.decompress(snbFile.read(self.vfatCompressed))
|
||||||
|
self.ParseFile(self.vfat, self.fileCount)
|
||||||
|
|
||||||
|
# Read tail
|
||||||
|
snbFile.seek(-16, os.SEEK_END)
|
||||||
|
#plainStreamEnd = snbFile.tell()
|
||||||
|
tailblock = snbFile.read(16)
|
||||||
|
(self.tailSize, self.tailOffset, self.tailMagic) = struct.unpack('>ii8s', tailblock)
|
||||||
|
snbFile.seek(self.tailOffset)
|
||||||
|
self.vTailUncompressed = zlib.decompress(snbFile.read(self.tailSize))
|
||||||
|
self.tailSizeUncompressed = len(self.vTailUncompressed)
|
||||||
|
self.ParseTail(self.vTailUncompressed, self.fileCount)
|
||||||
|
|
||||||
|
# Uncompress file data
|
||||||
|
# Read files
|
||||||
|
binPos = 0
|
||||||
|
plainPos = 0
|
||||||
|
uncompressedData = None
|
||||||
|
for f in self.files:
|
||||||
|
if f.attr & 0x41000000 == 0x41000000:
|
||||||
|
# Compressed Files
|
||||||
|
if uncompressedData == None:
|
||||||
|
uncompressedData = ""
|
||||||
|
for i in range(self.plainBlock):
|
||||||
|
bzdc = bz2.BZ2Decompressor()
|
||||||
|
if (i < self.plainBlock - 1):
|
||||||
|
bSize = self.blocks[self.binBlock + i + 1].Offset - self.blocks[self.binBlock + i].Offset;
|
||||||
|
else:
|
||||||
|
bSize = self.tailOffset - self.blocks[self.binBlock + i].Offset;
|
||||||
|
snbFile.seek(self.blocks[self.binBlock + i].Offset);
|
||||||
|
try:
|
||||||
|
data = snbFile.read(bSize)
|
||||||
|
uncompressedData += bzdc.decompress(data)
|
||||||
|
except Exception, e:
|
||||||
|
print e
|
||||||
|
f.fileBody = uncompressedData[plainPos:plainPos+f.fileSize]
|
||||||
|
plainPos += f.fileSize
|
||||||
|
elif f.attr & 0x01000000 == 0x01000000:
|
||||||
|
# Binary Files
|
||||||
|
snbFile.seek(44 + self.vfatCompressed + binPos)
|
||||||
|
f.fileBody = snbFile.read(f.fileSize)
|
||||||
|
binPos += f.fileSize
|
||||||
|
else:
|
||||||
|
print f.attr, f.fileName
|
||||||
|
raise Exception("Invalid file")
|
||||||
|
|
||||||
|
def ParseFile(self, vfat, fileCount):
|
||||||
|
fileNames = vfat[fileCount*12:].split('\0');
|
||||||
|
for i in range(fileCount):
|
||||||
|
f = FileStream()
|
||||||
|
(f.attr, f.fileNameOffset, f.fileSize) = struct.unpack('>iii', vfat[i * 12 : (i+1)*12])
|
||||||
|
f.fileName = fileNames[i]
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def ParseTail(self, vtail, fileCount):
|
||||||
|
self.binBlock = (self.binStreamSize + 0x8000 - 1) / 0x8000;
|
||||||
|
self.plainBlock = (self.plainStreamSizeUncompressed + 0x8000 - 1) / 0x8000;
|
||||||
|
for i in range(self.binBlock + self.plainBlock):
|
||||||
|
block = BlockData()
|
||||||
|
(block.Offset,) = struct.unpack('>i', vtail[i * 4 : (i+1) * 4])
|
||||||
|
self.blocks.append(block)
|
||||||
|
for i in range(fileCount):
|
||||||
|
(self.files[i].blockIndex, self.files[i].contentOffset) = struct.unpack('>ii', vtail[(self.binBlock + self.plainBlock) * 4 + i * 8 : (self.binBlock + self.plainBlock) * 4 + (i+1) * 8])
|
||||||
|
|
||||||
|
def IsValid(self):
|
||||||
|
if self.magic != SNBFile.MAGIC:
|
||||||
|
return False
|
||||||
|
if self.rev80 != SNBFile.REV80:
|
||||||
|
return False
|
||||||
|
if self.revA3 != SNBFile.REVA3:
|
||||||
|
return False
|
||||||
|
if self.revZ1 != SNBFile.REVZ1:
|
||||||
|
return False
|
||||||
|
if self.revZ2 != SNBFile.REVZ2:
|
||||||
|
return False
|
||||||
|
if self.vfatSize != len(self.vfat):
|
||||||
|
return False
|
||||||
|
if self.fileCount != len(self.files):
|
||||||
|
return False
|
||||||
|
if (self.binBlock + self.plainBlock) * 4 + self.fileCount * 8 != self.tailSizeUncompressed:
|
||||||
|
return False
|
||||||
|
if self.tailMagic != SNBFile.MAGIC:
|
||||||
|
print self.tailMagic
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def FromDir(self, tdir):
|
||||||
|
for root, dirs, files in os.walk(tdir):
|
||||||
|
for name in files:
|
||||||
|
p, ext = os.path.splitext(name)
|
||||||
|
if ext in [ ".snbf", ".snbc" ]:
|
||||||
|
self.AppendPlain(os.path.relpath(os.path.join(root, name), tdir), tdir)
|
||||||
|
else:
|
||||||
|
self.AppendBinary(os.path.relpath(os.path.join(root, name), tdir), tdir)
|
||||||
|
|
||||||
|
def AppendPlain(self, fileName, tdir):
|
||||||
|
f = FileStream()
|
||||||
|
f.attr = 0x41000000
|
||||||
|
f.fileSize = os.path.getsize(os.path.join(tdir,fileName))
|
||||||
|
f.fileBody = open(os.path.join(tdir,fileName), 'rb').read()
|
||||||
|
f.fileName = fileName.replace(os.sep, '/')
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def AppendBinary(self, fileName, tdir):
|
||||||
|
f = FileStream()
|
||||||
|
f.attr = 0x01000000
|
||||||
|
f.fileSize = os.path.getsize(os.path.join(tdir,fileName))
|
||||||
|
f.fileBody = open(os.path.join(tdir,fileName), 'rb').read()
|
||||||
|
f.fileName = fileName.replace(os.sep, '/')
|
||||||
|
self.files.append(f)
|
||||||
|
|
||||||
|
def GetFileStream(self, fileName):
|
||||||
|
for file in self.files:
|
||||||
|
if file.fileName == fileName:
|
||||||
|
return file.fileBody
|
||||||
|
return None
|
||||||
|
|
||||||
|
def OutputImageFiles(self, path):
|
||||||
|
fileNames = []
|
||||||
|
for f in self.files:
|
||||||
|
fname = os.path.basename(f.fileName)
|
||||||
|
root, ext = os.path.splitext(fname)
|
||||||
|
if ext in [ '.jpeg', '.jpg', '.gif', '.svg', '.png' ]:
|
||||||
|
file = open(os.path.join(path, fname), 'wb')
|
||||||
|
file.write(f.fileBody)
|
||||||
|
file.close()
|
||||||
|
fileNames.append((fname, types_map[ext]))
|
||||||
|
return fileNames
|
||||||
|
|
||||||
|
    def Output(self, outputFile):
        """Serialize the archive to *outputFile* in SNB container format.

        Layout written: header part 1, header part 2, zlib-compressed
        VFAT + name table, raw binary stream, bz2-compressed 32 KB plain
        blocks, zlib-compressed tail (block offsets + per-file records),
        tail pointer, and a trailing magic marker.
        """

        # Sort the files in file buffer,
        # required by the SNB file format
        self.files.sort(compareFileStream)

        # NOTE: the parameter is rebound from a path string to a file object.
        outputFile = open(outputFile, 'wb')
        # File header part 1: magic + format revisions + member count,
        # all big-endian.
        vmbrp1 = struct.pack('>8siiii', SNBFile.MAGIC, SNBFile.REV80, SNBFile.REVA3, SNBFile.REVZ1, len(self.files))

        # Create VFAT & file stream.  The VFAT holds one (attr, name offset,
        # size) record per member; names are concatenated NUL-terminated.
        vfat = ''
        fileNameTable = ''
        plainStream = ''
        binStream = ''
        for f in self.files:
            vfat += struct.pack('>iii', f.attr, len(fileNameTable), f.fileSize);
            fileNameTable += (f.fileName + '\0')

            if f.attr & 0x41000000 == 0x41000000:
                # Plain Files: concatenated, bz2-compressed in blocks below.
                f.contentOffset = len(plainStream)
                plainStream += f.fileBody
            elif f.attr & 0x01000000 == 0x01000000:
                # Binary Files: stored verbatim.
                f.contentOffset = len(binStream)
                binStream += f.fileBody
            else:
                print f.attr, f.fileName
                raise Exception("Unknown file type")
        vfatCompressed = zlib.compress(vfat+fileNameTable)

        # File header part 2: uncompressed/compressed VFAT sizes and
        # stream sizes, closed by the REVZ2 revision mark.
        vmbrp2 = struct.pack('>iiiii', len(vfat+fileNameTable), len(vfatCompressed), len(binStream), len(plainStream), SNBFile.REVZ2)
        # Write header
        outputFile.write(vmbrp1 + vmbrp2)
        # Write vfat
        outputFile.write(vfatCompressed)

        # Generate block information.  0x2C is the fixed header size; the
        # binary stream follows the compressed VFAT directly.
        binBlockOffset = 0x2C + len(vfatCompressed)
        plainBlockOffset = binBlockOffset + len(binStream)

        # Number of 32 KB (0x8000) blocks needed for the binary stream,
        # rounded up.
        binBlock = (len(binStream) + 0x8000 - 1) / 0x8000
        #plainBlock = (len(plainStream) + 0x8000 - 1) / 0x8000

        offset = 0
        tailBlock = ''
        for i in range(binBlock):
            tailBlock += struct.pack('>i', binBlockOffset + offset)
            offset += 0x8000;
        tailRec = ''
        for f in self.files:
            # Plain files are addressed after the binary blocks, hence the
            # binBlock index bias for non-binary members.
            t = 0
            if f.IsBinary():
                t = 0
            else:
                t = binBlock
            tailRec += struct.pack('>ii', f.contentOffset / 0x8000 + t, f.contentOffset % 0x8000);

        # Write binary stream
        outputFile.write(binStream)

        # Write plain stream, bz2-compressing each 32 KB slice separately so
        # readers can seek by block; record each block's absolute offset.
        pos = 0
        offset = 0
        while pos < len(plainStream):
            tailBlock += struct.pack('>i', plainBlockOffset + offset);
            block = plainStream[pos:pos+0x8000];
            compressed = bz2.compress(block)
            outputFile.write(compressed)
            offset += len(compressed)
            pos += 0x8000

        # Write tail block (block offset table + per-file records)
        compressedTail = zlib.compress(tailBlock + tailRec)
        outputFile.write(compressedTail)

        # Write tail pointer: compressed tail length and its file offset.
        veom = struct.pack('>ii', len(compressedTail), plainBlockOffset + offset)
        outputFile.write(veom)

        # Write file end mark
        outputFile.write(SNBFile.MAGIC);

        # Close
        outputFile.close()
        return
|
||||||
|
|
||||||
|
    def Dump(self):
        """Print archive metadata and extract every member for inspection.

        Debug helper: prints header fields and per-file records, then
        writes each member's body to /tmp.
        """
        if self.fileName:
            print "File Name:\t", self.fileName
        print "File Count:\t", self.fileCount
        print "VFAT Size(Compressed):\t%d(%d)" % (self.vfatSize, self.vfatCompressed)
        print "Binary Stream Size:\t", self.binStreamSize
        print "Plain Stream Uncompressed Size:\t", self.plainStreamSizeUncompressed
        print "Binary Block Count:\t", self.binBlock
        print "Plain Block Count:\t", self.plainBlock
        for i in range(self.fileCount):
            print "File ", i
            f = self.files[i]
            print "File Name: ", f.fileName
            print "File Attr: ", f.attr
            print "File Size: ", f.fileSize
            print "Block Index: ", f.blockIndex
            print "Content Offset: ", f.contentOffset
            # NOTE(review): hard-coded /tmp is Unix-only and f.fileName may
            # contain path separators — fine for an internal debug aid only.
            tempFile = open("/tmp/" + f.fileName, 'wb')
            tempFile.write(f.fileBody)
            tempFile.close()
|
||||||
|
|
||||||
|
def usage():
    """Print usage instructions for the manual unit test below."""
    print "This unit test is for INTERNAL usage only!"
    print "This unit test accept two parameters."
    print "python snbfile.py <INPUTFILE> <DESTFILE>"
    print "The input file will be extracted and write to dest file. "
    print "Meta data of the file will be shown during this process."
|
||||||
|
|
||||||
|
def main():
    """Manual test driver: validate an SNB file, dump it, and rewrite it.

    Expects exactly two command-line arguments (input and output paths).
    Returns 0 on success, 1 when the input file fails validation.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(0)
    inputFile = sys.argv[1]
    outputFile = sys.argv[2]

    print "Input file: ", inputFile
    print "Output file: ", outputFile

    snbFile = SNBFile(inputFile)
    if snbFile.IsValid():
        snbFile.Dump()
        snbFile.Output(outputFile)
    else:
        print "The input file is invalid."
        return 1
    return 0
|
||||||
|
|
||||||
|
# Manual test entry point; see usage() for the expected arguments.
if __name__ == "__main__":
    """SNB file unit test"""
    sys.exit(main())
|
263
src/calibre/ebooks/snb/snbml.py
Normal file
263
src/calibre/ebooks/snb/snbml.py
Normal file
@ -0,0 +1,263 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Transform OEB content into SNB format
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
|
||||||
|
from calibre.ebooks.oeb.stylizer import Stylizer
|
||||||
|
|
||||||
|
def ProcessFileName(fileName):
    """Normalise an OEB href into a flat SNB member name.

    Path separators and ``#`` fragments become underscores, the result
    is lower-cased, and any image extension is rewritten to ``.jpg``
    (SNB stores all images as JPEG).
    """
    # Flatten the path and neutralise HTML bookmark fragments.
    flat = fileName.replace("/", "_").replace(os.sep, "_").replace("#", "_")
    # Member names are case-insensitive on the device: lower-case them.
    flat = flat.lower()
    # All image formats are converted to JPEG downstream.
    base, ext = os.path.splitext(flat)
    if ext in ('.jpeg', '.jpg', '.gif', '.svg', '.png'):
        return base + '.jpg'
    return flat
|
||||||
|
|
||||||
|
|
||||||
|
# XHTML tags whose content starts a new paragraph block in SNB output.
BLOCK_TAGS = [
    'div',
    'p',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'li',
    'tr',
]

# CSS 'display' values treated the same as BLOCK_TAGS.
BLOCK_STYLES = [
    'block',
]

# Tags whose content is separated by a single space instead of a block break.
SPACE_TAGS = [
    'td',
]

# Inline sentinel markers used during text extraction; they survive the
# text clean-up pass and are translated into <img>/<title>/<text> elements
# when the final SNBC tree is assembled.  The '$$' names are chosen to be
# extremely unlikely to appear in real book content.
CALIBRE_SNB_IMG_TAG = "<$$calibre_snb_temp_img$$>"
CALIBRE_SNB_BM_TAG = "<$$calibre_snb_bm_tag$$>"
CALIBRE_SNB_PRE_TAG = "<$$calibre_snb_pre_tag$$>"
|
||||||
|
|
||||||
|
class SNBMLizer(object):
    """Convert OEB (XHTML) content into SNBC XML trees for the SNB format.

    The conversion is a two-stage pipeline: dump_text() flattens the XHTML
    into plain text with inline sentinel markers (images, bookmarks,
    preformatted lines), cleanup_text() normalises whitespace and line
    lengths, and mlize() re-parses the marked-up text into one <snbc>
    etree per sub-item.
    """

    # Id of the sub-item currently being emitted by dump_text().
    # NOTE(review): class-level attribute, so state is shared across
    # instances — presumably each conversion uses a fresh instance; verify.
    curSubItem = ""
    # curText = [ ]

    def __init__(self, log):
        # Calibre logger used for progress/debug messages.
        self.log = log

    def extract_content(self, oeb_book, item, subitems, opts):
        """Convert one OEB *item* into SNBC trees.

        :param oeb_book: the OEB book object the item belongs to
        :param item: the spine item to convert
        :param subitems: list of (href-fragment, title) pairs splitting the
            item into SNB sub-sections
        :param opts: conversion options
        :return: dict mapping sub-item id -> <snbc> etree (see mlize)
        """
        self.log.info('Converting XHTML to SNBC...')
        self.oeb_book = oeb_book
        self.opts = opts
        self.item = item
        self.subitems = subitems
        return self.mlize();

    def merge_content(self, old_tree, oeb_book, item, subitems, opts):
        """Convert *item* and append its body elements into *old_tree*.

        Used when several OEB items map onto one SNB section.
        """
        newTrees = self.extract_content(oeb_book, item, subitems, opts)
        body = old_tree.find(".//body")
        if body != None:
            for subName in newTrees:
                newbody = newTrees[subName].find(".//body")
                for entity in newbody:
                    body.append(entity)

    def mlize(self):
        """Run the extraction pipeline; return {subitem: <snbc> etree}."""
        output = [ u'' ]
        stylizer = Stylizer(self.item.data, self.item.href, self.oeb_book, self.opts, self.opts.output_profile)
        content = unicode(etree.tostring(self.item.data.find(XHTML('body')), encoding=unicode))
        # content = self.remove_newlines(content)
        # One empty <snbc> skeleton per sub-item; filled from the marked-up
        # text below.
        trees = { }
        for subitem, subtitle in self.subitems:
            snbcTree = etree.Element("snbc")
            etree.SubElement(etree.SubElement(snbcTree, "head"), "title").text = subtitle
            etree.SubElement(snbcTree, "body")
            trees[subitem] = snbcTree
        # Seed the output with a bookmark marker for the default ('') subitem.
        output.append(u'%s%s\n\n' % (CALIBRE_SNB_BM_TAG, ""))
        output += self.dump_text(self.subitems, etree.fromstring(content), stylizer)[0]
        output = self.cleanup_text(u''.join(output))

        # Walk the cleaned text line by line, routing each line into the
        # tree of the sub-item selected by the most recent bookmark marker.
        subitem = ''
        for line in output.splitlines():
            if not line.find(CALIBRE_SNB_PRE_TAG) == 0:
                line = line.strip(u' \t\n\r\u3000')
            else:
                # Preformatted line: keep verbatim (no strip, no indent).
                etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                    etree.CDATA(line[len(CALIBRE_SNB_PRE_TAG):])
                continue
            if len(line) != 0:
                if line.find(CALIBRE_SNB_IMG_TAG) == 0:
                    # Image marker: prefix with the item's directory so the
                    # name matches the flattened file name in the archive.
                    prefix = ProcessFileName(os.path.dirname(self.item.href))
                    if prefix != '':
                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                            prefix + '_' + line[len(CALIBRE_SNB_IMG_TAG):]
                    else:
                        etree.SubElement(trees[subitem].find(".//body"), "img").text = \
                            line[len(CALIBRE_SNB_IMG_TAG):]
                elif line.find(CALIBRE_SNB_BM_TAG) == 0:
                    # Bookmark marker: switch target sub-item.
                    subitem = line[len(CALIBRE_SNB_BM_TAG):]
                else:
                    # Ordinary paragraph; U+3000 pair is the conventional
                    # CJK two-character paragraph indent.
                    etree.SubElement(trees[subitem].find(".//body"), "text").text = \
                        etree.CDATA(unicode(u'\u3000\u3000' + line))
        return trees

    def remove_newlines(self, text):
        """Collapse all newline variants in *text* to single spaces."""
        self.log.debug('\tRemove newlines for processing...')
        text = text.replace('\r\n', ' ')
        text = text.replace('\n', ' ')
        text = text.replace('\r', ' ')

        return text

    def cleanup_text(self, text):
        """Normalise whitespace and wrap long lines in the extracted text."""
        self.log.debug('\tClean up text...')
        # Replace bad characters.
        text = text.replace(u'\xc2', '')
        text = text.replace(u'\xa0', ' ')
        text = text.replace(u'\xa9', '(C)')

        # Replace tabs, vertical tags and form feeds with single space.
        # NOTE(review): str.replace takes literal strings, so '\t+' only
        # matches a tab followed by a '+'; a regex was probably intended.
        text = text.replace('\t+', ' ')
        text = text.replace('\v+', ' ')
        text = text.replace('\f+', ' ')

        # Single line paragraph.
        text = re.sub('(?<=.)%s(?=.)' % os.linesep, ' ', text)

        # Remove multiple spaces.
        #text = re.sub('[ ]{2,}', ' ', text)

        # Remove excessive newlines.
        text = re.sub('\n[ ]+\n', '\n\n', text)
        if self.opts.remove_paragraph_spacing:
            text = re.sub('\n{2,}', '\n', text)
            text = re.sub('(?imu)^(?=.)', '\t', text)
        else:
            text = re.sub('\n{3,}', '\n\n', text)

        # Replace spaces at the beginning and end of lines
        text = re.sub('(?imu)^[ ]+', '', text)
        text = re.sub('(?imu)[ ]+$', '', text)

        # Hard-wrap lines to the configured maximum, preferring to break at
        # the last space before the limit, then the first space after it.
        if self.opts.snb_max_line_length:
            max_length = self.opts.snb_max_line_length
            if self.opts.max_line_length < 25:# and not self.opts.force_max_line_length:
                max_length = 25
            short_lines = []
            lines = text.splitlines()
            for line in lines:
                while len(line) > max_length:
                    space = line.rfind(' ', 0, max_length)
                    if space != -1:
                        # Space was found.
                        short_lines.append(line[:space])
                        line = line[space + 1:]
                    else:
                        # Space was not found.
                        if False and self.opts.force_max_line_length:
                            # Force breaking at max_length.
                            short_lines.append(line[:max_length])
                            line = line[max_length:]
                        else:
                            # Look for the first space after max_length.
                            space = line.find(' ', max_length, len(line))
                            if space != -1:
                                # Space was found.
                                short_lines.append(line[:space])
                                line = line[space + 1:]
                            else:
                                # No space was found cannot break line.
                                short_lines.append(line)
                                line = ''
                # Add the text that was less than max_length to the list
                short_lines.append(line)
            text = '\n'.join(short_lines)

        return text

    def dump_text(self, subitems, elem, stylizer, end='', pre=False, li = ''):
        """Recursively flatten *elem* into marker-annotated plain text.

        :param subitems: (href, title) pairs; matching element ids emit
            bookmark markers
        :param elem: current XHTML element
        :param stylizer: CSS stylizer for visibility/display decisions
        :param end: last two characters already emitted (separator control)
        :param pre: True inside a <pre> subtree
        :param li: pending list-item prefix ('- ') to attach to text
        :return: (list of text fragments, remaining li prefix)
        NOTE(review): the early-return paths return a bare list [''] while
        the normal path returns a tuple — callers index [0], which works
        only because [''][0] is the empty string; fragile but relied upon.
        """

        # Skip non-element nodes (comments, PIs) and foreign namespaces.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            return ['']

        text = ['']
        style = stylizer.style(elem)

        # Emit a bookmark marker when this element's id starts a sub-item.
        if elem.attrib.get('id') != None and elem.attrib['id'] in [ href for href, title in subitems ]:
            if self.curSubItem != None and self.curSubItem != elem.attrib['id']:
                self.curSubItem = elem.attrib['id']
                text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_BM_TAG, self.curSubItem))

        # Honour CSS hiding.
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return ['']

        tag = barename(elem.tag)
        in_block = False

        # Are we in a paragraph block?
        if tag in BLOCK_TAGS or style['display'] in BLOCK_STYLES:
            in_block = True
            if not end.endswith(u'\n\n') and hasattr(elem, 'text') and elem.text:
                text.append(u'\n\n')

        # Table cells separate with a single space.
        # NOTE(review): endswith('u ') looks like a typo for endswith(u' ') —
        # as written it tests for the two characters 'u' and space.
        if tag in SPACE_TAGS:
            if not end.endswith('u ') and hasattr(elem, 'text') and elem.text:
                text.append(u' ')

        # Images become sentinel markers, resolved to member names in mlize().
        if tag == 'img':
            text.append(u'\n\n%s%s\n\n' % (CALIBRE_SNB_IMG_TAG, ProcessFileName(elem.attrib['src'])))

        if tag == 'br':
            text.append(u'\n\n')

        if tag == 'li':
            li = '- '

        pre = (tag == 'pre' or pre)
        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            if pre:
                # Preformatted: mark every continuation line so cleanup/mlize
                # leave it untouched.
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join((li + elem.text).splitlines()))
            else:
                text.append(li + elem.text)
            li = ''

        # Recurse into children, passing the trailing two characters emitted
        # so far for separator suppression.
        for item in elem:
            en = u''
            if len(text) >= 2:
                en = text[-1][-2:]
            t = self.dump_text(subitems, item, stylizer, en, pre, li)[0]
            text += t

        if in_block:
            text.append(u'\n\n')

        # Tail text (text following this element's end tag).
        if hasattr(elem, 'tail') and elem.tail:
            if pre:
                text.append((u'\n\n%s' % CALIBRE_SNB_PRE_TAG ).join(elem.tail.splitlines()))
            else:
                text.append(li + elem.tail)
            li = ''

        return text, li
|
@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
|
|||||||
(_('Topaz books'), ['tpz','azw1']),
|
(_('Topaz books'), ['tpz','azw1']),
|
||||||
(_('Text books'), ['txt', 'rtf']),
|
(_('Text books'), ['txt', 'rtf']),
|
||||||
(_('PDF Books'), ['pdf']),
|
(_('PDF Books'), ['pdf']),
|
||||||
|
(_('SNB Books'), ['snb']),
|
||||||
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
||||||
(_('Archives'), ['zip', 'rar']),
|
(_('Archives'), ['zip', 'rar']),
|
||||||
]
|
]
|
||||||
|
35
src/calibre/gui2/convert/snb_output.py
Normal file
35
src/calibre/gui2/convert/snb_output.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre.gui2.convert.snb_output_ui import Ui_Form
|
||||||
|
from calibre.gui2.convert import Widget
|
||||||
|
|
||||||
|
newline_model = None
|
||||||
|
|
||||||
|
class PluginWidget(Widget, Ui_Form):
    """GUI configuration widget for the SNB output conversion plugin.

    Currently exposes no options (the option list passed to Widget is
    empty); the commented-out scaffolding below is kept as a template for
    newline/line-length options that may be enabled later.
    """

    # Title shown in the conversion dialog's plugin list.
    TITLE = _('SNB Output')
    HELP = _('Options specific to')+' SNB '+_('output')
    # Key under which the widget's settings are committed.
    COMMIT_NAME = 'snb_output'
    ICON = I('mimetypes/snb.png')

    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        # Empty option list: no SNB-specific options are surfaced yet.
        Widget.__init__(self, parent,
                [])
        self.db, self.book_id = db, book_id
        self.initialize_options(get_option, get_help, db, book_id)

        # default = self.opt_newline.currentText()

        # global newline_model
        # if newline_model is None:
        #     newline_model = BasicComboModel(TxtNewlines.NEWLINE_TYPES.keys())
        # self.newline_model = newline_model
        # self.opt_newline.setModel(self.newline_model)

        # default_index = self.opt_newline.findText(default)
        # system_index = self.opt_newline.findText('system')
        # self.opt_newline.setCurrentIndex(default_index if default_index != -1 else system_index if system_index != -1 else 0)
|
74
src/calibre/gui2/convert/snb_output.ui
Normal file
74
src/calibre/gui2/convert/snb_output.ui
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ui version="4.0">
|
||||||
|
<class>Form</class>
|
||||||
|
<widget class="QWidget" name="Form">
|
||||||
|
<property name="geometry">
|
||||||
|
<rect>
|
||||||
|
<x>0</x>
|
||||||
|
<y>0</y>
|
||||||
|
<width>400</width>
|
||||||
|
<height>300</height>
|
||||||
|
</rect>
|
||||||
|
</property>
|
||||||
|
<property name="windowTitle">
|
||||||
|
<string>Form</string>
|
||||||
|
</property>
|
||||||
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
|
<!-- <item row="0" column="0"> -->
|
||||||
|
<!-- <widget class="QLabel" name="label"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Line ending style:</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="buddy"> -->
|
||||||
|
<!-- <cstring>opt_newline</cstring> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="0" column="1"> -->
|
||||||
|
<!-- <widget class="QComboBox" name="opt_newline"/> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="4" column="0"> -->
|
||||||
|
<!-- <spacer name="verticalSpacer"> -->
|
||||||
|
<!-- <property name="orientation"> -->
|
||||||
|
<!-- <enum>Qt::Vertical</enum> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="sizeHint" stdset="0"> -->
|
||||||
|
<!-- <size> -->
|
||||||
|
<!-- <width>20</width> -->
|
||||||
|
<!-- <height>246</height> -->
|
||||||
|
<!-- </size> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </spacer> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="3" column="0" colspan="2"> -->
|
||||||
|
<!-- <widget class="QCheckBox" name="opt_inline_toc"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Inline TOC</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="1" column="1"> -->
|
||||||
|
<!-- <widget class="QSpinBox" name="opt_max_line_length"/> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="1" column="0"> -->
|
||||||
|
<!-- <widget class="QLabel" name="label_2"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>&Maximum line length:</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- <property name="buddy"> -->
|
||||||
|
<!-- <cstring>opt_max_line_length</cstring> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
<!-- <item row="2" column="0" colspan="2"> -->
|
||||||
|
<!-- <widget class="QCheckBox" name="opt_force_max_line_length"> -->
|
||||||
|
<!-- <property name="text"> -->
|
||||||
|
<!-- <string>Force maximum line length</string> -->
|
||||||
|
<!-- </property> -->
|
||||||
|
<!-- </widget> -->
|
||||||
|
<!-- </item> -->
|
||||||
|
</layout>
|
||||||
|
</widget>
|
||||||
|
<resources/>
|
||||||
|
<connections/>
|
||||||
|
</ui>
|
@ -20,9 +20,9 @@ What formats does |app| support conversion to/from?
|
|||||||
|app| supports the conversion of many input formats to many output formats.
|
|app| supports the conversion of many input formats to many output formats.
|
||||||
It can convert every input format in the following list, to every output format.
|
It can convert every input format in the following list, to every output format.
|
||||||
|
|
||||||
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, TCR, TXT
|
*Input Formats:* CBZ, CBR, CBC, CHM, EPUB, FB2, HTML, LIT, LRF, MOBI, ODT, PDF, PRC**, PDB, PML, RB, RTF, SNB, TCR, TXT
|
||||||
|
|
||||||
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, TCR, TXT
|
*Output Formats:* EPUB, FB2, OEB, LIT, LRF, MOBI, PDB, PML, RB, PDF, SNB, TCR, TXT
|
||||||
|
|
||||||
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
|
** PRC is a generic format, |app| supports PRC files with TextRead and MOBIBook headers
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user