[SNBInput][SNBMetadataReader] Add SNB input plugin and SNB Metadata Reader plugin.

This commit is contained in:
Li Fanxi 2010-10-14 16:59:22 +08:00
parent 6c9541723f
commit 83cf5d5f28
6 changed files with 167 additions and 2 deletions

View File

@ -293,6 +293,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
from calibre.ebooks.metadata.rtf import get_metadata from calibre.ebooks.metadata.rtf import get_metadata
return get_metadata(stream) return get_metadata(stream)
class SNBMetadataReader(MetadataReaderPlugin):
    """Metadata reader plugin registered for files with the ``snb`` type."""

    name = 'Read SNB metadata'
    file_types = set(['snb'])
    description = _('Read metadata from %s files') % 'SNB'
    author = 'Li Fanxi'

    def get_metadata(self, stream, ftype):
        """Return the metadata read from ``stream``.

        The work is delegated to ``calibre.ebooks.metadata.snb.get_metadata``;
        ``ftype`` is accepted but not used here.
        """
        # The reader module is imported at call time, not at module load.
        from calibre.ebooks.metadata import snb
        return snb.get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin): class TOPAZMetadataReader(MetadataReaderPlugin):
name = 'Read Topaz metadata' name = 'Read Topaz metadata'
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput from calibre.ebooks.chm.input import CHMInput
from calibre.ebooks.snb.input import SNBInput
from calibre.ebooks.epub.output import EPUBOutput from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output from calibre.ebooks.fb2.output import FB2Output
@ -496,6 +508,7 @@ plugins += [
TXTInput, TXTInput,
LRFInput, LRFInput,
CHMInput, CHMInput,
SNBInput,
] ]
plugins += [ plugins += [
EPUBOutput, EPUBOutput,

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm', BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip', 'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan'] 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
class HTMLRenderer(object): class HTMLRenderer(object):

View File

@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm', 'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'fb2', 'pdf', 'prc', 'odt', 'rtf', 'fb2', 'pdf', 'prc', 'odt',
'epub', 'lit', 'lrx', 'lrf', 'mobi', 'epub', 'lit', 'lrx', 'lrf', 'mobi',
'rb', 'imp', 'azw' 'rb', 'imp', 'azw', 'snb'
] ]
# The priorities for loading metadata from different file types # The priorities for loading metadata from different file types

View File

@ -0,0 +1,47 @@
'''Read meta information from SNB files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
import re, os
from StringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.snb.snbfile import SNBFile
from lxml import etree
def get_metadata(stream, extract_cover=True):
    """Return the metadata of an SNB book as a MetaInformation object.

    This is a best-effort reader: any error raised while parsing is printed
    and the metadata gathered up to that point is returned.  Fields whose
    element is missing or empty in ``snbf/book.snbf`` keep the defaults the
    MetaInformation object was created with.

    :param stream: the SNB book.  An object with a ``write`` attribute is
        treated as a file-like object and rewound before parsing; anything
        else is wrapped in a ``StringIO`` first.
    :param extract_cover: when true, also load the cover image named in the
        book header into ``mi.cover_data`` as ``(extension, data)``.
    :return: the populated MetaInformation object.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snb_file = SNBFile()

    def head_text(root, tag):
        # Text of <head>/<tag>, or None when the element is absent or empty.
        node = root.find('.//head/' + tag)
        if node is None:
            return None
        return node.text

    try:
        if not hasattr(stream, 'write'):
            snb_file.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snb_file.Parse(stream, True)

        meta = snb_file.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            title = head_text(meta, 'name')
            if title:
                mi.title = title
            author = head_text(meta, 'author')
            if author:
                mi.authors = [author]
            language = head_text(meta, 'language')
            if language:
                mi.language = language.lower().replace('_', '-')
            publisher = head_text(meta, 'publisher')
            if publisher:
                mi.publisher = publisher

            if extract_cover:
                cover_name = head_text(meta, 'cover')
                if cover_name is not None:
                    # Lower-case first so '.JPEG' is normalised like '.jpeg'
                    # rather than being truncated to 'PEG'.
                    ext = os.path.splitext(cover_name)[1].lower()
                    if ext == '.jpeg':
                        ext = '.jpg'
                    cover_data = snb_file.GetFileStream(
                        'snbc/images/' + cover_name)
                    # Leave cover_data untouched when the image is not in
                    # the archive instead of recording (ext, None).
                    if cover_data is not None:
                        mi.cover_data = (ext[-3:], cover_data)
    except Exception as err:
        # Deliberately non-fatal: report the problem and return what we have.
        print(err)
    return mi

104
src/calibre/ebooks/snb/input.py Executable file
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import os, uuid
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
from calibre import prepare_string_for_xml
from lxml import etree
# Skeleton of a generated chapter page; the two %s slots take the page title
# and the body markup, in that order.
HTML_TEMPLATE = (
    u'<html><head>'
    u'<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>'
    u'<title>%s</title>'
    u'</head><body>\n%s\n</body></html>'
)
def html_encode(s):
    """Escape a piece of text for inclusion in generated HTML.

    Besides the five XML special characters, every newline becomes ``<br/>``
    and every space becomes ``&nbsp;``.

    :param s: the text to escape; ``None`` (what lxml reports as the text of
        an empty element) is treated as an empty string.
    :return: the escaped text.
    """
    if s is None:
        return u''
    replacements = (
        # '&' has to be handled first, otherwise the ampersands introduced
        # by the later entities would be escaped a second time.
        (u'&', u'&amp;'),
        (u'<', u'&lt;'),
        (u'>', u'&gt;'),
        (u'"', u'&quot;'),
        (u"'", u'&apos;'),
        (u'\n', u'<br/>'),
        (u' ', u'&nbsp;'),
    )
    for plain, escaped in replacements:
        s = s.replace(plain, escaped)
    return s
class SNBInput(InputFormatPlugin):
    """Input plugin that converts an SNB book into an OEB book."""

    name = 'SNB Input'
    author = 'Li Fanxi'
    description = 'Convert SNB files to OEB'
    file_types = set(['snb'])

    options = set([
    ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Build an OEB book from the SNB data in ``stream``.

        Metadata is taken from ``snbf/book.snbf``, the chapter list from
        ``snbf/toc.snbf``; each chapter found under ``snbc/`` is written as
        an HTML file into a temporary directory that also receives the
        book's images.

        :param stream: file-like object holding the SNB book.
        :param options: conversion options; only ``input_encoding`` is read
            here, the object is otherwise passed on to ``create_oebbook``.
        :param file_ext: not used by this plugin.
        :param log: logger used for debug output.
        :param accelerators: not used by this plugin.
        :return: the populated OEB book.
        :raises ValueError: if the stream cannot be parsed as an SNB file.
        """
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception:
            # Any parsing failure is reported as an invalid file, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")

        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options, self,
                encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            def head_text(tag):
                # Text of <head>/<tag>; empty string when absent or empty so
                # that an incomplete header does not abort the conversion.
                node = meta.find('.//head/' + tag)
                if node is None or node.text is None:
                    return u''
                return node.text

            oeb.metadata.add('title', head_text('name'))
            oeb.metadata.add('creator', head_text('author'),
                    attrib={'role': 'aut'})
            oeb.metadata.add('language',
                    head_text('language').lower().replace('_', '-'))
            # NOTE(review): the generator is recorded as a second, role-less
            # 'creator' entry - confirm this is the intended metadata term.
            oeb.metadata.add('creator', head_text('generator'))
            oeb.metadata.add('publisher', head_text('publisher'))
            cover = head_text('cover')
            if cover:
                oeb.guide.add('cover', 'Cover', cover)

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # Use the identifier that actually carries the id attribute.
                oeb.uid = ident
                break

        # NOTE(review): keep=True leaves the directory on disk after the
        # block exits; presumably the OEB container still reads from it.
        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                toc_body = toc.find('.//body')
                # Compare with None: an lxml element without children is
                # falsy, so a plain truth test would be wrong here.
                chapters = toc_body if toc_body is not None else []
                # The counter advances for every TOC entry, including the
                # ones skipped below, so file numbers follow TOC positions.
                for i, ch in enumerate(chapters, 1):
                    chapterName = ch.text
                    chapterSrc = ch.get('src')
                    if chapterSrc is None:
                        continue
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    page = self._chapter_html(etree.fromstring(data),
                            chapterName)
                    with open(os.path.join(tdir, fname), 'wb') as outputFile:
                        outputFile.write(page.encode('utf-8', 'replace'))
                    oeb.toc.add(ch.text, fname)
                    item_id, href = oeb.manifest.generate(id='html',
                            href=ascii_filename(fname))
                    item = oeb.manifest.add(item_id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)

            imageFiles = snbFile.OutputImageFiles(tdir)
            for f, m in imageFiles:
                item_id, href = oeb.manifest.generate(id='image',
                        href=ascii_filename(f))
                item = oeb.manifest.add(item_id, href, m)
                item.html_input_href = f

        return oeb

    def _chapter_html(self, snbc, title):
        """Render a parsed chapter document as a complete HTML page."""
        body = snbc.find('.//body')
        lines = []
        for line in (body if body is not None else []):
            # lxml reports the text of an empty element as None.
            text = line.text or u''
            if line.tag == 'text':
                lines.append(u'<p>%s</p>' % html_encode(text))
            elif line.tag == 'img':
                # NOTE(review): html_encode turns spaces into &nbsp;, which
                # would alter an image path containing spaces - confirm.
                lines.append(u'<p><img src="%s" /></p>' % html_encode(text))
        # Escape the title so '&' or '<' in a chapter name cannot break the
        # generated markup.
        return HTML_TEMPLATE % (prepare_string_for_xml(title or u''),
                u'\n'.join(lines))

View File

@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
(_('Topaz books'), ['tpz','azw1']), (_('Topaz books'), ['tpz','azw1']),
(_('Text books'), ['txt', 'rtf']), (_('Text books'), ['txt', 'rtf']),
(_('PDF Books'), ['pdf']), (_('PDF Books'), ['pdf']),
(_('SNB Books'), ['snb']),
(_('Comics'), ['cbz', 'cbr', 'cbc']), (_('Comics'), ['cbz', 'cbr', 'cbc']),
(_('Archives'), ['zip', 'rar']), (_('Archives'), ['zip', 'rar']),
] ]