[SNBInput][SNBMetadataReader] Add SNB input plugin and SNB Metadata Reader plugin.

This commit is contained in:
Li Fanxi 2010-10-14 16:59:22 +08:00
parent 6c9541723f
commit 83cf5d5f28
6 changed files with 167 additions and 2 deletions

View File

@ -292,6 +292,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
def get_metadata(self, stream, ftype):
from calibre.ebooks.metadata.rtf import get_metadata
return get_metadata(stream)
class SNBMetadataReader(MetadataReaderPlugin):
    """Metadata reader plugin registered for the ``snb`` file type."""

    name = 'Read SNB metadata'
    author = 'Li Fanxi'
    description = _('Read metadata from %s files') % 'SNB'
    file_types = set(('snb',))

    def get_metadata(self, stream, ftype):
        """Return the metadata parsed from *stream*; *ftype* is not used."""
        # Imported lazily, inside the call, like the sibling reader plugins.
        from calibre.ebooks.metadata import snb
        return snb.get_metadata(stream)
class TOPAZMetadataReader(MetadataReaderPlugin):
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
from calibre.ebooks.txt.input import TXTInput
from calibre.ebooks.lrf.input import LRFInput
from calibre.ebooks.chm.input import CHMInput
from calibre.ebooks.snb.input import SNBInput
from calibre.ebooks.epub.output import EPUBOutput
from calibre.ebooks.fb2.output import FB2Output
@ -496,6 +508,7 @@ plugins += [
TXTInput,
LRFInput,
CHMInput,
SNBInput,
]
plugins += [
EPUBOutput,

View File

@ -25,7 +25,7 @@ class DRMError(ValueError):
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan']
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
class HTMLRenderer(object):

View File

@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'fb2', 'pdf', 'prc', 'odt',
'epub', 'lit', 'lrx', 'lrf', 'mobi',
'rb', 'imp', 'azw'
'rb', 'imp', 'azw', 'snb'
]
# The priorities for loading metadata from different file types

View File

@ -0,0 +1,47 @@
'''Read meta information from SNB files'''
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
import re, os
from StringIO import StringIO
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.snb.snbfile import SNBFile
from lxml import etree
def get_metadata(stream, extract_cover=True):
    """Read title, author, language, publisher and cover from an SNB book.

    :param stream: Either a seekable file-like object, or the raw SNB data
        itself (anything without a ``write`` attribute is wrapped in a
        ``StringIO`` before parsing).
    :param extract_cover: When true, also load the image named by the
        ``<head><cover>`` element into ``mi.cover_data`` as an
        ``(extension, data)`` tuple.
    :return: A ``MetaInformation`` object. Fields that are absent or empty
        in the book keep their initial values. Errors raised while reading
        the file are reported on stderr and otherwise swallowed, so whatever
        was collected up to that point is still returned.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    def head_text(root, tag):
        # Text of <head>/<tag>, or None when the element is missing or empty.
        # lxml elements must be compared with ``is None``; their truth value
        # reflects the number of children, not their existence.
        node = root.find('.//head/' + tag)
        if node is None or not node.text:
            return None
        return node.text

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            # Each field is read independently so that one missing element
            # does not discard everything that follows it.
            title = head_text(meta, 'name')
            if title is not None:
                mi.title = title

            author = head_text(meta, 'author')
            if author is not None:
                mi.authors = [author]

            language = head_text(meta, 'language')
            if language is not None:
                mi.language = language.lower().replace('_', '-')

            publisher = head_text(meta, 'publisher')
            if publisher is not None:
                mi.publisher = publisher

            if extract_cover:
                cover_name = head_text(meta, 'cover')
                if cover_name is not None:
                    ext = os.path.splitext(cover_name)[1].lower().lstrip('.')
                    if ext == 'jpeg':
                        ext = 'jpg'
                    cover_data = snbFile.GetFileStream(
                            'snbc/images/' + cover_name)
                    # Only record a cover when the image is really present.
                    if cover_data is not None:
                        mi.cover_data = (ext, cover_data)
    except Exception:
        # Metadata reading is best-effort: report the problem with its
        # traceback, then return what has been gathered so far.
        import traceback
        traceback.print_exc()

    return mi

104
src/calibre/ebooks/snb/input.py Executable file
View File

@ -0,0 +1,104 @@
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
__docformat__ = 'restructuredtext en'
import os, uuid
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
from calibre.ebooks.oeb.base import DirContainer
from calibre.ebooks.snb.snbfile import SNBFile
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.filenames import ascii_filename
from calibre import prepare_string_for_xml
from lxml import etree
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
def html_encode(s):
    """Escape *s* for inclusion in the generated chapter HTML.

    Besides the five XML special characters, newlines become ``<br/>`` and
    every space becomes ``&nbsp;`` so that the original text layout is kept.

    :param s: The text to escape, or ``None`` (e.g. the ``.text`` of an
        empty XML element).
    :return: The escaped text; an empty string when *s* is ``None``.
    """
    if s is None:
        return u''
    # Order matters: '&' must be escaped first so that the entities and the
    # markup introduced by the later replacements are not escaped again.
    replacements = (
        (u'&', u'&amp;'),
        (u'<', u'&lt;'),
        (u'>', u'&gt;'),
        (u'"', u'&quot;'),
        (u"'", u'&apos;'),
        (u'\n', u'<br/>'),
        (u' ', u'&nbsp;'),
    )
    for raw, escaped in replacements:
        s = s.replace(raw, escaped)
    return s
class SNBInput(InputFormatPlugin):
    """Conversion input plugin that turns an SNB book into an OEB book."""

    name = 'SNB Input'
    author = 'Li Fanxi'
    description = 'Convert SNB files to OEB'
    file_types = set(['snb'])

    # This plugin defines no conversion options of its own.
    options = set([
    ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Parse the SNB file in *stream* and build an OEB book from it.

        Book metadata is read from ``snbf/book.snbf``; each chapter listed
        in ``snbf/toc.snbf`` is written as a ``ch_<n>.htm`` file into a
        temporary directory and added to the manifest, spine and TOC; the
        images exported by the SNB file are added to the manifest.

        :param stream: Open SNB file, handed to ``SNBFile.Parse``.
        :param options: Conversion options; ``input_encoding`` is read.
        :param file_ext: Not used by this plugin.
        :param log: Logger used for debug messages.
        :param accelerators: Not used by this plugin.
        :return: The populated OEB book.
        :raises ValueError: If the file cannot be parsed or is not valid.
        """
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception:
            # Any parser failure means the input is not a usable SNB file.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")

        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options, self,
                encoding=options.input_encoding, populate=False)

        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            def head_text(tag):
                # Text of <head>/<tag>; empty string when the element is
                # missing or has no text, so a sparse header cannot crash
                # the conversion.
                node = meta.find('.//head/' + tag)
                if node is None or node.text is None:
                    return u''
                return node.text

            oeb.metadata.add('title', head_text('name'))
            oeb.metadata.add('creator', head_text('author'),
                    attrib={'role': 'aut'})
            oeb.metadata.add('language',
                    head_text('language').lower().replace('_', '-'))
            # NOTE(review): the generating tool is recorded as a second
            # 'creator' entry, as in the original code - confirm that this
            # is the intended metadata term.
            oeb.metadata.add('creator', head_text('generator'))
            oeb.metadata.add('publisher', head_text('publisher'))
            cover = head_text('cover')
            if cover:
                oeb.guide.add('cover', 'Cover', cover)

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # Use the identifier that actually carries an id attribute.
                oeb.uid = ident
                break

        # keep=True: the directory is not removed when the block exits;
        # presumably because the returned book's container still points at
        # it. The suffix names this plugin rather than the CHM one.
        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                toc_body = toc.find('.//body')
                chapters = toc_body if toc_body is not None else []
                i = 1
                for ch in chapters:
                    chapterName = ch.text or u''
                    chapterSrc = ch.get('src')
                    if chapterSrc is None:
                        # A TOC entry without a source file has no content.
                        continue
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is not None:
                        snbc = etree.fromstring(data)
                        snbc_body = snbc.find('.//body')
                        lines = []
                        for line in (snbc_body if snbc_body is not None
                                     else []):
                            # Empty elements have ``text`` of None.
                            text = line.text or u''
                            if line.tag == 'text':
                                lines.append(u'<p>%s</p>'
                                        % html_encode(text))
                            elif line.tag == 'img':
                                lines.append(u'<p><img src="%s" /></p>'
                                        % html_encode(text))
                        # Escape the title so that '&' or '<' in a chapter
                        # name cannot produce malformed markup.
                        title = chapterName.replace(u'&', u'&amp;')
                        title = title.replace(u'<', u'&lt;')
                        title = title.replace(u'>', u'&gt;')
                        page = HTML_TEMPLATE % (title, u'\n'.join(lines))
                        # ``with`` closes the file even if writing fails.
                        with open(os.path.join(tdir, fname), 'wb') as out:
                            out.write(page.encode('utf-8', 'replace'))
                        oeb.toc.add(chapterName, fname)
                        item_id, href = oeb.manifest.generate(id='html',
                                href=ascii_filename(fname))
                        item = oeb.manifest.add(item_id, href, 'text/html')
                        item.html_input_href = fname
                        oeb.spine.add(item, True)
                        i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    item_id, href = oeb.manifest.generate(id='image',
                            href=ascii_filename(f))
                    item = oeb.manifest.add(item_id, href, m)
                    item.html_input_href = f

        return oeb

View File

@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
(_('Topaz books'), ['tpz','azw1']),
(_('Text books'), ['txt', 'rtf']),
(_('PDF Books'), ['pdf']),
(_('SNB Books'), ['snb']),
(_('Comics'), ['cbz', 'cbr', 'cbc']),
(_('Archives'), ['zip', 'rar']),
]