mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
[SNBInput][SNBMetadataReader] Add SNB input plugin and SNB Metadata Reader plugin.
This commit is contained in:
parent
6c9541723f
commit
83cf5d5f28
@ -293,6 +293,17 @@ class RTFMetadataReader(MetadataReaderPlugin):
|
|||||||
from calibre.ebooks.metadata.rtf import get_metadata
|
from calibre.ebooks.metadata.rtf import get_metadata
|
||||||
return get_metadata(stream)
|
return get_metadata(stream)
|
||||||
|
|
||||||
|
class SNBMetadataReader(MetadataReaderPlugin):
    """Metadata reader plugin registered for the ``snb`` file type.

    The actual parsing is delegated to
    ``calibre.ebooks.metadata.snb.get_metadata``.
    """

    author      = 'Li Fanxi'
    name        = 'Read SNB metadata'
    description = _('Read metadata from %s files') % 'SNB'
    file_types  = set(['snb'])

    def get_metadata(self, stream, ftype):
        """Return the metadata read from ``stream``; ``ftype`` is not used."""
        # Imported at call time rather than at module import time.
        from calibre.ebooks.metadata import snb as snb_metadata
        return snb_metadata.get_metadata(stream)
|
||||||
|
|
||||||
class TOPAZMetadataReader(MetadataReaderPlugin):
|
class TOPAZMetadataReader(MetadataReaderPlugin):
|
||||||
|
|
||||||
name = 'Read Topaz metadata'
|
name = 'Read Topaz metadata'
|
||||||
@ -420,6 +431,7 @@ from calibre.ebooks.tcr.input import TCRInput
|
|||||||
from calibre.ebooks.txt.input import TXTInput
|
from calibre.ebooks.txt.input import TXTInput
|
||||||
from calibre.ebooks.lrf.input import LRFInput
|
from calibre.ebooks.lrf.input import LRFInput
|
||||||
from calibre.ebooks.chm.input import CHMInput
|
from calibre.ebooks.chm.input import CHMInput
|
||||||
|
from calibre.ebooks.snb.input import SNBInput
|
||||||
|
|
||||||
from calibre.ebooks.epub.output import EPUBOutput
|
from calibre.ebooks.epub.output import EPUBOutput
|
||||||
from calibre.ebooks.fb2.output import FB2Output
|
from calibre.ebooks.fb2.output import FB2Output
|
||||||
@ -496,6 +508,7 @@ plugins += [
|
|||||||
TXTInput,
|
TXTInput,
|
||||||
LRFInput,
|
LRFInput,
|
||||||
CHMInput,
|
CHMInput,
|
||||||
|
SNBInput,
|
||||||
]
|
]
|
||||||
plugins += [
|
plugins += [
|
||||||
EPUBOutput,
|
EPUBOutput,
|
||||||
|
@ -25,7 +25,7 @@ class DRMError(ValueError):
|
|||||||
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
|
||||||
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
|
||||||
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
|
||||||
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan']
|
'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'mbp', 'tan', 'snb']
|
||||||
|
|
||||||
class HTMLRenderer(object):
|
class HTMLRenderer(object):
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ _METADATA_PRIORITIES = [
|
|||||||
'html', 'htm', 'xhtml', 'xhtm',
|
'html', 'htm', 'xhtml', 'xhtm',
|
||||||
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
'rtf', 'fb2', 'pdf', 'prc', 'odt',
|
||||||
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
'epub', 'lit', 'lrx', 'lrf', 'mobi',
|
||||||
'rb', 'imp', 'azw'
|
'rb', 'imp', 'azw', 'snb'
|
||||||
]
|
]
|
||||||
|
|
||||||
# The priorities for loading metadata from different file types
|
# The priorities for loading metadata from different file types
|
||||||
|
47
src/calibre/ebooks/metadata/snb.py
Executable file
47
src/calibre/ebooks/metadata/snb.py
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
'''Read meta information from SNB files'''
|
||||||
|
|
||||||
|
from __future__ import with_statement
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
|
||||||
|
import re, os
|
||||||
|
from StringIO import StringIO
|
||||||
|
from calibre.ebooks.metadata import MetaInformation
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
def _snb_head_text(meta, tag):
    """Return the text of ``<head>/<tag>`` in ``meta``, or None.

    None is returned both when the element does not exist and when it
    exists but carries no text, so callers need a single check.
    """
    node = meta.find('.//head/' + tag)
    if node is None:
        return None
    return node.text or None


def get_metadata(stream, extract_cover=True):
    """ Return metadata as a L{MetaInfo} object

    ``stream`` is either a file-like object (anything with a ``write``
    attribute, which is rewound before parsing) or the raw file content,
    which is wrapped in a ``StringIO``.  When ``extract_cover`` is true
    the cover image referenced by the book header is loaded as well.

    Reading is best-effort: each field is filled in only if the
    corresponding element is present in ``snbf/book.snbf``; anything
    missing keeps the 'Unknown' defaults.  Unexpected errors are printed
    and whatever was collected up to that point is returned.
    """
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    snbFile = SNBFile()

    try:
        if not hasattr(stream, 'write'):
            snbFile.Parse(StringIO(stream), True)
        else:
            stream.seek(0)
            snbFile.Parse(stream, True)

        # A None result is treated as "file not present in the container".
        meta = snbFile.GetFileStream('snbf/book.snbf')

        if meta is not None:
            meta = etree.fromstring(meta)

            # Each field is looked up independently so that one missing
            # element no longer discards all the others.
            title = _snb_head_text(meta, 'name')
            if title is not None:
                mi.title = title

            author = _snb_head_text(meta, 'author')
            if author is not None:
                mi.authors = [author]

            language = _snb_head_text(meta, 'language')
            if language is not None:
                mi.language = language.lower().replace('_', '-')

            publisher = _snb_head_text(meta, 'publisher')
            if publisher is not None:
                mi.publisher = publisher

            if extract_cover:
                cover = _snb_head_text(meta, 'cover')
                if cover is not None:
                    ext = os.path.splitext(cover)[1]
                    if ext == '.jpeg':
                        ext = '.jpg'
                    data = snbFile.GetFileStream('snbc/images/' + cover)
                    # Do not record a cover whose image data is unavailable.
                    if data is not None:
                        mi.cover_data = (ext[-3:], data)

    except Exception as e:
        # Deliberately best-effort: report the problem and fall through
        # to return what we have.  ``print(e)`` is valid both as a
        # Python 2 print statement and as a Python 3 function call.
        print(e)

    return mi
|
104
src/calibre/ebooks/snb/input.py
Executable file
104
src/calibre/ebooks/snb/input.py
Executable file
@ -0,0 +1,104 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
__license__ = 'GPL 3'
|
||||||
|
__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import os, uuid
|
||||||
|
|
||||||
|
from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
|
||||||
|
from calibre.ebooks.oeb.base import DirContainer
|
||||||
|
from calibre.ebooks.snb.snbfile import SNBFile
|
||||||
|
from calibre.ptempfile import TemporaryDirectory
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
from calibre import prepare_string_for_xml
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
# Skeleton of a generated chapter file.  The first %s is substituted into
# <title>, the second %s into <body>; the document declares itself as UTF-8.
HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
|
||||||
|
|
||||||
|
def html_encode(s):
    """Escape ``s`` for inclusion in generated HTML.

    Markup characters are replaced by character references, newlines
    become ``<br/>`` and every space becomes a non-breaking space so
    that runs of spaces survive rendering.  ``None`` (an element without
    text) is treated as the empty string.
    """
    if s is None:
        return u''
    replacements = (
        # '&' must come first, otherwise the ampersands introduced by the
        # references below would be escaped a second time.
        (u'&', u'&amp;'),
        (u'<', u'&lt;'),
        (u'>', u'&gt;'),
        (u'"', u'&quot;'),
        # Numeric reference: understood by both HTML and XML parsers.
        (u"'", u'&#39;'),
        # After '<' and '>' have been handled, so the tag stays intact.
        (u'\n', u'<br/>'),
        (u' ', u'&nbsp;'),
    )
    for plain, escaped in replacements:
        s = s.replace(plain, escaped)
    return s
|
||||||
|
|
||||||
|
class SNBInput(InputFormatPlugin):
    """Input plugin that converts an SNB file into an OEB book.

    The book header (``snbf/book.snbf``) supplies the metadata, the
    table of contents (``snbf/toc.snbf``) lists the chapters, and every
    chapter found under ``snbc/`` is rewritten as a small HTML file in a
    temporary directory that backs the OEB container.
    """

    name        = 'SNB Input'
    author      = 'Li Fanxi'
    description = 'Convert SNB files to OEB'
    file_types  = set(['snb'])

    options = set([
    ])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Parse ``stream`` as an SNB file and return the resulting OEB book.

        Raises ValueError("Invalid SNB file") when the file cannot be
        parsed or fails the parser's validity check.  Metadata elements,
        chapters and chapter bodies that are absent are skipped instead
        of aborting the conversion.  ``file_ext`` and ``accelerators``
        are not used.
        """
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must not be
            # reported as a broken input file.
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
            raise ValueError("Invalid SNB file")

        log.debug("Handle meta data ...")
        from calibre.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options, self,
                encoding=options.input_encoding, populate=False)

        # A None result is treated as "file not present in the container".
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
            meta = etree.fromstring(meta)

            def head_text(tag):
                # Text of <head>/<tag>, or None if absent or empty.
                node = meta.find('.//head/' + tag)
                if node is None:
                    return None
                return node.text or None

            # Same fallback title the SNB metadata reader uses.
            oeb.metadata.add('title', head_text('name') or _('Unknown'))

            author = head_text('author')
            if author is not None:
                oeb.metadata.add('creator', author, attrib={'role':'aut'})

            language = head_text('language')
            if language is not None:
                oeb.metadata.add('language', language.lower().replace('_', '-'))

            generator = head_text('generator')
            if generator is not None:
                # NOTE(review): the generating software is registered as a
                # 'creator' entry, exactly as before -- confirm whether a
                # dedicated metadata term was intended.
                oeb.metadata.add('creator', generator)

            publisher = head_text('publisher')
            if publisher is not None:
                oeb.metadata.add('publisher', publisher)

            cover = head_text('cover')
            if cover is not None:
                oeb.guide.add('cover', 'Cover', cover)

        bookid = str(uuid.uuid4())
        oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in oeb.metadata.identifier:
            if 'id' in ident.attrib:
                # Use the identifier that actually matched, not blindly
                # the first one in the list.
                oeb.uid = ident
                break

        # keep=True is passed through unchanged; the directory backs
        # oeb.container after this method returns.
        with TemporaryDirectory('_snb2oeb', keep=True) as tdir:
            log.debug('Process TOC ...')
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
                toc = etree.fromstring(toc)
                tocBody = toc.find('.//body')
                chapters = tocBody if tocBody is not None else []
                i = 1
                for ch in chapters:
                    chapterName = ch.text or u''
                    chapterSrc = ch.get('src')
                    if not chapterSrc:
                        # Entry without a source file: nothing to convert.
                        continue
                    fname = 'ch_%d.htm' % i
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
                    snbc = etree.fromstring(data)
                    chapterBody = snbc.find('.//body')
                    lines = []
                    for line in (chapterBody if chapterBody is not None else []):
                        if line.tag == 'text':
                            lines.append(u'<p>%s</p>' % html_encode(line.text or u''))
                        elif line.tag == 'img':
                            # NOTE(review): html_encode also turns spaces
                            # into &nbsp;, which alters image names that
                            # contain spaces -- confirm this is intended.
                            lines.append(u'<p><img src="%s" /></p>' % html_encode(line.text or u''))
                    # The title is text from the book and goes inside
                    # <title>, so it has to be escaped as well.
                    html = HTML_TEMPLATE % (prepare_string_for_xml(chapterName),
                                            u'\n'.join(lines))
                    with open(os.path.join(tdir, fname), 'wb') as outputFile:
                        outputFile.write(html.encode('utf-8', 'replace'))
                    oeb.toc.add(chapterName, fname)
                    item_id, href = oeb.manifest.generate(id='html',
                        href=ascii_filename(fname))
                    item = oeb.manifest.add(item_id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
                    # Only chapters that were actually written consume a
                    # file number.
                    i = i + 1
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    item_id, href = oeb.manifest.generate(id='image',
                        href=ascii_filename(f))
                    item = oeb.manifest.add(item_id, href, m)
                    item.html_input_href = f

        return oeb
|
||||||
|
|
@ -166,6 +166,7 @@ class AddAction(InterfaceAction):
|
|||||||
(_('Topaz books'), ['tpz','azw1']),
|
(_('Topaz books'), ['tpz','azw1']),
|
||||||
(_('Text books'), ['txt', 'rtf']),
|
(_('Text books'), ['txt', 'rtf']),
|
||||||
(_('PDF Books'), ['pdf']),
|
(_('PDF Books'), ['pdf']),
|
||||||
|
(_('SNB Books'), ['snb']),
|
||||||
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
(_('Comics'), ['cbz', 'cbr', 'cbc']),
|
||||||
(_('Archives'), ['zip', 'rar']),
|
(_('Archives'), ['zip', 'rar']),
|
||||||
]
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user