Support for reading metadata and adding .rb and .imp books to the ebook library (thanks to ashkulz)

This commit is contained in:
Kovid Goyal 2008-09-02 15:03:45 -07:00
commit ce3391ff9a
5 changed files with 143 additions and 5 deletions

View File

@ -16,6 +16,7 @@ class UnknownFormatError(Exception):
pass
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw',
'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip']
# File extensions (lower case, without the leading dot) that are treated as
# book formats.  Every extension is listed exactly once.
BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'htm', 'xhtm',
                   'html', 'xhtml', 'epub', 'pdf', 'prc', 'mobi', 'azw',
                   'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'oebzip',
                   'rb', 'imp']

View File

@ -0,0 +1,62 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from IMP files'''
import sys, os
from calibre.ebooks.metadata import MetaInformation
# Accepted file signatures: get_metadata() compares the first 10 bytes of the
# stream against these entries.  The two entries differ only in their second
# byte (presumably a format version number -- TODO confirm).
MAGIC = ['\x00\x01BOOKDOUG', '\x00\x02BOOKDOUG']
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
title = 'Unknown'
mi = MetaInformation(title, ['Unknown'])
stream.seek(0)
try:
if stream.read(10) not in MAGIC:
print >>sys.stderr, u'Couldn\'t read IMP header from file'
return mi
def cString(skip=0):
result = ''
while 1:
data = stream.read(1)
if data == '\x00':
if not skip: return result
skip -= 1
result, data = '', ''
result += data
stream.read(38) # skip past some uninteresting headers
_, category, title, author = cString(), cString(), cString(1), cString(2)
if title:
mi.title = title
if author:
src = author.split('&')
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = author
if category:
mi.category = category
except Exception, err:
msg = u'Couldn\'t read metadata from imp: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: imp-meta file.imp')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -9,6 +9,8 @@ from calibre.ebooks.metadata.fb2 import get_metadata as fb2_metadata
from calibre.ebooks.lrf.meta import get_metadata as lrf_metadata
from calibre.ebooks.metadata.pdf import get_metadata as pdf_metadata
from calibre.ebooks.metadata.lit import get_metadata as lit_metadata
from calibre.ebooks.metadata.imp import get_metadata as imp_metadata
from calibre.ebooks.metadata.rb import get_metadata as rb_metadata
from calibre.ebooks.metadata.epub import get_metadata as epub_metadata
from calibre.ebooks.metadata.html import get_metadata as html_metadata
from calibre.ebooks.mobi.reader import get_metadata as mobi_metadata
@ -22,7 +24,7 @@ from calibre.ebooks.metadata import MetaInformation
_METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'fb2', 'pdf', 'prc',
'epub', 'lit', 'lrf', 'mobi',
'epub', 'lit', 'lrf', 'mobi', 'rb', 'imp'
]
# The priorities for loading metadata from different file types

View File

@ -0,0 +1,68 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Ashish Kulkarni <kulkarni.ashish@gmail.com>'
'''Read meta information from RB files'''
import sys, os, struct
from calibre.ebooks.metadata import MetaInformation
# Expected first 14 bytes of an RB file.  get_metadata() returns the default
# metadata when the stream does not start with exactly this signature.
MAGIC = '\xb0\x0c\xb0\x0c\x02\x00NUVO\x00\x00\x00\x00'
def get_metadata(stream):
""" Return metadata as a L{MetaInfo} object """
title = 'Unknown'
mi = MetaInformation(title, ['Unknown'])
stream.seek(0)
try:
if not stream.read(14) == MAGIC:
print >>sys.stderr, u'Couldn\'t read RB header from file'
return mi
stream.read(10)
read_i32 = lambda: struct.unpack('<I', stream.read(4))[0]
stream.seek(read_i32())
toc_count = read_i32()
for i in range(toc_count):
stream.read(32)
length, offset, flag = read_i32(), read_i32(), read_i32()
if flag == 2: break
else:
print >>sys.stderr, u'Couldn\'t find INFO from RB file'
return mi
stream.seek(offset)
info = stream.read(length).splitlines()
for line in info:
if not '=' in line:
continue
key, value = line.split('=')
if key.strip() == 'TITLE':
mi.title = value.strip()
elif key.strip() == 'AUTHOR':
src = value.split('&')
authors = []
for au in src:
authors += au.split(',')
mi.authors = authors
mi.author = value
except Exception, err:
msg = u'Couldn\'t read metadata from rb: %s with error %s'%(mi.title, unicode(err))
print >>sys.stderr, msg.encode('utf8')
raise
return mi
def main(args=sys.argv):
if len(args) != 2:
print >>sys.stderr, _('Usage: rb-meta file.rb')
print >>sys.stderr, _('No filename specified.')
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print get_metadata(open(path, 'rb'))
return 0
if __name__ == '__main__':
sys.exit(main())

View File

@ -21,6 +21,8 @@ entry_points = {
'rtf-meta = calibre.ebooks.metadata.rtf:main',
'pdf-meta = calibre.ebooks.metadata.pdf:main',
'lit-meta = calibre.ebooks.metadata.lit:main',
'imp-meta = calibre.ebooks.metadata.imp:main',
'rb-meta = calibre.ebooks.metadata.rb:main',
'opf-meta = calibre.ebooks.metadata.opf:main',
'epub-meta = calibre.ebooks.metadata.epub:main',
'txt2lrf = calibre.ebooks.lrf.txt.convert_from:main',
@ -197,6 +199,8 @@ def setup_completion(fatal_errors):
f.write(opts_and_exts('rtf-meta', metaop, ['rtf']))
f.write(opts_and_exts('pdf-meta', metaop, ['pdf']))
f.write(opts_and_exts('lit-meta', metaop, ['lit']))
f.write(opts_and_exts('imp-meta', metaop, ['imp']))
f.write(opts_and_exts('rb-meta', metaop, ['rb']))
f.write(opts_and_exts('opf-meta', metaop, ['opf']))
f.write(opts_and_exts('epub-meta', epub_meta, ['epub']))
f.write(opts_and_exts('lrfviewer', lrfviewerop, ['lrf']))
@ -377,7 +381,8 @@ def install_man_pages(fatal_errors):
prog = src[:src.index('=')].strip()
if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta',
'markdown-calibre', 'calibre-debug', 'fb2-meta',
'calibre-fontconfig', 'calibre-parallel'):
'calibre-fontconfig', 'calibre-parallel',
'rb-meta', 'imp-meta'):
continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,