Support for reading metadata from FB2 files thanks to Anatoly Shipitsin

This commit is contained in:
Kovid Goyal 2008-05-19 09:24:51 -07:00
commit f2db341ac1
6 changed files with 69 additions and 11 deletions

View File

@ -90,9 +90,9 @@ def option_parser(usage, gui_mode=False):
dest="author", help=_("Set the author(s). Multiple authors should be set as a comma separated list. Default: %default"),
default=_('Unknown'))
metadata.add_option("--comment", action="store", type="string", \
dest="freetext", help=_("Set the comment."), default=' ')
dest="freetext", help=_("Set the comment."), default='Unknown')
metadata.add_option("--category", action="store", type="string", \
dest="category", help=_("Set the category"), default=' ')
dest="category", help=_("Set the category"), default='Unknown')
metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
help=_('Sort key for the title'))
metadata.add_option('--author-sort', action='store', default='', dest='author_sort',

View File

@ -71,7 +71,7 @@ def handle_archive(path):
files = []
cdir = traverse_subdirs(tdir)
file = None
for ext in ('lit', 'rtf', 'pdf', 'txt', 'epub', 'mobi', 'prc'):
for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
pat = os.path.join(cdir, '*.'+ext)
files.extend(glob.glob(pat))
file = largest_file(files)

View File

@ -7,6 +7,7 @@ import os, sys, tempfile, subprocess, shutil, logging, glob
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks.metadata.meta import get_metadata
from calibre.ebooks import ConversionError
from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
from calibre import setup_cli_handlers, __appname__
@ -59,16 +60,29 @@ def process_file(path, options, logger=None):
logger = logging.getLogger('fb22lrf')
setup_cli_handlers(logger, level)
fb2 = os.path.abspath(os.path.expanduser(path))
f = open(fb2, 'rb')
mi = get_metadata(f, 'fb2')
f.close()
htmlfile = generate_html(fb2, options.encoding, logger)
tdir = os.path.dirname(htmlfile)
cwd = os.getcwdu()
try:
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
if not options.output:
ext = '.lrs' if options.lrs else '.lrf'
options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
options.output = os.path.abspath(os.path.expanduser(options.output))
if not mi.title:
mi.title = os.path.splitext(os.path.basename(rtf))[0]
if (not options.title or options.title == 'Unknown'):
options.title = mi.title
if (not options.author or options.author == 'Unknown') and mi.authors:
options.author = mi.authors.pop()
if (not options.category or options.category == 'Unknown') and mi.category:
options.category = mi.category
if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
options.freetext = mi.comments
os.chdir(tdir)
html_process_file(htmlfile, options, logger)
html_process_file(htmlfile, options, logger)
finally:
os.chdir(cwd)
if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
from __future__ import with_statement
__license__ = 'GPL v3'
__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
'''Read meta information from fb2 files'''
import sys, os
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
from calibre.ebooks.metadata import MetaInformation
def get_metadata(stream):
    """
    Read metadata from an FB2 file and return it as a L{MetaInformation} object.

    @param stream: File-like object whose C{read()} returns the FB2 XML.
    @return: L{MetaInformation} built from the first C{first-name} /
             C{last-name}, C{book-title}, C{annotation} and C{sequence}
             elements found in the document. A field whose element is
             missing is left empty (C{None}, or an empty author list)
             instead of raising.
    """
    soup = BeautifulStoneSoup(stream.read())

    def first_child(tag_name):
        # First child node of the first <tag_name> element, or None when the
        # element is missing or has no children.
        tag = soup.find(tag_name)
        if tag is None or not tag.contents:
            return None
        return tag.contents[0]

    # Either half of the name may be absent; join whatever is present.
    name_parts = [part for part in (first_child("first-name"),
                                    first_child("last-name")) if part]
    authors = []
    if name_parts:
        authors.append(" ".join(name_parts))

    # NOTE(review): title is None when <book-title> is missing; this assumes
    # MetaInformation accepts a None title and an empty author list -- confirm.
    title_tag = soup.find("book-title")
    title = None
    if title_tag is not None:
        title = title_tag.string

    # Use the first node of the annotation's first paragraph as the comment.
    # Never hand the raw soup tag to callers.
    comments = None
    annotation = soup.find("annotation")
    if annotation is not None:
        paragraph = annotation.find("p")
        if paragraph is not None and paragraph.contents:
            comments = paragraph.contents[0]

    # <sequence> is not present in every book, and may lack a name attribute.
    series = soup.find("sequence")
    series_name = None
    if series is not None:
        series_name = series.get("name")

    mi = MetaInformation(title, authors)
    mi.comments = comments
    # The series name is exposed through the category field; the position
    # of the book within the series is not extracted.
    mi.category = series_name
    return mi
def main(args=sys.argv):
if len(args) != 2 or '--help' in args or '-h' in args:
print >>sys.stderr, _('Usage:'), args[0], 'mybook.fb2'
return 1
path = os.path.abspath(os.path.expanduser(args[1]))
print unicode(get_metadata(open(path, 'rb')))
return 0
# Run the command line interface when executed as a script; the return
# value of main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())

View File

@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, re, collections
from calibre.ebooks.metadata.rtf import get_metadata as rtf_metadata
from calibre.ebooks.metadata.fb2 import get_metadata as fb2_metadata
from calibre.ebooks.lrf.meta import get_metadata as lrf_metadata
from calibre.ebooks.metadata.pdf import get_metadata as pdf_metadata
from calibre.ebooks.metadata.lit import get_metadata as lit_metadata
@ -18,7 +19,7 @@ from calibre.ebooks.metadata import MetaInformation
_METADATA_PRIORITIES = [
'html', 'htm', 'xhtml', 'xhtm',
'rtf', 'pdf', 'prc',
'rtf', 'fb2', 'pdf', 'prc',
'epub', 'lit', 'lrf', 'mobi',
]

View File

@ -37,6 +37,7 @@ entry_points = {
'pdf2lrf = calibre.ebooks.lrf.pdf.convert_from:main',
'mobi2lrf = calibre.ebooks.lrf.mobi.convert_from:main',
'fb22lrf = calibre.ebooks.lrf.fb2.convert_from:main',
'fb2-meta = calibre.ebooks.metadata.fb2:main',
'any2lrf = calibre.ebooks.lrf.any.convert_from:main',
'lrf2lrs = calibre.ebooks.lrf.parser:main',
'lrs2lrf = calibre.ebooks.lrf.lrs.convert_from:main',
@ -45,7 +46,7 @@ entry_points = {
'librarything = calibre.ebooks.metadata.library_thing:main',
'mobi2oeb = calibre.ebooks.mobi.reader:main',
'lrf2html = calibre.ebooks.lrf.html.convert_to:main',
'calibre-debug = calibre.debug:main',
'calibre-debug = calibre.debug:main',
],
'gui_scripts' : [
__appname__+' = calibre.gui2.main:main',
@ -347,7 +348,7 @@ def install_man_pages(fatal_errors):
for src in entry_points['console_scripts']:
prog = src[:src.index('=')].strip()
if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta',
'markdown-calibre', 'calibre-debug'):
'markdown-calibre', 'calibre-debug', 'fb2-meta'):
continue
help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
'--section', '1', '--no-info', '--include',