Support for reading metadata from FB2 files thanks to Anatoly Shipitsin

2025-12-06 05:05:03 -05:00 · 2008-05-19 09:24:51 -07:00 · 2008-05-19 09:24:51 -07:00 · f2db341ac1
commit f2db341ac1
parent b6b30e9576 afadf5dcac
6 changed files with 69 additions and 11 deletions
--- a/src/calibre/ebooks/lrf/init.py
+++ b/src/calibre/ebooks/lrf/init.py
@ -90,9 +90,9 @@ def option_parser(usage, gui_mode=False):
                    dest="author", help=_("Set the author(s). Multiple authors should be set as a comma separated list. Default: %default"), 
                    default=_('Unknown'))
    metadata.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Set the comment."), default='  ')
+                    dest="freetext", help=_("Set the comment."), default='Unknown')
    metadata.add_option("--category", action="store", type="string", \
-                    dest="category", help=_("Set the category"), default='  ')    
+                    dest="category", help=_("Set the category"), default='Unknown')    
    metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
                      help=_('Sort key for the title'))
    metadata.add_option('--author-sort', action='store', default='', dest='author_sort',
--- a/src/calibre/ebooks/lrf/any/convert_from.py
+++ b/src/calibre/ebooks/lrf/any/convert_from.py
@ -71,7 +71,7 @@ def handle_archive(path):
    files = []
    cdir = traverse_subdirs(tdir)
    file = None
-    for ext in ('lit', 'rtf', 'pdf', 'txt', 'epub', 'mobi', 'prc'):
+    for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
        pat = os.path.join(cdir, '*.'+ext)
        files.extend(glob.glob(pat))
    file = largest_file(files)
--- a/src/calibre/ebooks/lrf/fb2/convert_from.py
+++ b/src/calibre/ebooks/lrf/fb2/convert_from.py
@ -7,6 +7,7 @@ import os, sys, tempfile, subprocess, shutil, logging, glob
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre import setup_cli_handlers, __appname__
@ -59,16 +60,29 @@ def process_file(path, options, logger=None):
        logger = logging.getLogger('fb22lrf')
        setup_cli_handlers(logger, level)
    fb2 = os.path.abspath(os.path.expanduser(path))
    f = open(fb2, 'rb')
    mi = get_metadata(f, 'fb2')
    f.close()
    htmlfile = generate_html(fb2, options.encoding, logger)
    tdir = os.path.dirname(htmlfile)
    cwd = os.getcwdu()
    try:
-	if not options.output:
+        if not options.output:
-	    ext = '.lrs' if options.lrs else '.lrf'
+            ext = '.lrs' if options.lrs else '.lrf'
-	    options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-	options.output = os.path.abspath(os.path.expanduser(options.output))
+        options.output = os.path.abspath(os.path.expanduser(options.output))
        if not mi.title:
            mi.title = os.path.splitext(os.path.basename(rtf))[0]
        if (not options.title or options.title == 'Unknown'):
            options.title = mi.title
        if (not options.author or options.author == 'Unknown') and mi.authors:
            options.author = mi.authors.pop()
        if (not options.category or options.category == 'Unknown') and mi.category:
            options.category = mi.category
        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
            options.freetext = mi.comments
        os.chdir(tdir)
-	html_process_file(htmlfile, options, logger)
+        html_process_file(htmlfile, options, logger)
    finally:
        os.chdir(cwd)
        if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
--- a/src/calibre/ebooks/metadata/fb2.py
+++ b/src/calibre/ebooks/metadata/fb2.py
@ -0,0 +1,42 @@
 #!/usr/bin/env python
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
 '''Read meta information from fb2 files'''
 import sys, os
 from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
 from calibre.ebooks.metadata import MetaInformation
 def get_metadata(stream):
    """
    Read metadata from an FB2 (FictionBook XML) document.

    @param stream: File-like object positioned at the FB2 data. It is consumed
                   with a single C{read()} call and is not closed here.
    @return: A L{MetaInformation} object. The title comes from C{<book-title>},
             the single author from the first C{<first-name>}/C{<last-name>}
             elements, the comments from the first C{<p>} of C{<annotation>}
             and the category from the C{name} attribute of C{<sequence>}.
             Anything missing from the document is returned as C{None} (or an
             empty author list) instead of raising.
    """
    soup = BeautifulStoneSoup(stream.read())
    def first_content(tag):
        # First child node of tag, or None when the tag is absent or empty.
        if tag is None or not tag.contents:
            return None
        return tag.contents[0]
    # Only the first <first-name>/<last-name> found in the document is used;
    # either of them may legitimately be absent.
    name_parts = [first_content(soup.find(tag_name))
                  for tag_name in ("first-name", "last-name")]
    name_parts = [part for part in name_parts if part]
    authors = [" ".join(name_parts)] if name_parts else []
    title_tag = soup.find("book-title")
    title = title_tag.string if title_tag is not None else None
    # Use the text of the first <p> in the annotation, regardless of how many
    # children (e.g. whitespace nodes) the <annotation> element has.
    comments = None
    annotation = soup.find("annotation")
    if annotation is not None:
        comments = first_content(annotation.find("p"))
    # <sequence> is optional and its name attribute may be missing.
    series = soup.find("sequence")
    series_name = series.get("name") if series is not None else None
    mi = MetaInformation(title, authors)
    mi.comments = comments
    # NOTE: the series name is exposed through the category field; the series
    # index is not read.
    mi.category = series_name
    return mi
 def main(args=sys.argv):
    if len(args) != 2 or '--help' in args or '-h' in args:
        print >>sys.stderr, _('Usage:'), args[0], 'mybook.fb2'
        return 1
    path = os.path.abspath(os.path.expanduser(args[1]))
    print unicode(get_metadata(open(path, 'rb')))
    return 0
 if __name__ == '__main__':
    # Run as a script: use main()'s return value as the process exit status.
    raise SystemExit(main())
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, re, collections
 from calibre.ebooks.metadata.rtf  import get_metadata as rtf_metadata
 from calibre.ebooks.metadata.fb2  import get_metadata as fb2_metadata
 from calibre.ebooks.lrf.meta      import get_metadata as lrf_metadata
 from calibre.ebooks.metadata.pdf  import get_metadata as pdf_metadata
 from calibre.ebooks.metadata.lit  import get_metadata as lit_metadata
@ -18,7 +19,7 @@ from calibre.ebooks.metadata import MetaInformation
 _METADATA_PRIORITIES = [
                       'html', 'htm', 'xhtml', 'xhtm',
-                       'rtf', 'pdf', 'prc',
+                       'rtf', 'fb2', 'pdf', 'prc',
                       'epub', 'lit', 'lrf', 'mobi',
                      ]
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -37,6 +37,7 @@ entry_points = {
                             'pdf2lrf   = calibre.ebooks.lrf.pdf.convert_from:main',
                             'mobi2lrf  = calibre.ebooks.lrf.mobi.convert_from:main',
                             'fb22lrf   = calibre.ebooks.lrf.fb2.convert_from:main',
                             'fb2-meta  = calibre.ebooks.metadata.fb2:main',
                             'any2lrf   = calibre.ebooks.lrf.any.convert_from:main',
                             'lrf2lrs   = calibre.ebooks.lrf.parser:main',
                             'lrs2lrf   = calibre.ebooks.lrf.lrs.convert_from:main',
@ -347,7 +348,7 @@ def install_man_pages(fatal_errors):
    for src in entry_points['console_scripts']:
        prog = src[:src.index('=')].strip()
        if prog in ('prs500', 'pdf-meta', 'epub-meta', 'lit-meta', 
-                    'markdown-calibre', 'calibre-debug'):
+                    'markdown-calibre', 'calibre-debug', 'fb2-meta'):
            continue
        help2man = ('help2man', prog, '--name', 'part of %s'%__appname__,
                    '--section', '1', '--no-info', '--include',