FB2 metadata support

2026-06-06 22:15:22 -04:00 · 2008-05-19 10:13:42 +06:00
parent cbf7c8bf8c
commit 091b570092
15 changed files with 4452 additions and 3545 deletions
@@ -90,9 +90,9 @@ def option_parser(usage, gui_mode=False):
                    dest="author", help=_("Set the author(s). Multiple authors should be set as a comma separated list. Default: %default"), 
                    default=_('Unknown'))
    metadata.add_option("--comment", action="store", type="string", \
-                    dest="freetext", help=_("Set the comment."), default='  ')
+                    dest="freetext", help=_("Set the comment."), default='Unknown')
    metadata.add_option("--category", action="store", type="string", \
-                    dest="category", help=_("Set the category"), default='  ')    
+                    dest="category", help=_("Set the category"), default='Unknown')    
    metadata.add_option('--title-sort', action='store', default='', dest='title_sort',
                      help=_('Sort key for the title'))
    metadata.add_option('--author-sort', action='store', default='', dest='author_sort',
@@ -71,7 +71,7 @@ def handle_archive(path):
    files = []
    cdir = traverse_subdirs(tdir)
    file = None
-    for ext in ('lit', 'rtf', 'pdf', 'txt', 'epub', 'mobi', 'prc'):
+    for ext in ('lit', 'rtf', 'fb2','pdf', 'txt', 'epub', 'mobi', 'prc'):
        pat = os.path.join(cdir, '*.'+ext)
        files.extend(glob.glob(pat))
    file = largest_file(files)
@@ -7,6 +7,7 @@ import os, sys, tempfile, subprocess, shutil, logging, glob

 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.ebooks.lrf import option_parser as lrf_option_parser
+from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks import ConversionError
 from calibre.ebooks.lrf.html.convert_from import process_file as html_process_file
 from calibre import setup_cli_handlers, __appname__
@@ -59,16 +60,29 @@ def process_file(path, options, logger=None):
        logger = logging.getLogger('fb22lrf')
        setup_cli_handlers(logger, level)
    fb2 = os.path.abspath(os.path.expanduser(path))
+    f = open(fb2, 'rb')
+    mi = get_metadata(f, 'fb2')
+    f.close()
    htmlfile = generate_html(fb2, options.encoding, logger)
    tdir = os.path.dirname(htmlfile)
    cwd = os.getcwdu()
    try:
-	if not options.output:
-	    ext = '.lrs' if options.lrs else '.lrf'
-	    options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
-	options.output = os.path.abspath(os.path.expanduser(options.output))
+        if not options.output:
+            ext = '.lrs' if options.lrs else '.lrf'
+            options.output = os.path.abspath(os.path.basename(os.path.splitext(path)[0]) + ext)
+        options.output = os.path.abspath(os.path.expanduser(options.output))
+        if not mi.title:
+            mi.title = os.path.splitext(os.path.basename(rtf))[0]
+        if (not options.title or options.title == 'Unknown'):
+            options.title = mi.title
+        if (not options.author or options.author == 'Unknown') and mi.authors:
+            options.author = mi.authors.pop()
+        if (not options.category or options.category == 'Unknown') and mi.category:
+            options.category = mi.category
+        if (not options.freetext or options.freetext == 'Unknown') and mi.comments:
+            options.freetext = mi.comments
        os.chdir(tdir)
-	html_process_file(htmlfile, options, logger)
+        html_process_file(htmlfile, options, logger)
    finally:
        os.chdir(cwd)
        if hasattr(options, 'keep_intermediate_files') and options.keep_intermediate_files:
@@ -0,0 +1,42 @@
+#!/usr/bin/env python
+from __future__ import with_statement
+__license__   = 'GPL v3'
+__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
+
+'''Read meta information from fb2 files'''
+
+import sys, os
+
+from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
+from calibre.ebooks.metadata import MetaInformation
+
+def get_metadata(stream):
+    """ Return metadata as a L{MetaInfo} object """
+    soup =  BeautifulStoneSoup(stream.read())
+    firstname = soup.find("first-name").contents[0]
+    lastname = soup.find("last-name").contents[0]
+    author= [firstname+" "+lastname]
+    title = soup.find("book-title").string
+    comments = soup.find("annotation")
+    if comments and len(comments) > 1:
+            comments = comments.p.contents[0]
+    series = soup.find("sequence")
+    series_name = series['name']
+ #   series_index = series.index
+    mi = MetaInformation(title, author)
+    mi.comments = comments
+    mi.category = series_name
+ #   mi.series_index = series_index
+    return mi
+
+def main(args=sys.argv):
+    if len(args) != 2 or '--help' in args or '-h' in args:
+        print >>sys.stderr, _('Usage:'), args[0], _('mybook.fb2')
+        return 1
+    
+    path = os.path.abspath(os.path.expanduser(args[1]))
+    print unicode(get_metadata(open(path, 'rb')))
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
@@ -4,6 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, re, collections

 from calibre.ebooks.metadata.rtf  import get_metadata as rtf_metadata
+from calibre.ebooks.metadata.fb2  import get_metadata as fb2_metadata
 from calibre.ebooks.lrf.meta      import get_metadata as lrf_metadata
 from calibre.ebooks.metadata.pdf  import get_metadata as pdf_metadata
 from calibre.ebooks.metadata.lit  import get_metadata as lit_metadata
@@ -18,7 +19,7 @@ from calibre.ebooks.metadata import MetaInformation

 _METADATA_PRIORITIES = [
                       'html', 'htm', 'xhtml', 'xhtm',
-                       'rtf', 'pdf', 'prc',
+                       'rtf', 'fb2', 'pdf', 'prc',
                       'epub', 'lit', 'lrf', 'mobi',
                      ]

@@ -26,6 +26,7 @@ TRANSLATIONS = [
                'it',
                'bg',
                'nds',
+                'ru',
                ]

 def source_files():
@@ -38,6 +39,8 @@ def source_files():
                

 def update_po_files(tarball):
+    if not os.getcwd().endswith('translations'):
+        os.chdir('translations')
    tf = tarfile.open(tarball, 'r:gz')
    next = tf.next()
    while next is not None: