Add XML metadata export to calibredb. Fixes #1159 (Feature request for calibredb's list output to have CSV option)

2025-08-30 23:00:21 -04:00 · 2008-10-26 12:19:14 -07:00 · 2008-10-26 12:19:14 -07:00 · 59701a7bb8
commit 59701a7bb8
parent 731b76779b
5 changed files with 98 additions and 8 deletions
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -44,7 +44,6 @@ from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc
 from calibre.ebooks.epub import config as common_config
 from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.epub import initialize_container, PROFILES
@ -299,8 +298,10 @@ def convert(htmlfile, opts, notification=None):
        if has_title_page:
            opf.create_guide_element()
            opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
-            with open(opf_path, 'wb') as f:
-                f.write(opf.render())
+        
+        opf.add_path_to_manifest(os.path.join(tdir, 'content', 'resources', '_cover_.jpg'), 'image/jpeg')    
+        with open(opf_path, 'wb') as f:
+            f.write(opf.render())
        epub = initialize_container(opts.output)
        epub.add_dir(tdir)
        if opts.show_opf:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -934,6 +934,11 @@ def merge_metadata(htmlfile, opf, opts):
        if attr in ('authors', 'tags'):
            val = [i.strip() for i in val.split(',') if i.strip()]
        setattr(mi, attr, val)
+        
+    cover = getattr(opts, 'cover', False)
+    if cover and os.path.exists(cover):
+        mi.cover = os.path.abspath(cover)
+        
    if not mi.title:
        mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
    if not mi.authors:
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -418,7 +418,8 @@ class OPF(object):
    tags_path       = XPath('descendant::*[re:match(name(), "subject", "i")]')
    isbn_path       = XPath('descendant::*[re:match(name(), "identifier", "i") and '+
                            '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
-    manifest_path   = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]') 
+    manifest_path   = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
+    manifest_ppath  = XPath('descendant::*[re:match(name(), "manifest", "i")]') 
    spine_path      = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
    guide_path      = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
    
@ -520,6 +521,20 @@ class OPF(object):
        manifest[index:index+1] = items
        return [i.get('id') for i in items]
    
+    def add_path_to_manifest(self, path, media_type):
+        '''
+        Make sure the file at `path` is listed in the manifest.
+
+        Every existing manifest item's href is resolved against
+        `self.base_dir` and compared, as an absolute path, with `path`. If
+        one matches, nothing is changed. Otherwise a new item is created for
+        `path` (href relative to `self.base_dir`, with '/' separators) with
+        the given `media_type` and appended to the first manifest element.
+        '''
+        target = os.path.abspath(path)
+        for entry in self.itermanifest():
+            href_parts = entry.get('href', '').split('/')
+            candidate = os.path.join(self.base_dir, *href_parts)
+            if os.path.abspath(candidate) == target:
+                # Already present in the manifest, nothing to add
+                return
+        href = relpath(target, self.base_dir).replace(os.sep, '/')
+        new_item = self.create_manifest_item(href, media_type)
+        self.manifest_ppath(self.root)[0].append(new_item)
+    
    def iterspine(self):
        return self.spine_path(self.root)
    
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -23,6 +23,41 @@ from calibre.ebooks.metadata.opf import OPFCreator, OPFReader

 FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'path'])

+# Genshi markup template that renders book records as a <calibredb> XML
+# document. It is rendered with a single variable, `data`: an iterable of
+# mappings with the keys id, title, authors, publisher, rating, timestamp,
+# size, tags, comments, series, series_index, isbn, cover and formats.
+# authors, tags and formats are iterated over (one child element per entry);
+# the <tags>, <series>, <cover> and <formats> elements are only emitted when
+# the corresponding value is truthy.
+XML_TEMPLATE = '''\
+<?xml version="1.0"  encoding="UTF-8"?>
+<calibredb xmlns:py="http://genshi.edgewall.org/">
+<py:for each="record in data">
+    <record>
+        <id>${record['id']}</id>
+        <title>${record['title']}</title>
+        <authors>
+        <py:for each="author in record['authors']">
+            <author>$author</author>
+        </py:for>
+        </authors>
+        <publisher>${record['publisher']}</publisher>
+        <rating>${record['rating']}</rating>
+        <date>${record['timestamp']}</date>
+        <size>${record['size']}</size>
+        <tags py:if="record['tags']">
+        <py:for each="tag in record['tags']">
+            <tag>$tag</tag>
+        </py:for>
+        </tags>
+        <comments>${record['comments']}</comments>
+        <series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
+        <isbn>${record['isbn']}</isbn>
+        <cover py:if="record['cover']">${record['cover']}</cover>
+        <formats py:if="record['formats']">
+        <py:for each="path in record['formats']">
+            <format>$path</format>
+        </py:for>
+        </formats>
+    </record>
+</py:for> 
+</calibredb>
+'''
+
 def get_parser(usage):
    parser = OptionParser(usage)
    go = parser.add_option_group('GLOBAL OPTIONS')
@ -417,9 +452,45 @@ an opf file). You can get id numbers from the list command.
    do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author)
    return 0

+def do_export_db(db):
+    '''
+    Print the metadata of every record in `db.data` to stdout as XML.
+
+    For each record a dictionary is built containing every field in FIELDS
+    except 'path', plus:
+
+      * id      - the first column of the record
+      * authors - list obtained by splitting the authors string on ','
+      * tags    - list obtained by splitting the tags string on ','
+      * cover   - path to cover.jpg in the book's directory, or None if
+                  that file does not exist
+      * formats - list of paths, one per format reported by `db.formats`
+
+    The list of dictionaries is rendered through XML_TEMPLATE.
+    '''
+    db.refresh('timestamp', True)
+    data = []
+    for record in db.data:
+        x = dict((field, record[field]) for field in FIELDS if field != 'path')
+        data.append(x)
+        x['id'] = record[0]
+        book_id = record['id']
+        # Entries are separated by ','; a '|' inside an entry is turned back
+        # into ','. Both fields are guarded so that a record without authors
+        # or tags yields an empty list instead of raising on None.split().
+        x['authors'] = [i.replace('|', ',') for i in x['authors'].split(',')] if x['authors'] else []
+        x['tags'] = [i.replace('|', ',').strip() for i in x['tags'].split(',')] if x['tags'] else []
+        book_dir = os.path.join(db.library_path, db.path(book_id, index_is_id=True))
+        cover = os.path.join(book_dir, 'cover.jpg')
+        x['cover'] = cover if os.path.exists(cover) else None
+        # Build format paths by concatenation rather than '%s' interpolation:
+        # a literal '%' in the library path or file name would otherwise be
+        # interpreted as a format directive.
+        base = book_dir + os.sep + db.construct_file_name(book_id)
+        formats = db.formats(book_id, index_is_id=True)
+        x['formats'] = [base + '.' + fmt.lower() for fmt in formats.split(',')] if formats else []
+    from calibre.utils.genshi.template import MarkupTemplate
+    template = MarkupTemplate(XML_TEMPLATE)
+    print template.generate(data=data).render('xml')
+
+def command_export_db(args, dbpath):
+    '''
+    Entry point for the export_db command.
+
+    Parses the command line options, opens the database via
+    `get_db(dbpath, opts)` and hands it to `do_export_db`. Always returns 0.
+    '''
+    usage = _('''\
+%prog export_db [options]
+
+Export the metadata in the database as an XML file. 
+''')
+    parser = get_parser(usage)
+    # NOTE(review): if `args` is itself derived from sys.argv, the options
+    # are passed to the parser twice here -- confirm against the caller.
+    opts = parser.parse_args(sys.argv[1:]+args)[0]
+    database = get_db(dbpath, opts)
+    do_export_db(database)
+    return 0
+
 def main(args=sys.argv):
    commands = ('list', 'add', 'remove', 'add_format', 'remove_format',
-                'show_metadata', 'set_metadata', 'export')
+                'show_metadata', 'set_metadata', 'export', 'export_db')
    parser = OptionParser(_(
 '''\
 %%prog command [options] [arguments]
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@ -1573,9 +1573,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                au = _('Unknown')
            fname = '%s - %s.%s'%(title, au, format.lower())
            fname = sanitize_file_name(fname)
-            f = open(os.path.join(dir, fname), 'r+b')
-            f.seek(0)
-            f.truncate()
+            f = open(os.path.join(dir, fname), 'w+b')
            f.write(data)
            f.seek(0)
            try: