Add XML metadata export to calibredb. Fixes #1159 (Feature request for calibredb's list output to have CSV option)

2025-07-09 03:04:10 -04:00 · 2008-10-26 12:19:14 -07:00 · 2008-10-26 12:19:14 -07:00 · 59701a7bb8
commit 59701a7bb8
parent 731b76779b
5 changed files with 98 additions and 8 deletions
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -44,7 +44,6 @@ from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
    opf_traverse, create_metadata, rebase_toc
 from calibre.ebooks.epub import config as common_config
 from calibre.ptempfile import TemporaryDirectory
 from calibre.ebooks.metadata import MetaInformation
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ebooks.epub import initialize_container, PROFILES
@ -299,8 +298,10 @@ def convert(htmlfile, opts, notification=None):
        if has_title_page:
            opf.create_guide_element()
            opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
-            with open(opf_path, 'wb') as f:
+        
-                f.write(opf.render())
+        opf.add_path_to_manifest(os.path.join(tdir, 'content', 'resources', '_cover_.jpg'), 'image/jpeg')    
        with open(opf_path, 'wb') as f:
            f.write(opf.render())
        epub = initialize_container(opts.output)
        epub.add_dir(tdir)
        if opts.show_opf:
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -934,6 +934,11 @@ def merge_metadata(htmlfile, opf, opts):
        if attr in ('authors', 'tags'):
            val = [i.strip() for i in val.split(',') if i.strip()]
        setattr(mi, attr, val)
    cover = getattr(opts, 'cover', False)
    if cover and os.path.exists(cover):
        mi.cover = os.path.abspath(cover)
    if not mi.title:
        mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
    if not mi.authors:
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -418,7 +418,8 @@ class OPF(object):
    tags_path       = XPath('descendant::*[re:match(name(), "subject", "i")]')
    isbn_path       = XPath('descendant::*[re:match(name(), "identifier", "i") and '+
                            '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
-    manifest_path   = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]') 
+    manifest_path   = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
    manifest_ppath  = XPath('descendant::*[re:match(name(), "manifest", "i")]') 
    spine_path      = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
    guide_path      = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
@ -520,6 +521,20 @@ class OPF(object):
        manifest[index:index+1] = items
        return [i.get('id') for i in items]
    def add_path_to_manifest(self, path, media_type):
        '''
        Ensure the file at `path` has an entry in the manifest.

        Every item yielded by :meth:`itermanifest` has its ``href`` resolved
        against ``self.base_dir`` and compared, as an absolute path, with
        `path`. If one of them matches, nothing is changed. Otherwise a new
        item is created with :meth:`create_manifest_item` and appended to the
        first element matched by ``manifest_ppath``.

        :param path: Path of the file to register; made absolute before comparing.
        :param media_type: Passed through to :meth:`create_manifest_item`.
        '''
        target = os.path.abspath(path)
        for entry in self.itermanifest():
            # hrefs use '/' as separator; rebuild them as a native path under base_dir
            segments = entry.get('href', '').split('/')
            candidate = os.path.abspath(os.path.join(self.base_dir, *segments))
            if candidate == target:
                # Already present in the manifest, nothing to add
                return
        # The manifest stores hrefs relative to base_dir with '/' separators
        href = relpath(target, self.base_dir).replace(os.sep, '/')
        new_item = self.create_manifest_item(href, media_type)
        self.manifest_ppath(self.root)[0].append(new_item)
    def iterspine(self):
        '''
        Return the result of evaluating the class level ``spine_path`` XPath
        (``itemref`` children of the ``spine`` element) against ``self.root``.
        '''
        spine_items = self.spine_path(self.root)
        return spine_items
--- a/src/calibre/library/cli.py
+++ b/src/calibre/library/cli.py
@ -23,6 +23,41 @@ from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
 # Names of the per-book metadata fields known to the command line tools.
 # do_export_db() copies every one of these except 'path' out of each record.
 FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'path'])
 # Genshi markup template rendered by do_export_db(). It is generated with a
 # single variable, `data`: an iterable of dict-like records providing the keys
 # id, title, authors, publisher, rating, timestamp, size, tags, comments,
 # series, series_index, isbn, cover and formats. authors, tags and formats are
 # iterated, so they must be sequences; the tags, series, cover and formats
 # elements are only emitted when the corresponding value is truthy.
 XML_TEMPLATE = '''\
 <?xml version="1.0"  encoding="UTF-8"?>
 <calibredb xmlns:py="http://genshi.edgewall.org/">
 <py:for each="record in data">
    <record>
        <id>${record['id']}</id>
        <title>${record['title']}</title>
        <authors>
        <py:for each="author in record['authors']">
            <author>$author</author>
        </py:for>
        </authors>
        <publisher>${record['publisher']}</publisher>
        <rating>${record['rating']}</rating>
        <date>${record['timestamp']}</date>
        <size>${record['size']}</size>
        <tags py:if="record['tags']">
        <py:for each="tag in record['tags']">
            <tag>$tag</tag>
        </py:for>
        </tags>
        <comments>${record['comments']}</comments>
        <series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
        <isbn>${record['isbn']}</isbn>
        <cover py:if="record['cover']">${record['cover']}</cover>
        <formats py:if="record['formats']">
        <py:for each="path in record['formats']">
            <format>$path</format>
        </py:for>
        </formats>
    </record>
 </py:for> 
 </calibredb>
 '''
 def get_parser(usage):
    parser = OptionParser(usage)
    go = parser.add_option_group('GLOBAL OPTIONS')
@ -417,9 +452,45 @@ an opf file). You can get id numbers from the list command.
    do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author)
    return 0
 def do_export_db(db):
    '''
    Print the metadata of every record in `db` to stdout as XML.

    One dictionary is built per record in ``db.data`` containing every field
    in FIELDS except ``path``, plus ``id``, ``cover`` (path to ``cover.jpg``
    in the book's directory, or None if that file does not exist) and
    ``formats`` (one file path per format reported by ``db.formats``). The
    list of dictionaries is rendered through XML_TEMPLATE.

    :param db: Database object; must provide ``refresh``, ``data``,
               ``library_path``, ``path``, ``construct_file_name`` and
               ``formats``.
    '''
    # NOTE(review): presumably re-sorts the cached data by timestamp -- confirm
    # against the database class.
    db.refresh('timestamp', True)
    data = []
    for record in db.data:
        x = {}
        for field in FIELDS:
            if field != 'path':
                x[field] = record[field]
        x['id'] = record[0]
        # authors/tags arrive as comma separated strings in which '|' stands
        # for a literal comma inside a single name (presumably the database's
        # own escaping -- confirm). An empty/None value yields an empty list
        # instead of raising AttributeError on .split().
        x['authors'] = [i.replace('|', ',') for i in x['authors'].split(',')] if x['authors'] else []
        x['tags'] = [i.replace('|', ',').strip() for i in x['tags'].split(',')] if x['tags'] else []
        book_dir = os.path.join(db.library_path, db.path(record['id'], index_is_id=True))
        cover = os.path.join(book_dir, 'cover.jpg')
        x['cover'] = cover if os.path.exists(cover) else None
        # Build format paths by concatenation rather than with a '%s'
        # template: a '%' anywhere in the library path or file name would
        # otherwise be interpreted as a format specifier.
        base = book_dir + os.sep + db.construct_file_name(record['id'])
        formats = db.formats(record['id'], index_is_id=True)
        x['formats'] = [base + '.' + fmt.lower() for fmt in formats.split(',')] if formats else []
        data.append(x)
    from calibre.utils.genshi.template import MarkupTemplate
    template = MarkupTemplate(XML_TEMPLATE)
    # Single parenthesised argument: prints the same under the Python 2
    # print statement and also parses as a function call.
    print(template.generate(data=data).render('xml'))
 def command_export_db(args, dbpath):
    '''
    Entry point for the ``export_db`` sub-command.

    Parses the command line, opens the database with ``get_db`` and hands it
    to :func:`do_export_db`, which prints the XML.

    :param args: Additional command line arguments for this sub-command.
    :param dbpath: Database location, passed through to ``get_db``.
    :return: Always 0.
    '''
    usage = _('''\
 %prog export_db [options]
 Export the metadata in the database as an XML file. 
 ''')
    option_parser = get_parser(usage)
    # NOTE(review): sys.argv[1:] is prepended to args here; verify this is
    # intended and matches how the other commands build their argument list.
    parsed = option_parser.parse_args(sys.argv[1:]+args)
    options = parsed[0]
    database = get_db(dbpath, options)
    do_export_db(database)
    return 0
 def main(args=sys.argv):
    commands = ('list', 'add', 'remove', 'add_format', 'remove_format',
-                'show_metadata', 'set_metadata', 'export')
+                'show_metadata', 'set_metadata', 'export', 'export_db')
    parser = OptionParser(_(
 '''\
 %%prog command [options] [arguments]
--- a/src/calibre/library/database.py
+++ b/src/calibre/library/database.py
@ -1573,9 +1573,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
                au = _('Unknown')
            fname = '%s - %s.%s'%(title, au, format.lower())
            fname = sanitize_file_name(fname)
-            f = open(os.path.join(dir, fname), 'r+b')
+            f = open(os.path.join(dir, fname), 'w+b')
            f.seek(0)
            f.truncate()
            f.write(data)
            f.seek(0)
            try: