Add XML metadata export to calibredb. Fixes #1159 (Feature request for calibredb's list output to have CSV option)

This commit is contained in:
Kovid Goyal 2008-10-26 12:19:14 -07:00
parent 731b76779b
commit 59701a7bb8
5 changed files with 98 additions and 8 deletions

View File

@ -44,7 +44,6 @@ from calibre.ebooks.html import Processor, merge_metadata, get_filelist,\
opf_traverse, create_metadata, rebase_toc opf_traverse, create_metadata, rebase_toc
from calibre.ebooks.epub import config as common_config from calibre.ebooks.epub import config as common_config
from calibre.ptempfile import TemporaryDirectory from calibre.ptempfile import TemporaryDirectory
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.opf2 import OPF from calibre.ebooks.metadata.opf2 import OPF
from calibre.ebooks.epub import initialize_container, PROFILES from calibre.ebooks.epub import initialize_container, PROFILES
@ -299,8 +298,10 @@ def convert(htmlfile, opts, notification=None):
if has_title_page: if has_title_page:
opf.create_guide_element() opf.create_guide_element()
opf.add_guide_item('cover', 'Cover', 'content/'+spine[0]) opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
with open(opf_path, 'wb') as f:
f.write(opf.render()) opf.add_path_to_manifest(os.path.join(tdir, 'content', 'resources', '_cover_.jpg'), 'image/jpeg')
with open(opf_path, 'wb') as f:
f.write(opf.render())
epub = initialize_container(opts.output) epub = initialize_container(opts.output)
epub.add_dir(tdir) epub.add_dir(tdir)
if opts.show_opf: if opts.show_opf:

View File

@ -934,6 +934,11 @@ def merge_metadata(htmlfile, opf, opts):
if attr in ('authors', 'tags'): if attr in ('authors', 'tags'):
val = [i.strip() for i in val.split(',') if i.strip()] val = [i.strip() for i in val.split(',') if i.strip()]
setattr(mi, attr, val) setattr(mi, attr, val)
cover = getattr(opts, 'cover', False)
if cover and os.path.exists(cover):
mi.cover = os.path.abspath(cover)
if not mi.title: if not mi.title:
mi.title = os.path.splitext(os.path.basename(htmlfile))[0] mi.title = os.path.splitext(os.path.basename(htmlfile))[0]
if not mi.authors: if not mi.authors:

View File

@ -418,7 +418,8 @@ class OPF(object):
tags_path = XPath('descendant::*[re:match(name(), "subject", "i")]') tags_path = XPath('descendant::*[re:match(name(), "subject", "i")]')
isbn_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '+ isbn_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '+
'(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]') '(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
manifest_path = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]') manifest_path = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
manifest_ppath = XPath('descendant::*[re:match(name(), "manifest", "i")]')
spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]') spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]') guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
@ -520,6 +521,20 @@ class OPF(object):
manifest[index:index+1] = items manifest[index:index+1] = items
return [i.get('id') for i in items] return [i.get('id') for i in items]
def add_path_to_manifest(self, path, media_type):
    '''
    Add ``path`` to the manifest unless an item for it is already present.

    Every existing manifest item's ``href`` is resolved against
    ``self.base_dir`` and compared, as an absolute path, with the absolute
    form of ``path``. If none matches, a new item is created and appended to
    the first manifest element found in the document.

    :param path: Filesystem path of the file to register.
    :param media_type: Media type passed to ``create_manifest_item``.
    '''
    target = os.path.abspath(path)

    def resolves_to_target(entry):
        # hrefs use '/' as separator; rebuild them with the native one
        # before comparing.
        parts = entry.get('href', '').split('/')
        candidate = os.path.join(self.base_dir, *parts)
        return os.path.abspath(candidate) == target

    if any(resolves_to_target(entry) for entry in self.itermanifest()):
        return

    # Manifest hrefs are stored relative to the OPF base directory and
    # always with forward slashes.
    href = relpath(target, self.base_dir).replace(os.sep, '/')
    new_item = self.create_manifest_item(href, media_type)
    self.manifest_ppath(self.root)[0].append(new_item)
def iterspine(self): def iterspine(self):
return self.spine_path(self.root) return self.spine_path(self.root)

View File

@ -23,6 +23,41 @@ from calibre.ebooks.metadata.opf import OPFCreator, OPFReader
FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'path']) FIELDS = set(['title', 'authors', 'publisher', 'rating', 'timestamp', 'size', 'tags', 'comments', 'series', 'series_index', 'formats', 'isbn', 'path'])
# Genshi markup template used to serialise library metadata as XML.
# It expects a single template variable, ``data``: an iterable of per-record
# mappings with the keys id, title, authors, publisher, rating, timestamp,
# size, tags, comments, series, series_index, isbn, cover and formats.
# ``authors``, ``tags`` and ``formats`` are iterated, so they must be
# sequences; the <tags>, <series>, <cover> and <formats> elements are only
# emitted when the corresponding value is truthy.
XML_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<calibredb xmlns:py="http://genshi.edgewall.org/">
<py:for each="record in data">
<record>
<id>${record['id']}</id>
<title>${record['title']}</title>
<authors>
<py:for each="author in record['authors']">
<author>$author</author>
</py:for>
</authors>
<publisher>${record['publisher']}</publisher>
<rating>${record['rating']}</rating>
<date>${record['timestamp']}</date>
<size>${record['size']}</size>
<tags py:if="record['tags']">
<py:for each="tag in record['tags']">
<tag>$tag</tag>
</py:for>
</tags>
<comments>${record['comments']}</comments>
<series py:if="record['series']" index="${record['series_index']}">${record['series']}</series>
<isbn>${record['isbn']}</isbn>
<cover py:if="record['cover']">${record['cover']}</cover>
<formats py:if="record['formats']">
<py:for each="path in record['formats']">
<format>$path</format>
</py:for>
</formats>
</record>
</py:for>
</calibredb>
'''
def get_parser(usage): def get_parser(usage):
parser = OptionParser(usage) parser = OptionParser(usage)
go = parser.add_option_group('GLOBAL OPTIONS') go = parser.add_option_group('GLOBAL OPTIONS')
@ -417,9 +452,45 @@ an opf file). You can get id numbers from the list command.
do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author) do_export(get_db(dbpath, opts), ids, dir, opts.single_dir, opts.by_author)
return 0 return 0
def do_export_db(db):
    '''
    Print the metadata of every record in ``db`` to stdout as XML.

    One dictionary is built per record in ``db.data``. It holds every field
    named in FIELDS except ``path``, plus ``id``, ``cover`` (path to
    ``cover.jpg`` in the book's directory, or None when that file does not
    exist) and ``formats`` (one file path per available format). The list of
    dictionaries is then rendered through XML_TEMPLATE.

    :param db: Library database object. It must provide ``refresh``,
               ``data``, ``library_path``, ``path``, ``construct_file_name``
               and ``formats``.
    '''
    db.refresh('timestamp', True)
    data = []
    for record in db.data:
        x = {}
        for field in FIELDS:
            if field != 'path':
                x[field] = record[field]
        data.append(x)
        x['id'] = record[0]
        x['formats'] = []
        # Authors and tags are stored as comma separated strings in which a
        # literal comma inside a name is encoded as '|'. Either value may be
        # empty, in which case an empty list is exported instead of failing
        # on ``None.split``.
        x['authors'] = [i.replace('|', ',') for i in x['authors'].split(',')] \
            if x['authors'] else []
        x['tags'] = [i.replace('|', ',').strip() for i in x['tags'].split(',')] \
            if x['tags'] else []
        book_dir = os.path.join(db.library_path,
                                db.path(record['id'], index_is_id=True))
        cover = os.path.join(book_dir, 'cover.jpg')
        x['cover'] = cover if os.path.exists(cover) else None
        # Build format paths by concatenation, not with the % operator: the
        # library path or file name may itself contain '%' characters, which
        # would be misread as format directives.
        base = book_dir + os.sep + db.construct_file_name(record['id']) + '.'
        formats = db.formats(record['id'], index_is_id=True)
        if formats:
            x['formats'] = [base + fmt.lower() for fmt in formats.split(',')]
    from calibre.utils.genshi.template import MarkupTemplate
    template = MarkupTemplate(XML_TEMPLATE)
    # Parenthesised so the statement parses identically on Python 2 and 3.
    print(template.generate(data=data).render('xml'))
def command_export_db(args, dbpath):
    '''
    Entry point for the ``export_db`` sub-command.

    Parses the options found in ``sys.argv[1:]`` followed by ``args``, opens
    the database at ``dbpath`` and writes its metadata as XML via
    ``do_export_db``.

    :param args: Extra command line arguments, appended to ``sys.argv[1:]``
                 before option parsing.
    :param dbpath: Location of the database, handed to ``get_db``.
    :return: Always 0.
    '''
    usage = _('''\
%prog export_db [options]
Export the metadata in the database as an XML file.
''')
    parser = get_parser(usage)
    command_line = sys.argv[1:]+args
    opts, _remaining = parser.parse_args(command_line)
    database = get_db(dbpath, opts)
    do_export_db(database)
    return 0
def main(args=sys.argv): def main(args=sys.argv):
commands = ('list', 'add', 'remove', 'add_format', 'remove_format', commands = ('list', 'add', 'remove', 'add_format', 'remove_format',
'show_metadata', 'set_metadata', 'export') 'show_metadata', 'set_metadata', 'export', 'export_db')
parser = OptionParser(_( parser = OptionParser(_(
'''\ '''\
%%prog command [options] [arguments] %%prog command [options] [arguments]

View File

@ -1573,9 +1573,7 @@ ALTER TABLE books ADD COLUMN isbn TEXT DEFAULT "" COLLATE NOCASE;
au = _('Unknown') au = _('Unknown')
fname = '%s - %s.%s'%(title, au, format.lower()) fname = '%s - %s.%s'%(title, au, format.lower())
fname = sanitize_file_name(fname) fname = sanitize_file_name(fname)
f = open(os.path.join(dir, fname), 'r+b') f = open(os.path.join(dir, fname), 'w+b')
f.seek(0)
f.truncate()
f.write(data) f.write(data)
f.seek(0) f.seek(0)
try: try: