Custom column metadata for PDF

This commit is contained in:
Kovid Goyal 2014-02-17 18:39:08 +05:30
parent 467e57093a
commit 6bec8a9743

View File

@ -6,14 +6,14 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re, sys
import re, sys, copy, json
from itertools import repeat
from collections import defaultdict
from lxml import etree
from lxml.builder import ElementMaker
from calibre import replace_entities
from calibre import prints
from calibre.ebooks.metadata import check_isbn, check_doi
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat, now
@ -39,6 +39,7 @@ NS_MAP = {
'x': 'adobe:ns:meta/',
'calibre': 'http://calibre-ebook.com/xmp-namespace',
'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index',
'calibreCC': 'http://calibre-ebook.com/xmp-namespace-custom-columns',
}
KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'}
@ -80,8 +81,8 @@ def serialize_xmp_packet(root, encoding='utf-8'):
def read_simple_property(elem):
# A simple property
if elem.text:
return replace_entities(elem.text)
return replace_entities(elem.get(expand('rdf:resource'), ''))
return elem.text
return elem.get(expand('rdf:resource'), '')
def read_lang_alt(parent):
# A text value with possible alternate values in different languages
@ -153,6 +154,28 @@ def read_series(root):
return series, series_index
return None, None
def read_user_metadata(mi, root):
from calibre.utils.config import from_json
from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
fields = set()
for item in XPath('//calibre:custom_metadata')(root):
for li in XPath('./rdf:Bag/rdf:li')(item):
name = XPath('descendant::calibreCC:name')(li)
if name:
name = name[0].text
if name.startswith('#') and name not in fields:
val = XPath('descendant::rdf:value')(li)
if val:
fm = val[0].text
try:
fm = json.loads(fm, object_hook=from_json)
decode_is_multiple(fm)
mi.set_user_metadata(name, fm)
fields.add(name)
except:
prints('Failed to read user metadata:', name)
import traceback
traceback.print_exc()
def read_xmp_identifers(parent):
''' For example:
@ -254,6 +277,8 @@ def metadata_from_xmp_packet(raw_bytes):
if identifiers:
mi.set_identifiers(identifiers)
read_user_metadata(mi, root)
return mi
def consolidate_metadata(info_mi, xmp_packet):
@ -326,6 +351,35 @@ def create_series(calibre, series, series_index):
si.text = '%.2f' % series_index
s.append(si)
def create_user_metadata(calibre, all_user_metadata):
from calibre.utils.config import to_json
from calibre.ebooks.metadata.book.json_codec import object_to_unicode, encode_is_multiple
s = calibre.makeelement(expand('calibre:custom_metadata'))
calibre.append(s)
bag = s.makeelement(expand('rdf:Bag'))
s.append(bag)
for name, fm in all_user_metadata.iteritems():
try:
fm = copy.copy(fm)
encode_is_multiple(fm)
fm = object_to_unicode(fm)
fm = json.dumps(fm, default=to_json, ensure_ascii=False)
except:
prints('Failed to write user metadata:', name)
import traceback
traceback.print_exc()
continue
li = bag.makeelement(expand('rdf:li'))
li.set(expand('rdf:parseType'), 'Resource')
bag.append(li)
n = li.makeelement(expand('calibreCC:name'))
li.append(n)
n.text = name
val = li.makeelement(expand('rdf:value'))
val.text = fm
li.append(val)
def metadata_to_xmp_packet(mi):
A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
@ -374,7 +428,7 @@ def metadata_to_xmp_packet(mi):
d.text = isoformat(now(), as_utc=False)
xmp.append(d)
calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI'))
calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
calibre.set(expand('rdf:about'), '')
rdf.append(calibre)
if not mi.is_null('rating'):
@ -389,6 +443,10 @@ def metadata_to_xmp_packet(mi):
for x in ('title_sort', 'author_sort'):
if not mi.is_null(x):
create_simple_property(calibre, 'calibre:'+x, getattr(mi, x))
all_user_metadata = mi.get_all_user_metadata(True)
if all_user_metadata:
create_user_metadata(calibre, all_user_metadata)
return serialize_xmp_packet(root)
def find_used_namespaces(elem):