Custom column metadata for PDF

This commit is contained in:
Kovid Goyal 2014-02-17 18:39:08 +05:30
parent 467e57093a
commit 6bec8a9743

View File

@ -6,14 +6,14 @@ from __future__ import (unicode_literals, division, absolute_import,
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>' __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re, sys import re, sys, copy, json
from itertools import repeat from itertools import repeat
from collections import defaultdict from collections import defaultdict
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
from calibre import replace_entities from calibre import prints
from calibre.ebooks.metadata import check_isbn, check_doi from calibre.ebooks.metadata import check_isbn, check_doi
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat, now from calibre.utils.date import parse_date, isoformat, now
@ -39,6 +39,7 @@ NS_MAP = {
'x': 'adobe:ns:meta/', 'x': 'adobe:ns:meta/',
'calibre': 'http://calibre-ebook.com/xmp-namespace', 'calibre': 'http://calibre-ebook.com/xmp-namespace',
'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index', 'calibreSI': 'http://calibre-ebook.com/xmp-namespace-series-index',
'calibreCC': 'http://calibre-ebook.com/xmp-namespace-custom-columns',
} }
KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'} KNOWN_ID_SCHEMES = {'isbn', 'url', 'doi'}
@ -80,8 +81,8 @@ def serialize_xmp_packet(root, encoding='utf-8'):
def read_simple_property(elem): def read_simple_property(elem):
# A simple property # A simple property
if elem.text: if elem.text:
return replace_entities(elem.text) return elem.text
return replace_entities(elem.get(expand('rdf:resource'), '')) return elem.get(expand('rdf:resource'), '')
def read_lang_alt(parent): def read_lang_alt(parent):
# A text value with possible alternate values in different languages # A text value with possible alternate values in different languages
@ -153,6 +154,28 @@ def read_series(root):
return series, series_index return series, series_index
return None, None return None, None
def read_user_metadata(mi, root):
from calibre.utils.config import from_json
from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
fields = set()
for item in XPath('//calibre:custom_metadata')(root):
for li in XPath('./rdf:Bag/rdf:li')(item):
name = XPath('descendant::calibreCC:name')(li)
if name:
name = name[0].text
if name.startswith('#') and name not in fields:
val = XPath('descendant::rdf:value')(li)
if val:
fm = val[0].text
try:
fm = json.loads(fm, object_hook=from_json)
decode_is_multiple(fm)
mi.set_user_metadata(name, fm)
fields.add(name)
except:
prints('Failed to read user metadata:', name)
import traceback
traceback.print_exc()
def read_xmp_identifers(parent): def read_xmp_identifers(parent):
''' For example: ''' For example:
@ -254,6 +277,8 @@ def metadata_from_xmp_packet(raw_bytes):
if identifiers: if identifiers:
mi.set_identifiers(identifiers) mi.set_identifiers(identifiers)
read_user_metadata(mi, root)
return mi return mi
def consolidate_metadata(info_mi, xmp_packet): def consolidate_metadata(info_mi, xmp_packet):
@ -326,6 +351,35 @@ def create_series(calibre, series, series_index):
si.text = '%.2f' % series_index si.text = '%.2f' % series_index
s.append(si) s.append(si)
def create_user_metadata(calibre, all_user_metadata):
from calibre.utils.config import to_json
from calibre.ebooks.metadata.book.json_codec import object_to_unicode, encode_is_multiple
s = calibre.makeelement(expand('calibre:custom_metadata'))
calibre.append(s)
bag = s.makeelement(expand('rdf:Bag'))
s.append(bag)
for name, fm in all_user_metadata.iteritems():
try:
fm = copy.copy(fm)
encode_is_multiple(fm)
fm = object_to_unicode(fm)
fm = json.dumps(fm, default=to_json, ensure_ascii=False)
except:
prints('Failed to write user metadata:', name)
import traceback
traceback.print_exc()
continue
li = bag.makeelement(expand('rdf:li'))
li.set(expand('rdf:parseType'), 'Resource')
bag.append(li)
n = li.makeelement(expand('calibreCC:name'))
li.append(n)
n.text = name
val = li.makeelement(expand('rdf:value'))
val.text = fm
li.append(val)
def metadata_to_xmp_packet(mi): def metadata_to_xmp_packet(mi):
A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x')) A = ElementMaker(namespace=NS_MAP['x'], nsmap=nsmap('x'))
R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf')) R = ElementMaker(namespace=NS_MAP['rdf'], nsmap=nsmap('rdf'))
@ -374,7 +428,7 @@ def metadata_to_xmp_packet(mi):
d.text = isoformat(now(), as_utc=False) d.text = isoformat(now(), as_utc=False)
xmp.append(d) xmp.append(d)
calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI')) calibre = rdf.makeelement(expand('rdf:Description'), nsmap=nsmap('calibre', 'calibreSI', 'calibreCC'))
calibre.set(expand('rdf:about'), '') calibre.set(expand('rdf:about'), '')
rdf.append(calibre) rdf.append(calibre)
if not mi.is_null('rating'): if not mi.is_null('rating'):
@ -389,6 +443,10 @@ def metadata_to_xmp_packet(mi):
for x in ('title_sort', 'author_sort'): for x in ('title_sort', 'author_sort'):
if not mi.is_null(x): if not mi.is_null(x):
create_simple_property(calibre, 'calibre:'+x, getattr(mi, x)) create_simple_property(calibre, 'calibre:'+x, getattr(mi, x))
all_user_metadata = mi.get_all_user_metadata(True)
if all_user_metadata:
create_user_metadata(calibre, all_user_metadata)
return serialize_xmp_packet(root) return serialize_xmp_packet(root)
def find_used_namespaces(elem): def find_used_namespaces(elem):