Reading of <dc:creator>

This commit is contained in:
Kovid Goyal 2016-06-20 15:24:56 +05:30
parent 95f29656fa
commit 999575bec0
2 changed files with 84 additions and 4 deletions

View File

@ -4,13 +4,13 @@
from __future__ import (unicode_literals, division, absolute_import,
print_function)
from collections import defaultdict
from collections import defaultdict, namedtuple
from functools import wraps
import re
from lxml import etree
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata import check_isbn, authors_to_string
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf, ensure_unique, normalize_languages
from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC
@ -73,6 +73,26 @@ def properties_for_id(item_id, refines):
ans[key] = val
return ans
def properties_for_id_with_scheme(item_id, prefixes, refines):
    '''
    Collect the properties that refine the element with id ``item_id``,
    keeping the scheme information of every property.

    :param item_id: id of the refined element. A falsy id gives ``{}``.
    :param prefixes: mapping of prefix name to namespace, used to expand a
        ``prefix:reference`` value of the ``scheme`` attribute.
    :param refines: mapping of element id to an iterable of refining
        elements (objects exposing ``.get(attr)`` and ``.text``).
    :return: dict mapping property name to ``(scheme_ns, scheme, value)``.
        ``scheme_ns`` is the namespace the scheme prefix resolved to, or
        None when there is no scheme or its prefix is unknown; in the
        latter case ``scheme`` is left exactly as written. When several
        elements carry the same property, the last one wins.
    '''
    ans = {}
    if not item_id:
        return ans
    # Use .get() instead of indexing: a plain dict must not raise KeyError
    # for an id without refinements, and a defaultdict must not grow a new
    # empty entry as a side effect of a read-only lookup.
    for elem in refines.get(item_id, ()):
        key = elem.get('property')
        if not key:
            continue
        val = (elem.text or '').strip()
        if not val:
            continue
        scheme = elem.get('scheme') or None
        scheme_ns = None
        if scheme is not None:
            # Split "prefix:reference"; [::2] drops the separator
            p, r = scheme.partition(':')[::2]
            if p and r:
                ns = prefixes.get(p)
                if ns:
                    scheme_ns, scheme = ns, r
        ans[key] = (scheme_ns, scheme, val)
    return ans
def ensure_id(root, elem):
eid = elem.get('id')
if not eid:
@ -306,6 +326,41 @@ def set_languages(root, prefixes, refines, languages):
l = metadata.makeelement(DC('language'))
l.text = lang
metadata.append(l)
# }}}
# Creator/Contributor {{{
# Lightweight record for one creator: display name plus its sort (file-as) form
Author = namedtuple('Author', ('name', 'sort'))
def read_authors(root, prefixes, refines):
    '''
    Read the authors from the <dc:creator> elements of the package metadata.

    Creators with empty text are ignored. A creator whose role (taken from a
    refining ``role`` property, or failing that from the ``opf:role``
    attribute) is ``aut`` counts as an explicit author. A refining role is
    only accepted when it has no resolved scheme namespace or its scheme is
    marc:relators. Creators with no role information at all are used only
    when no explicit author was found.

    :return: the result of passing the chosen list of Author tuples through
        ``uniq()`` (defined elsewhere in this module).
    '''
    with_role, without_role = [], []

    def make_author(elem, props, name):
        # A refining file-as property takes precedence over the opf:file-as
        # attribute; its value is the last member of the property tuple.
        refined = props.get('file-as')
        sort = refined[-1] if refined else (elem.get(OPF('file-as')) or None)
        return Author(normalize_whitespace(name), normalize_whitespace(sort))

    for creator in XPath('./opf:metadata/dc:creator')(root):
        name = (creator.text or '').strip()
        if not name:
            continue
        props = properties_for_id_with_scheme(creator.get('id'), prefixes, refines)
        refined_role = props.get('role')
        attr_role = creator.get(OPF('role'))
        if refined_role:
            scheme_ns, scheme, code = refined_role
            if code.lower() != 'aut':
                continue
            if scheme_ns is None or (scheme_ns, scheme) == (reserved_prefixes['marc'], 'relators'):
                with_role.append(make_author(creator, props, name))
        elif attr_role:
            if attr_role.lower() == 'aut':
                with_role.append(make_author(creator, props, name))
        else:
            without_role.append(make_author(creator, props, name))

    return uniq(with_role or without_role)
# }}}
@ -323,7 +378,11 @@ def read_metadata(root):
ans.title = read_title(root, prefixes, refines) or ans.title
ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
ans.languages = read_languages(root, prefixes, refines) or ans.languages
auts, aus = [], []
for a in read_authors(root, prefixes, refines):
auts.append(a.name), aus.append(a.sort)
ans.authors = auts or ans.authors
ans.author_sort = authors_to_string(aus) or ans.author_sort
return ans
def get_metadata(stream):

View File

@ -12,7 +12,8 @@ from lxml import etree
from calibre.ebooks.metadata.opf3 import (
parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers,
read_metadata, set_identifiers, XPath, set_application_id, read_title,
read_refines, set_title, read_title_sort, read_languages, set_languages
read_refines, set_title, read_title_sort, read_languages, set_languages,
read_authors, Author
)
# Minimal OPF 3.0 <package> skeleton declaring the dc: and opf: namespace
# prefixes on <metadata>. ``{metadata}`` marks where the test markup goes;
# presumably filled in with str.format() by a helper not visible here.
TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata></package>''' # noqa
@ -102,6 +103,26 @@ class TestOPF3(unittest.TestCase):
self.ae(st(root, []), [])
# }}}
def test_authors(self): # {{{
    # Check that read_authors() picks the right <dc:creator> elements and
    # the right sort (file-as) value for each of them.
    def rl(root):
        return read_authors(root, reserved_prefixes, read_refines(root))

    # A lone creator without any role is treated as the author
    root = self.get_opf('''<dc:creator>a b</dc:creator>''')
    self.ae([Author('a b', None)], rl(root))

    # A creator refined with role "aut" wins over an unroled creator, both
    # with an explicit marc:relators scheme and with no scheme at all
    for scheme in ('scheme="marc:relators"', ''):
        root = self.get_opf('''<dc:creator>a b</dc:creator><dc:creator id="1">c d</dc:creator>'''
                            '''<meta refines="#1" property="role" %s>aut</meta>''' % scheme)
        self.ae([Author('c d', None)], rl(root))

    # The opf:role attribute is honoured as well
    root = self.get_opf('''<dc:creator>a b</dc:creator><dc:creator opf:role="aut">c d</dc:creator>''')
    self.ae([Author('c d', None)], rl(root))

    # file-as is read from the opf:file-as attribute or a refining property
    root = self.get_opf('''<dc:creator opf:file-as="b, a">a b</dc:creator><dc:creator id="1">c d</dc:creator>
<meta refines="#1" property="file-as">d, c</meta>''')
    self.ae([Author('a b', 'b, a'), Author('c d', 'd, c')], rl(root))
# }}}
# Run tests {{{
class TestRunner(unittest.main):