Reading of <dc:creator>

This commit is contained in:
Kovid Goyal 2016-06-20 15:24:56 +05:30
parent 95f29656fa
commit 999575bec0
2 changed files with 84 additions and 4 deletions

View File

@ -4,13 +4,13 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
from collections import defaultdict from collections import defaultdict, namedtuple
from functools import wraps from functools import wraps
import re import re
from lxml import etree from lxml import etree
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn, authors_to_string
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf, ensure_unique, normalize_languages from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf, ensure_unique, normalize_languages
from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC
@ -73,6 +73,26 @@ def properties_for_id(item_id, refines):
ans[key] = val ans[key] = val
return ans return ans
def properties_for_id_with_scheme(item_id, prefixes, refines):
    """Collect the refining properties of *item_id*, keeping scheme information.

    :param item_id: id of the refined element; a false value yields ``{}``
        without looking at *refines*.
    :param prefixes: mapping of prefix name to namespace, queried with ``.get()``.
    :param refines: mapping indexed as ``refines[item_id]`` to obtain the
        refining elements (presumably a defaultdict -- verify against caller).
    :return: dict mapping property name to ``(scheme_ns, scheme, value)``.
        ``scheme_ns`` is the namespace the scheme prefix resolved to, or None
        when there is no scheme or its prefix is unknown; in the latter case
        ``scheme`` is left exactly as written. When a property occurs more
        than once, the last occurrence wins.
    """
    props = {}
    if not item_id:
        return props
    for meta in refines[item_id]:
        name = meta.get('property')
        if not name:
            continue
        text = (meta.text or '').strip()
        if not text:
            # Properties without any textual value are ignored
            continue
        namespace = None
        scheme = meta.get('scheme') or None
        if scheme is not None:
            prefix, _, local = scheme.partition(':')
            # Only a "prefix:name" scheme whose prefix is known gets split
            resolved = prefixes.get(prefix) if prefix and local else None
            if resolved:
                namespace, scheme = resolved, local
        props[name] = (namespace, scheme, text)
    return props
def ensure_id(root, elem): def ensure_id(root, elem):
eid = elem.get('id') eid = elem.get('id')
if not eid: if not eid:
@ -306,6 +326,41 @@ def set_languages(root, prefixes, refines, languages):
l = metadata.makeelement(DC('language')) l = metadata.makeelement(DC('language'))
l.text = lang l.text = lang
metadata.append(l) metadata.append(l)
# }}}
# Creator/Contributor {{{
# A creator as returned by read_authors(): ``name`` is built from the
# element text, ``sort`` from its file-as value when one is given
Author = namedtuple('Author', 'name sort')
def read_authors(root, prefixes, refines):
    """Return the de-duplicated authors found in the ``dc:creator`` elements.

    Creators explicitly marked with the ``aut`` role, either through a
    refining ``role`` property (with no scheme or the marc relators scheme)
    or through an ``opf:role`` attribute, are preferred. Creators carrying
    no role at all are used only when no explicitly roled author exists.
    Creators with any other role are dropped.

    :param root: the parsed package document root.
    :param prefixes: prefix mapping handed to properties_for_id_with_scheme().
    :param refines: refines mapping handed to properties_for_id_with_scheme().
    :return: the result of ``uniq()`` over the selected :class:`Author` items.
    """
    with_role, without_role = [], []

    def make_author(elem, props, name):
        # A refining file-as property takes precedence over the opf:file-as
        # attribute; its value is the last member of the property tuple
        refined = props.get('file-as')
        if refined:
            sort = refined[-1]
        else:
            sort = elem.get(OPF('file-as')) or None
        return Author(normalize_whitespace(name), normalize_whitespace(sort))

    def is_author_role(refined_role):
        scheme_ns, scheme, code = refined_role
        if code.lower() != 'aut':
            return False
        return scheme_ns is None or (scheme_ns, scheme) == (reserved_prefixes['marc'], 'relators')

    for creator in XPath('./opf:metadata/dc:creator')(root):
        name = (creator.text or '').strip()
        if not name:
            continue
        props = properties_for_id_with_scheme(creator.get('id'), prefixes, refines)
        refined_role = props.get('role')
        attr_role = creator.get(OPF('role'))
        if refined_role:
            if is_author_role(refined_role):
                with_role.append(make_author(creator, props, name))
        elif attr_role:
            if attr_role.lower() == 'aut':
                with_role.append(make_author(creator, props, name))
        else:
            without_role.append(make_author(creator, props, name))
    return uniq(with_role or without_role)
# }}} # }}}
@ -323,7 +378,11 @@ def read_metadata(root):
ans.title = read_title(root, prefixes, refines) or ans.title ans.title = read_title(root, prefixes, refines) or ans.title
ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
ans.languages = read_languages(root, prefixes, refines) or ans.languages ans.languages = read_languages(root, prefixes, refines) or ans.languages
auts, aus = [], []
for a in read_authors(root, prefixes, refines):
auts.append(a.name), aus.append(a.sort)
ans.authors = auts or ans.authors
ans.author_sort = authors_to_string(aus) or ans.author_sort
return ans return ans
def get_metadata(stream): def get_metadata(stream):

View File

@ -12,7 +12,8 @@ from lxml import etree
from calibre.ebooks.metadata.opf3 import ( from calibre.ebooks.metadata.opf3 import (
parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers, parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers,
read_metadata, set_identifiers, XPath, set_application_id, read_title, read_metadata, set_identifiers, XPath, set_application_id, read_title,
read_refines, set_title, read_title_sort, read_languages, set_languages read_refines, set_title, read_title_sort, read_languages, set_languages,
read_authors, Author
) )
TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata></package>''' # noqa TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata></package>''' # noqa
@ -102,6 +103,26 @@ class TestOPF3(unittest.TestCase):
self.ae(st(root, []), []) self.ae(st(root, []), [])
# }}} # }}}
def test_authors(self):  # {{{
    """Check which dc:creator entries read_authors() reports as authors."""
    def rl(root):
        return read_authors(root, reserved_prefixes, read_refines(root))

    # A lone creator without any role is reported as the author
    root = self.get_opf('''<dc:creator>a b</dc:creator>''')
    self.ae([Author('a b', None)], rl(root))
    # A creator refined with role "aut", with or without the marc:relators
    # scheme, takes precedence over creators that have no role
    for scheme in ('scheme="marc:relators"', ''):
        root = self.get_opf('''<dc:creator>a b</dc:creator><dc:creator id="1">c d</dc:creator>'''
                            '''<meta refines="#1" property="role" %s>aut</meta>''' % scheme)
        self.ae([Author('c d', None)], rl(root))
    # The opf:role attribute is honoured as well
    root = self.get_opf('''<dc:creator>a b</dc:creator><dc:creator opf:role="aut">c d</dc:creator>''')
    self.ae([Author('c d', None)], rl(root))
    # The sort value comes from opf:file-as or from a refining file-as property
    root = self.get_opf('''<dc:creator opf:file-as="b, a">a b</dc:creator><dc:creator id="1">c d</dc:creator>
<meta refines="#1" property="file-as">d, c</meta>''')
    self.ae([Author('a b', 'b, a'), Author('c d', 'd, c')], rl(root))
# }}}
# Run tests {{{ # Run tests {{{
class TestRunner(unittest.main): class TestRunner(unittest.main):