Implement title and title_sort

This commit is contained in:
Kovid Goyal 2016-06-18 17:34:07 +05:30
parent b139e252d6
commit d6754fe4d8
2 changed files with 116 additions and 6 deletions

View File

@ -11,7 +11,7 @@ from lxml import etree
from calibre.ebooks.metadata import check_isbn from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf, ensure_unique
from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC from calibre.ebooks.oeb.base import OPF2_NSMAP, OPF, DC
# Utils {{{ # Utils {{{
@ -44,11 +44,32 @@ def regex(r, flags=0):
_re_cache[(r, flags)] = ans = re.compile(r, flags) _re_cache[(r, flags)] = ans = re.compile(r, flags)
return ans return ans
def remove_element(e, refines): def remove_refines(e, refines):
e.getparent().remove(e)
for x in refines[e.get('id')]: for x in refines[e.get('id')]:
x.getparent().remove(x) x.getparent().remove(x)
refines.pop(e.get('id'), None) refines.pop(e.get('id'), None)
def remove_element(e, refines):
    ''' Remove e from its parent, after first discarding the elements that
    refine it (see remove_refines). '''
    # Refinements go first, while e is still attached to the tree
    remove_refines(e, refines)
    parent = e.getparent()
    parent.remove(e)
def properties_for_id(item_id, refines):
    ''' Return a {property: text} dict built from the elements that refine item_id.

    :param item_id: The id to look up, may be None or empty
    :param refines: Mapping of id to a list of refining elements

    Elements that have no property attribute, or whose text is empty or only
    whitespace, are skipped. The text is stripped. If several elements carry
    the same property, the last one wins. An empty dict is returned when
    item_id is falsy or has no refining elements. '''
    ans = {}
    if not item_id:
        return ans
    # Use .get() rather than indexing: this is a read-only lookup, so it should
    # neither raise KeyError on a plain dict nor insert an empty entry into a
    # defaultdict for every id that happens to be queried.
    for elem in refines.get(item_id, ()):
        key = elem.get('property')
        val = (elem.text or '').strip()
        if key and val:
            ans[key] = val
    return ans
def ensure_id(root, elem):
    ''' Return the id attribute of elem. If elem has no (or an empty) id, one
    is obtained from ensure_unique(), given the set of all id attribute values
    currently present under root, and stored on elem before being returned. '''
    current = elem.get('id')
    if current:
        return current
    ids_in_use = frozenset(XPath('//*/@id')(root))
    # presumably ensure_unique() returns a variant of 'id' not in ids_in_use
    new_id = ensure_unique('id', ids_in_use)
    elem.set('id', new_id)
    return new_id
# }}} # }}}
# Prefixes {{{ # Prefixes {{{
@ -73,6 +94,22 @@ def read_refines(root):
if r.startswith('#'): if r.startswith('#'):
ans[r[1:]].append(meta) ans[r[1:]].append(meta)
return ans return ans
def refdef(prop, val, scheme=None):
    ''' Bundle a refinement definition into the (property, value, scheme)
    tuple that set_refines() unpacks. '''
    definition = prop, val, scheme
    return definition
def set_refines(elem, existing_refines, *new_refines):
    ''' Replace the refinements of elem with new_refines.

    :param elem: The element being refined, it is given an id if it lacks one
    :param existing_refines: Mapping of id to refining elements, the entry for
        elem is removed from it (and its elements from the tree)
    :param new_refines: (property, value, scheme) tuples as built by refdef()

    A meta element is created for every new refinement and placed directly
    after elem, in the order the refinements were passed in. '''
    target = '#' + ensure_id(elem.getroottree().getroot(), elem)
    remove_refines(elem, existing_refines)
    parent = elem.getparent()
    # Every meta is inserted at the slot right after elem, so walking the
    # definitions backwards leaves them in their original order in the tree
    for prop, val, scheme in reversed(new_refines):
        meta = elem.makeelement(OPF('meta'))
        meta.set('refines', target)
        meta.set('property', prop)
        meta.text = val.strip()
        if scheme:
            meta.set('scheme', scheme)
        parent.insert(parent.index(elem) + 1, meta)
# }}} # }}}
# Identifiers {{{ # Identifiers {{{
@ -173,6 +210,53 @@ def set_application_id(root, refines, new_application_id=None):
# }}} # }}}
# Title {{{
def find_main_title(root, refines, remove_blanks=False):
    ''' Return the dc:title element that holds the main title, or None.

    The first non-blank dc:title refined with title-type "main" is chosen. If
    there is no such element, the first non-blank dc:title is used instead.
    Titles whose text is empty or only whitespace are never returned; when
    remove_blanks is True they are also removed from the tree as they are
    encountered. '''
    fallback = None
    for candidate in XPath('./opf:metadata/dc:title')(root):
        text = candidate.text
        if not (text and text.strip()):
            if remove_blanks:
                remove_element(candidate, refines)
            continue
        if fallback is None:
            fallback = candidate
        title_type = properties_for_id(candidate.get('id'), refines).get('title-type')
        if title_type == 'main':
            # An explicitly marked main title wins, stop looking
            return candidate
    return fallback
def read_title(root, prefixes, refines):
    ''' Return the stripped text of the main title element (as chosen by
    find_main_title), or None if there is no such element. prefixes is not
    used. '''
    elem = find_main_title(root, refines)
    if elem is None:
        return None
    return elem.text.strip()
def read_title_sort(root, prefixes, refines):
    ''' Return the sort form of the title, or None if there is none.

    The file-as refinement of the main title is preferred. Failing that, the
    content of the first calibre:title_sort meta element that has a non-empty
    content attribute is used. prefixes is not used. '''
    title_elem = find_main_title(root, refines)
    file_as = None
    if title_elem is not None:
        props = properties_for_id(title_elem.get('id'), refines)
        file_as = props.get('file-as')
    if file_as:
        return file_as
    # Look for OPF 2.0 style title_sort
    legacy_metas = XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root)
    for content in (meta.get('content') for meta in legacy_metas):
        if content:
            return content
    return None
def set_title(root, prefixes, refines, title, title_sort=None):
    ''' Set the main title of the book, and optionally its sort form.

    :param root: The root element of the OPF tree
    :param prefixes: Not used
    :param refines: Mapping of id to refining elements for root
    :param title: The new title, a false value leaves the element with no text
    :param title_sort: If true, it is stored as the file-as refinement

    Blank dc:title elements are removed. The main title element (see
    find_main_title) is re-used if there is one, otherwise a new dc:title is
    inserted as the first child of the metadata element. Existing refinements
    of the title are replaced by a title-type of "main" plus, when title_sort
    is given, a file-as. '''
    main_title = find_main_title(root, refines, remove_blanks=True)
    if main_title is None:
        m = XPath('./opf:metadata')(root)[0]
        # lxml needs the namespace qualified {uri}title form that DC() builds.
        # A prefixed name such as 'dc:title' is rejected as an invalid tag name.
        main_title = m.makeelement(DC('title'))
        m.insert(0, main_title)
    main_title.text = title or None
    ts = [refdef('file-as', title_sort)] if title_sort else ()
    set_refines(main_title, refines, refdef('title-type', 'main'), *ts)
# }}}
def read_metadata(root): def read_metadata(root):
ans = Metadata(_('Unknown'), [_('Unknown')]) ans = Metadata(_('Unknown'), [_('Unknown')])
prefixes, refines = read_prefixes(root), read_refines(root) prefixes, refines = read_prefixes(root), read_refines(root)
@ -184,6 +268,8 @@ def read_metadata(root):
elif key != 'uuid': elif key != 'uuid':
ids[key] = vals[0] ids[key] = vals[0]
ans.set_identifiers(ids) ans.set_identifiers(ids)
ans.title = read_title(root, prefixes, refines) or ans.title
ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
return ans return ans
@ -194,6 +280,8 @@ def get_metadata(stream):
def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False): def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False):
prefixes, refines = read_prefixes(root), read_refines(root) prefixes, refines = read_prefixes(root), read_refines(root)
set_identifiers(root, prefixes, refines, mi.identifiers, force_identifiers=force_identifiers) set_identifiers(root, prefixes, refines, mi.identifiers, force_identifiers=force_identifiers)
set_title(root, prefixes, refines, mi.title, mi.title_sort)
pretty_print_opf(root) pretty_print_opf(root)
def set_metadata(stream, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True): def set_metadata(stream, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):

View File

@ -11,7 +11,8 @@ from lxml import etree
from calibre.ebooks.metadata.opf3 import ( from calibre.ebooks.metadata.opf3 import (
parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers, parse_prefixes, reserved_prefixes, expand_prefix, read_identifiers,
read_metadata, set_identifiers, XPath, set_application_id read_metadata, set_identifiers, XPath, set_application_id, read_title,
read_refines, set_title, read_title_sort
) )
TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata></package>''' # noqa TEMPLATE = '''<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid"><metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">{metadata}</metadata></package>''' # noqa
@ -24,7 +25,7 @@ class TestOPF3(unittest.TestCase):
def get_opf(self, metadata=''): def get_opf(self, metadata=''):
return etree.fromstring(TEMPLATE.format(metadata=metadata)) return etree.fromstring(TEMPLATE.format(metadata=metadata))
def test_prefix_parsing(self): def test_prefix_parsing(self): # {{{
self.ae(parse_prefixes('foaf: http://xmlns.com/foaf/spec/\n dbp: http://dbpedia.org/ontology/'), self.ae(parse_prefixes('foaf: http://xmlns.com/foaf/spec/\n dbp: http://dbpedia.org/ontology/'),
{'foaf':'http://xmlns.com/foaf/spec/', 'dbp': 'http://dbpedia.org/ontology/'}) {'foaf':'http://xmlns.com/foaf/spec/', 'dbp': 'http://dbpedia.org/ontology/'})
for raw, expanded in ( for raw, expanded in (
@ -33,8 +34,9 @@ class TestOPF3(unittest.TestCase):
('xxx', 'xxx'), ('xxx', 'xxx'),
): ):
self.ae(expand_prefix(raw, reserved_prefixes), expanded) self.ae(expand_prefix(raw, reserved_prefixes), expanded)
# }}}
def test_identifiers(self): def test_identifiers(self): # {{{
def idt(val, scheme=None, iid=''): def idt(val, scheme=None, iid=''):
return '<dc:identifier id="{id}" {scheme}>{val}</dc:identifier>'.format(scheme=('opf:scheme="%s"'%scheme if scheme else ''), val=val, id=iid) return '<dc:identifier id="{id}" {scheme}>{val}</dc:identifier>'.format(scheme=('opf:scheme="%s"'%scheme if scheme else ''), val=val, id=iid)
def ri(root): def ri(root):
@ -69,6 +71,25 @@ class TestOPF3(unittest.TestCase):
set_application_id(root, default_refines, 'y') set_application_id(root, default_refines, 'y')
mi = read_metadata(root) mi = read_metadata(root)
self.ae(mi.application_id, 'y') self.ae(mi.application_id, 'y')
# }}}
def test_title(self): # {{{
    # The helpers re-read the refines from the tree on every call, so that
    # each check sees the state the previous write left behind
    def current_title(opf):
        return read_title(opf, reserved_prefixes, read_refines(opf))

    def current_title_sort(opf):
        return read_title_sort(opf, reserved_prefixes, read_refines(opf))

    def write_title(opf, title, title_sort=None):
        set_title(opf, reserved_prefixes, read_refines(opf), title, title_sort)
        return current_title(opf)

    # A blank title followed by a real one, with no refinements at all
    root = self.get_opf('''<dc:title/><dc:title id='t'>xxx</dc:title>''')
    self.ae(current_title(root), 'xxx')
    self.ae(write_title(root, 'abc', 'cba'), 'abc')
    self.ae(current_title_sort(root), 'cba')

    # An explicit main title that is not first, plus an OPF 2.0 style sort
    root = self.get_opf('''<dc:title>yyy</dc:title><dc:title id='t'>xxx
</dc:title><meta refines='#t' property='title-type'>main</meta><meta name="calibre:title_sort" content="sorted"/>''')
    self.ae(current_title_sort(root), 'sorted')
    self.ae(write_title(root, 'abc'), 'abc')
# }}}
# Run tests {{{
class TestRunner(unittest.main): class TestRunner(unittest.main):
@ -81,3 +102,4 @@ def run(verbosity=4):
if __name__ == '__main__': if __name__ == '__main__':
run(verbosity=4) run(verbosity=4)
# }}}