mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ODT metadata: Support reading tags from multiple <keyword> elements
This commit is contained in:
parent
57d67d9deb
commit
2b41671370
@ -59,6 +59,14 @@ fields = {
|
||||
}
|
||||
|
||||
|
||||
def uniq(vals):
    '''
    Remove all duplicates from vals, while preserving order.

    :param vals: An iterable of hashable items, or a falsy value such as
                 None, which is treated as empty.
    :return: A new list holding the first occurrence of every item, in the
             order the items were first seen.
    '''
    # dict keys are unique and keep insertion order, so building a dict from
    # the items de-duplicates them without a hand-rolled "seen" set or a
    # side-effecting call inside a comprehension condition.
    return list(dict.fromkeys(vals or ()))
|
||||
|
||||
|
||||
def get_metadata(stream, extract_cover=True):
|
||||
whitespace = re.compile(r'\s+')
|
||||
|
||||
@ -75,6 +83,11 @@ def get_metadata(stream, extract_cover=True):
|
||||
if ans:
|
||||
return normalize(tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip()
|
||||
|
||||
def find_all(field):
|
||||
ns, tag = fields[field]
|
||||
for x in root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns}):
|
||||
yield normalize(tostring(x, method='text', encoding='unicode', with_tail=False)).strip()
|
||||
|
||||
mi = MetaInformation(None, [])
|
||||
title = find('title')
|
||||
if title:
|
||||
@ -88,9 +101,11 @@ def get_metadata(stream, extract_cover=True):
|
||||
lang = find('language')
|
||||
if lang and canonicalize_lang(lang):
|
||||
mi.languages = [canonicalize_lang(lang)]
|
||||
kw = find('keyword') or find('keywords')
|
||||
if kw:
|
||||
mi.tags = [x.strip() for x in kw.split(',') if x.strip()]
|
||||
keywords = []
|
||||
for q in ('keyword', 'keywords'):
|
||||
for kw in find_all(q):
|
||||
keywords += [x.strip() for x in kw.split(',') if x.strip()]
|
||||
mi.tags = uniq(keywords)
|
||||
data = {}
|
||||
for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}):
|
||||
name = (tag.get('{%s}name' % METANS) or '').lower()
|
||||
|
Loading…
x
Reference in New Issue
Block a user