ODT metadata: Support reading tags from multiple <keyword> elements

This commit is contained in:
Kovid Goyal 2022-01-16 19:32:52 +05:30
parent 57d67d9deb
commit 2b41671370
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -59,6 +59,14 @@ fields = {
} }
def uniq(vals):
    ''' Remove all duplicates from vals, while preserving order.

    vals may be any iterable of hashable items, or a falsy value such as
    None, which is treated as empty. Returns a new list that keeps only the
    first occurrence of each item.
    '''
    # dicts preserve insertion order, so the keys are exactly the first
    # occurrence of every distinct item, in their original order.
    return list(dict.fromkeys(vals or ()))
def get_metadata(stream, extract_cover=True): def get_metadata(stream, extract_cover=True):
whitespace = re.compile(r'\s+') whitespace = re.compile(r'\s+')
@ -75,6 +83,11 @@ def get_metadata(stream, extract_cover=True):
if ans: if ans:
return normalize(tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip() return normalize(tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip()
def find_all(field):
    ''' Yield the text of every element in root matching field.

    field is a key into fields, which supplies the (namespace, tag) pair to
    search for. Each match is serialized as text without its tail, passed
    through normalize() and stripped before being yielded.
    '''
    namespace, tag = fields[field]
    matches = root.xpath(f'//ns0:{tag}', namespaces={'ns0': namespace})
    for node in matches:
        text = tostring(node, method='text', encoding='unicode', with_tail=False)
        yield normalize(text).strip()
mi = MetaInformation(None, []) mi = MetaInformation(None, [])
title = find('title') title = find('title')
if title: if title:
@ -88,9 +101,11 @@ def get_metadata(stream, extract_cover=True):
lang = find('language') lang = find('language')
if lang and canonicalize_lang(lang): if lang and canonicalize_lang(lang):
mi.languages = [canonicalize_lang(lang)] mi.languages = [canonicalize_lang(lang)]
kw = find('keyword') or find('keywords') keywords = []
if kw: for q in ('keyword', 'keywords'):
mi.tags = [x.strip() for x in kw.split(',') if x.strip()] for kw in find_all(q):
keywords += [x.strip() for x in kw.split(',') if x.strip()]
mi.tags = uniq(keywords)
data = {} data = {}
for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}): for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}):
name = (tag.get('{%s}name' % METANS) or '').lower() name = (tag.get('{%s}name' % METANS) or '').lower()