diff --git a/src/calibre/ebooks/metadata/odt.py b/src/calibre/ebooks/metadata/odt.py
index 7c8a856628..0b1e3d7ffa 100644
--- a/src/calibre/ebooks/metadata/odt.py
+++ b/src/calibre/ebooks/metadata/odt.py
@@ -59,6 +59,21 @@ fields = {
 }
 
 
+def uniq(vals):
+    '''
+    Remove all duplicates from vals, while preserving order.
+
+    A falsy vals (None or an empty iterable) yields an empty list. Items must
+    be hashable because a set tracks what has already been seen.
+    '''
+    vals = vals or ()
+    seen = set()
+    # set.add() returns None, so `not seen_add(x)` is always true and merely
+    # records x as a side effect the first time it is encountered.
+    seen_add = seen.add
+    return [x for x in vals if x not in seen and not seen_add(x)]
+
+
 def get_metadata(stream, extract_cover=True):
     whitespace = re.compile(r'\s+')
 
@@ -75,6 +90,12 @@ def get_metadata(stream, extract_cover=True):
             if ans:
                 return normalize(tostring(ans[0], method='text', encoding='unicode', with_tail=False)).strip()
 
+        def find_all(field):
+            ''' Yield the text of every element matching field, passed through normalize() and stripped. '''
+            ns, tag = fields[field]
+            for x in root.xpath(f'//ns0:{tag}', namespaces={'ns0': ns}):
+                yield normalize(tostring(x, method='text', encoding='unicode', with_tail=False)).strip()
+
         mi = MetaInformation(None, [])
         title = find('title')
         if title:
@@ -88,9 +109,13 @@ def get_metadata(stream, extract_cover=True):
         lang = find('language')
         if lang and canonicalize_lang(lang):
             mi.languages = [canonicalize_lang(lang)]
-        kw = find('keyword') or find('keywords')
-        if kw:
-            mi.tags = [x.strip() for x in kw.split(',') if x.strip()]
+        # Collect tags from every keyword/keywords element (each may hold a
+        # comma separated list), dropping duplicates in first-seen order.
+        keywords = []
+        for q in ('keyword', 'keywords'):
+            for kw in find_all(q):
+                keywords += [x.strip() for x in kw.split(',') if x.strip()]
+        mi.tags = uniq(keywords)
         data = {}
         for tag in root.xpath('//ns0:user-defined', namespaces={'ns0': fields['user-defined'][0]}):
             name = (tag.get('{%s}name' % METANS) or '').lower()