From df6f0f8dc73f50fafb3e5d44dfdce2e23d0cc10e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 4 May 2013 11:06:59 +0530 Subject: [PATCH] Docx metadata: Read the language of the file, if present --- src/calibre/ebooks/metadata/docx.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py index 1505d397f3..cb265424cc 100644 --- a/src/calibre/ebooks/metadata/docx.py +++ b/src/calibre/ebooks/metadata/docx.py @@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en' from lxml import etree from calibre.ebooks.metadata.book.base import Metadata +from calibre.utils.localization import canonicalize_lang from calibre.utils.zipfile import ZipFile from calibre.utils.magick.draw import identify_data from calibre.ebooks.oeb.base import DC11_NS @@ -52,6 +53,15 @@ def _read_doc_props(raw, mi): raw = etree.tostring(desc[0], method='text', encoding=unicode) mi.comments = raw + langs = [] + for lang in XPath('//dc:language')(root): + if lang.text and lang.text.strip(): + l = canonicalize_lang(lang.text) + if l: + langs.append(l) + if langs: + mi.languages = langs + def _read_app_props(raw, mi): root = etree.fromstring(raw, parser=RECOVER_PARSER) company = root.xpath('//*[local-name()="Company"]')