Docx metadata: Read the language of the file, if present

This commit is contained in:
Kovid Goyal 2013-05-04 11:06:59 +05:30
parent ecb520cb6e
commit df6f0f8dc7

View File

@@ -10,6 +10,7 @@ __docformat__ = 'restructuredtext en'
from lxml import etree from lxml import etree
from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.localization import canonicalize_lang
from calibre.utils.zipfile import ZipFile from calibre.utils.zipfile import ZipFile
from calibre.utils.magick.draw import identify_data from calibre.utils.magick.draw import identify_data
from calibre.ebooks.oeb.base import DC11_NS from calibre.ebooks.oeb.base import DC11_NS
@@ -52,6 +53,15 @@ def _read_doc_props(raw, mi):
raw = etree.tostring(desc[0], method='text', encoding=unicode) raw = etree.tostring(desc[0], method='text', encoding=unicode)
mi.comments = raw mi.comments = raw
langs = []
for lang in XPath('//dc:language')(root):
if lang.text and lang.text.strip():
l = canonicalize_lang(lang.text)
if l:
langs.append(l)
if langs:
mi.languages = langs
def _read_app_props(raw, mi): def _read_app_props(raw, mi):
root = etree.fromstring(raw, parser=RECOVER_PARSER) root = etree.fromstring(raw, parser=RECOVER_PARSER)
company = root.xpath('//*[local-name()="Company"]') company = root.xpath('//*[local-name()="Company"]')