def read_default_style_language(raw, mi):
    """Fill ``mi.languages`` from the language of the default run style.

    ``raw`` is parsed with ``fromstring`` and queried for the ``w:val``
    attribute of ``w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang``.
    Each value found is passed through ``canonicalize_lang``; the first
    truthy result is stored as the single entry of ``mi.languages``.
    If no value canonicalizes to something truthy, ``mi`` is not modified.

    :param raw: serialized styles XML (presumably the contents of
        ``word/styles.xml`` -- confirm against the caller)
    :param mi: metadata object whose ``languages`` attribute is assigned
    """
    styles_root = fromstring(raw)
    find_default_langs = XPath(
        '/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/@w:val')
    # Generator: values are canonicalized lazily, so nothing after the
    # first usable language is processed.
    canonical_langs = (
        canonicalize_lang(declared)
        for declared in find_default_langs(styles_root)
    )
    first_usable = next((code for code in canonical_langs if code), None)
    if first_usable:
        mi.languages = [first_usable]