DOCX Input: if no language is defined in the metadata, use the language specified in the document default style. Fixes #1321346 [DOCX Input language detection](https://bugs.launchpad.net/calibre/+bug/1321346)

This commit is contained in:
Kovid Goyal 2014-05-20 22:37:07 +05:30
parent 2a3071b8ea
commit 6c1dc2da8d

View File

@ -71,6 +71,14 @@ def read_app_props(raw, mi):
company = root.xpath('//*[local-name()="Company"]') company = root.xpath('//*[local-name()="Company"]')
if company and company[0].text and company[0].text.strip(): if company and company[0].text and company[0].text.strip():
mi.publisher = company[0].text.strip() mi.publisher = company[0].text.strip()
def read_default_style_language(raw, mi):
    """Set ``mi.languages`` from the default run-property language in *raw*.

    *raw* is parsed with ``fromstring`` and queried for the ``w:val``
    attribute of ``w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang``.
    Each value found is passed through ``canonicalize_lang``; the first
    truthy result is stored as the sole entry of ``mi.languages``. If no
    value yields a truthy result, *mi* is left untouched.

    NOTE(review): the visible caller passes the contents of
    ``word/styles.xml`` -- confirm there are no other callers.
    """
    root = fromstring(raw)
    find_langs = XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/@w:val')
    # Lazy generator: canonicalization stops at the first usable value.
    candidates = (canonicalize_lang(val) for val in find_langs(root))
    detected = next((code for code in candidates if code), None)
    if detected is not None:
        mi.languages = [detected]
# }}} # }}}
class DOCX(object): class DOCX(object):
@ -207,6 +215,13 @@ class DOCX(object):
pass pass
else: else:
read_doc_props(raw, mi) read_doc_props(raw, mi)
if mi.is_null('language'):
try:
raw = self.read('word/styles.xml')
except KeyError:
pass
else:
read_default_style_language(raw, mi)
name = self.relationships.get(APPPROPS, None) name = self.relationships.get(APPPROPS, None)
if name is None: if name is None: