mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Add support for multilingual documents. Fixes #1394428 [docx driver doesn't handle different languages](https://bugs.launchpad.net/calibre/+bug/1394428)
This commit is contained in:
parent
35406e21d8
commit
00ea2cc4a4
@ -115,7 +115,7 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
|
|||||||
|
|
||||||
# Merge consecutive spans that have the same styling
|
# Merge consecutive spans that have the same styling
|
||||||
current_run = []
|
current_run = []
|
||||||
for span in root.xpath('//span[not(@style)]'):
|
for span in root.xpath('//span[not(@style or @lang)]'):
|
||||||
if not current_run:
|
if not current_run:
|
||||||
current_run.append(span)
|
current_run.append(span)
|
||||||
else:
|
else:
|
||||||
@ -144,6 +144,8 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
|
|||||||
parent.set('class', pclass)
|
parent.set('class', pclass)
|
||||||
parent.text = span.text
|
parent.text = span.text
|
||||||
parent.remove(span)
|
parent.remove(span)
|
||||||
|
if span.get('lang'):
|
||||||
|
parent.set('lang', span.get('lang'))
|
||||||
for child in span:
|
for child in span:
|
||||||
parent.append(child)
|
parent.append(child)
|
||||||
|
|
||||||
@ -159,7 +161,7 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover):
|
|||||||
del span.attrib['class']
|
del span.attrib['class']
|
||||||
|
|
||||||
# Get rid of <span>s that have no styling
|
# Get rid of <span>s that have no styling
|
||||||
for span in root.xpath('//span[not(@class) and not(@id) and not(@style)]'):
|
for span in root.xpath('//span[not(@class or @id or @style or @lang)]'):
|
||||||
lift(span)
|
lift(span)
|
||||||
|
|
||||||
# Convert <p><br style="page-break-after:always"> </p> style page breaks
|
# Convert <p><br style="page-break-after:always"> </p> style page breaks
|
||||||
|
@ -43,6 +43,13 @@ class Text:
|
|||||||
setattr(self.elem, self.attr, ''.join(self.buf))
|
setattr(self.elem, self.attr, ''.join(self.buf))
|
||||||
self.elem, self.attr, self.buf = elem, 'tail', []
|
self.elem, self.attr, self.buf = elem, 'tail', []
|
||||||
|
|
||||||
|
def html_lang(docx_lang):
    """Map a DOCX language value to the code used for an HTML ``lang`` attribute.

    The value is first passed through ``canonicalize_lang``. Unless that
    result is empty or the string ``'und'``, it is then passed through
    ``lang_as_iso639_1``.

    Returns the code produced by ``lang_as_iso639_1`` when it is non-empty,
    otherwise ``None``.
    """
    canonical = canonicalize_lang(docx_lang)
    # Nothing usable: no canonical form, or the 'und' placeholder
    # (presumably "undetermined" -- confirm against canonicalize_lang).
    if not canonical or canonical == 'und':
        return None
    short_code = lang_as_iso639_1(canonical)
    # An empty conversion result is reported as None, like the other failures.
    return short_code or None
|
||||||
|
|
||||||
class Convert(object):
|
class Convert(object):
|
||||||
|
|
||||||
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
|
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None):
|
||||||
@ -77,11 +84,12 @@ class Convert(object):
|
|||||||
child.tail = '\n\t\t'
|
child.tail = '\n\t\t'
|
||||||
self.html[0][-1].tail = '\n\t'
|
self.html[0][-1].tail = '\n\t'
|
||||||
self.html[1].text = self.html[1].tail = '\n'
|
self.html[1].text = self.html[1].tail = '\n'
|
||||||
lang = canonicalize_lang(self.mi.language)
|
lang = html_lang(self.mi.language)
|
||||||
if lang and lang != 'und':
|
|
||||||
lang = lang_as_iso639_1(lang)
|
|
||||||
if lang:
|
if lang:
|
||||||
self.html.set('lang', lang)
|
self.html.set('lang', lang)
|
||||||
|
self.doc_lang = lang
|
||||||
|
else:
|
||||||
|
self.doc_lang = None
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
doc = self.docx.document
|
doc = self.docx.document
|
||||||
@ -626,7 +634,9 @@ class Convert(object):
|
|||||||
if style.vert_align in {'superscript', 'subscript'}:
|
if style.vert_align in {'superscript', 'subscript'}:
|
||||||
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
|
||||||
if style.lang is not inherit:
|
if style.lang is not inherit:
|
||||||
ans.lang = style.lang
|
lang = html_lang(style.lang)
|
||||||
|
if lang is not None and lang != self.doc_lang:
|
||||||
|
ans.set('lang', lang)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def add_frame(self, html_obj, style):
|
def add_frame(self, html_obj, style):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user