DOCX Input: More fixes for right-to-left language support. Fixes #1569771 [right to left Arabic language tables](https://bugs.launchpad.net/calibre/+bug/1569771)

This commit is contained in:
Kovid Goyal 2016-08-22 20:43:59 +05:30
parent 1df8918fdd
commit 343fa0148d
3 changed files with 31 additions and 33 deletions

View File

@ -162,8 +162,10 @@ def read_justification(parent, dest, XPath, get):
continue
if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val:
ans = 'justify'
if val in {'left', 'center', 'right',}:
elif val in {'left', 'center', 'right', 'start', 'end'}:
ans = val
elif val in {'start', 'end'}:
ans = {'start':'left'}.get(val, 'right')
setattr(dest, 'text_align', ans)
def read_spacing(parent, dest, XPath, get):
@ -189,16 +191,6 @@ def read_spacing(parent, dest, XPath, get):
setattr(dest, 'margin_bottom', padding_bottom)
setattr(dest, 'line_height', line_height)
def read_direction(parent, dest, XPath, get):
    """Record the text direction found under *parent* on *dest*.

    Looks at every ``w:textFlow`` child of *parent* that carries a
    ``w:val`` attribute. If any of those values contains ``rl``
    (compared case-insensitively), ``dest.direction`` is set to ``'rtl'``;
    otherwise it is set to the module-level ``inherit`` sentinel.

    :param parent: element whose ``w:textFlow`` children are inspected.
    :param dest: object that receives the ``direction`` attribute.
    :param XPath: callable that turns an expression into a query callable,
        which is then applied to *parent*.
    :param get: callable used to read the ``w:val`` attribute of a node.
    """
    direction = inherit
    text_flows = XPath('./w:textFlow[@w:val]')(parent)
    for node in text_flows:
        flow = get(node, 'w:val')
        # Empty or missing values carry no information; once 'rtl' has
        # been seen, later values never reset it.
        if flow and 'rl' in flow.lower():
            direction = 'rtl'
    dest.direction = direction
def read_shd(parent, dest, XPath, get):
ans = inherit
for shd in XPath('./w:shd[@w:fill]')(parent):
@ -322,7 +314,7 @@ class ParagraphStyle(object):
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
# Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
'text_indent', 'text_align', 'line_height', 'background_color',
'numbering', 'font_family', 'font_size', 'color', 'frame',
)
@ -341,7 +333,7 @@ class ParagraphStyle(object):
):
setattr(self, p, binary_property(pPr, p, namespace.XPath, namespace.get))
for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'):
for x in ('border', 'indent', 'justification', 'spacing', 'shd', 'numbering', 'frame'):
f = globals()['read_%s' % x]
f(pPr, self, namespace.XPath, namespace.get)
@ -389,12 +381,16 @@ class ParagraphStyle(object):
if self.line_height not in {inherit, '1'}:
c['line-height'] = self.line_height
for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size', 'color'):
for x in ('text_indent', 'background_color', 'font_family', 'font_size', 'color'):
val = getattr(self, x)
if val is not inherit:
if x == 'font_size':
val = '%.3gpt' % val
c[x.replace('_', '-')] = val
ta = self.text_align
if self.bidi:
ta = {'left':'right', 'right':'left'}.get(ta, ta)
c['text-align'] = ta
return self._css

View File

@ -17,12 +17,14 @@ def mergeable(previous, current):
return False
if current.get('id', False):
return False
for attr in ('style', 'lang', 'dir'):
if previous.get(attr) != current.get(attr):
return False
try:
return next(previous.itersiblings()) is current
except StopIteration:
return False
def append_text(parent, text):
if len(parent) > 0:
parent[-1].tail = (parent[-1].tail or '') + text
@ -114,7 +116,7 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
# Merge consecutive spans that have the same styling
current_run = []
for span in root.xpath('//span[not(@style or @lang or @dir)]'):
for span in root.xpath('//span'):
if not current_run:
current_run.append(span)
else:
@ -126,33 +128,33 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath):
merge_run(current_run)
current_run = [span]
# Remove unnecessary span tags that are the only child of a parent block
# element
# Process dir attributes
class_map = dict(styles.classes.itervalues())
parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7))
for parent in root.xpath('//*[(%s)]' % ' or '.join('name()="%s"' % t for t in parents)):
# If all spans have dir="rtl" and the parent does not have dir set,
# move the dir to the parent.
if len(parent) and (parent.get('dir') or 'rtl') == 'rtl':
has_rtl_children = False
# Ensure that children of rtl parents that are not rtl have an
# explicit dir set. Also, remove dir from children if it is the same as
# that of the parent.
if len(parent):
parent_dir = parent.get('dir')
for child in parent.iterchildren('span'):
if child.get('dir') == 'rtl':
has_rtl_children = True
else:
has_rtl_children = False
break
if has_rtl_children:
parent.set('dir', 'rtl')
for child in parent.iterchildren():
del child.attrib['dir']
child_dir = child.get('dir')
if parent_dir == 'rtl' and child_dir != 'rtl':
child_dir = 'ltr'
child.set('dir', child_dir)
if child_dir and child_dir == parent_dir:
child.attrib.pop('dir')
# Remove unnecessary span tags that are the only child of a parent block
# element
for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)):
if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None):
# We have a block whose contents are entirely enclosed in a <span>
span = parent[0]
span_class = span.get('class', None)
span_css = class_map.get(span_class, {})
if liftable(span_css):
span_dir = span.get('dir')
if liftable(span_css) and (not span_dir or span_dir == parent.get('dir')):
pclass = parent.get('class', None)
if span_class:
pclass = (pclass + ' ' + span_class) if pclass else span_class

View File

@ -466,7 +466,7 @@ class Convert(object):
n = min(6, max(1, int(m.group(1))))
dest.tag = 'h%d' % n
if style.direction == 'rtl':
if style.bidi is True:
dest.set('dir', 'rtl')
border_runs = []