diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index e751e195f6..552fe9820e 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -162,8 +162,10 @@ def read_justification(parent, dest, XPath, get): continue if val in {'both', 'distribute'} or 'thai' in val or 'kashida' in val: ans = 'justify' - if val in {'left', 'center', 'right',}: + elif val in {'left', 'center', 'right', 'start', 'end'}: ans = val + elif val in {'start', 'end'}: + ans = {'start':'left'}.get(val, 'right') setattr(dest, 'text_align', ans) def read_spacing(parent, dest, XPath, get): @@ -189,16 +191,6 @@ def read_spacing(parent, dest, XPath, get): setattr(dest, 'margin_bottom', padding_bottom) setattr(dest, 'line_height', line_height) -def read_direction(parent, dest, XPath, get): - ans = inherit - for jc in XPath('./w:textFlow[@w:val]')(parent): - val = get(jc, 'w:val') - if not val: - continue - if 'rl' in val.lower(): - ans = 'rtl' - setattr(dest, 'direction', ans) - def read_shd(parent, dest, XPath, get): ans = inherit for shd in XPath('./w:shd[@w:fill]')(parent): @@ -322,7 +314,7 @@ class ParagraphStyle(object): 'margin_left', 'margin_top', 'margin_right', 'margin_bottom', # Misc. - 'text_indent', 'text_align', 'line_height', 'direction', 'background_color', + 'text_indent', 'text_align', 'line_height', 'background_color', 'numbering', 'font_family', 'font_size', 'color', 'frame', ) @@ -341,7 +333,7 @@ class ParagraphStyle(object): ): setattr(self, p, binary_property(pPr, p, namespace.XPath, namespace.get)) - for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'): + for x in ('border', 'indent', 'justification', 'spacing', 'shd', 'numbering', 'frame'): f = globals()['read_%s' % x] f(pPr, self, namespace.XPath, namespace.get) @@ -389,12 +381,16 @@ class ParagraphStyle(object): if self.line_height not in {inherit, '1'}: c['line-height'] = self.line_height - for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size', 'color'): + for x in ('text_indent', 'background_color', 'font_family', 'font_size', 'color'): val = getattr(self, x) if val is not inherit: if x == 'font_size': val = '%.3gpt' % val c[x.replace('_', '-')] = val + ta = self.text_align + if self.bidi: + ta = {'left':'right', 'right':'left'}.get(ta, ta) + c['text-align'] = ta return self._css diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 297fa5cf26..2a2dd5b12a 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -17,12 +17,14 @@ def mergeable(previous, current): return False if current.get('id', False): return False + for attr in ('style', 'lang', 'dir'): + if previous.get(attr) != current.get(attr): + return False try: return next(previous.itersiblings()) is current except StopIteration: return False - def append_text(parent, text): if len(parent) > 0: parent[-1].tail = (parent[-1].tail or '') + text @@ -114,7 +116,7 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): # Merge consecutive spans that have the same styling current_run = [] - for span in root.xpath('//span[not(@style or @lang or @dir)]'): + for span in root.xpath('//span'): if not current_run: current_run.append(span) else: @@ -126,33 +128,33 @@ def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): merge_run(current_run) current_run = [span] - # Remove unnecessary span tags that are the only child of a parent block - # element + # Process dir attributes class_map = dict(styles.classes.itervalues()) parents = ('p', 'div') + tuple('h%d' % i for i in xrange(1, 7)) for parent in root.xpath('//*[(%s)]' % ' or '.join('name()="%s"' % t for t in parents)): - # If all spans have dir="rtl" and the parent does not have dir set, - # move the dir to the parent. - if len(parent) and (parent.get('dir') or 'rtl') == 'rtl': - has_rtl_children = False + # Ensure that children of rtl parents that are not rtl have an + # explicit dir set. Also, remove dir from children if it is the same as + # that of the parent. + if len(parent): + parent_dir = parent.get('dir') for child in parent.iterchildren('span'): - if child.get('dir') == 'rtl': - has_rtl_children = True - else: - has_rtl_children = False - break - if has_rtl_children: - parent.set('dir', 'rtl') - for child in parent.iterchildren(): - del child.attrib['dir'] + child_dir = child.get('dir') + if parent_dir == 'rtl' and child_dir != 'rtl': + child_dir = 'ltr' + child.set('dir', child_dir) + if child_dir and child_dir == parent_dir: + child.attrib.pop('dir') + # Remove unnecessary span tags that are the only child of a parent block + # element for parent in root.xpath('//*[(%s) and count(span)=1]' % ' or '.join('name()="%s"' % t for t in parents)): if len(parent) == 1 and not parent.text and not parent[0].tail and not parent[0].get('id', None): # We have a block whose contents are entirely enclosed in a span = parent[0] span_class = span.get('class', None) span_css = class_map.get(span_class, {}) - if liftable(span_css): + span_dir = span.get('dir') + if liftable(span_css) and (not span_dir or span_dir == parent.get('dir')): pclass = parent.get('class', None) if span_class: pclass = (pclass + ' ' + span_class) if pclass else span_class diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 59fedc31e3..9919950aa5 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -466,7 +466,7 @@ class Convert(object): n = min(6, max(1, int(m.group(1)))) dest.tag = 'h%d' % n - if style.direction == 'rtl': + if style.bidi is True: dest.set('dir', 'rtl') border_runs = []