DOCX Output: Fix incorrect handling of bold/italic in paragraphs where

the majority of text is either bold or italic instead of normal
This commit is contained in:
Kovid Goyal 2015-05-21 13:43:41 +05:30
parent 85bb1dbeec
commit 1f4e6c22dd
2 changed files with 114 additions and 18 deletions

View File

@ -64,6 +64,7 @@ class TextRun(object):
self.lang = lang self.lang = lang
self.parent_style = None self.parent_style = None
self.makeelement = namespace.makeelement self.makeelement = namespace.makeelement
self.descendant_style = None
def add_text(self, text, preserve_whitespace, bookmark=None, link=None): def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
if not preserve_whitespace: if not preserve_whitespace:
@ -86,8 +87,8 @@ class TextRun(object):
parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link) parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link)
r = makeelement(parent, 'w:r') r = makeelement(parent, 'w:r')
rpr = makeelement(r, 'w:rPr', append=False) rpr = makeelement(r, 'w:rPr', append=False)
if self.parent_style is not self.style: if getattr(self.descendant_style, 'id', None) is not None:
makeelement(rpr, 'w:rStyle', w_val=self.style.id) makeelement(rpr, 'w:rStyle', w_val=self.descendant_style.id)
if self.lang: if self.lang:
makeelement(rpr, 'w:lang', w_bidi=self.lang, w_val=self.lang, w_eastAsia=self.lang) makeelement(rpr, 'w:lang', w_bidi=self.lang, w_val=self.lang, w_eastAsia=self.lang)
if len(rpr) > 0: if len(rpr) > 0:

View File

@ -341,6 +341,93 @@ class TextStyle(DOCXStyle):
if bdr.attrib: if bdr.attrib:
rPr.append(bdr) rPr.append(bdr)
class DescendantTextStyle(object):
    """Character style made only of the run properties in which a child
    text style differs from its parent text style.

    The differences are stored in ``properties`` as an ordered tuple of
    ``(element_name, frozenset_of_attribute_pairs)``. Instances hash and
    compare by that tuple, so two parent/child pairs that yield the same
    differences are interchangeable (e.g. as keys of one dict).

    ``id`` and ``name`` start out as None; they have to be assigned by the
    caller before ``serialize()`` produces a usable style element.
    """

    def __init__(self, parent_style, child_style):
        """Record every property of ``child_style`` that differs from
        ``parent_style``.

        Both arguments must expose the text style attributes read below
        (``font_family``, ``font_size``, ``bold``, ...); ``child_style`` must
        also provide ``makeelement``, which is reused by ``serialize()``.
        """
        self.id = self.name = None
        self.makeelement = child_style.makeelement

        collected = []

        def add(name, **attrs):
            # items() builds the same frozenset as iteritems() on Python 2
            # and, unlike iteritems(), also exists on Python 3.
            collected.append((name, frozenset(attrs.items())))

        def differs(attr):
            return getattr(parent_style, attr) != getattr(child_style, attr)

        # NOTE: the order of the add() calls below is significant. It defines
        # both the equality/hash key and the order of the serialized children.
        if differs('font_family'):
            add('rFonts', **{k: child_style.font_family for k in 'ascii cs eastAsia hAnsi'.split()})

        if differs('font_size'):
            size = str(child_style.font_size)
            add('sz', val=size)
            add('szCs', val=size)

        # bold, italic are toggle properties: 'on' flips whatever the parent has
        for name, attr in (('b', 'bold'), ('i', 'italic')):
            if differs(attr):
                add(name, val='on')
                add(name + 'Cs', val='on')

        if differs('color'):
            add('color', val=child_style.color or 'auto')
        if differs('background_color'):
            add('shd', fill=child_style.background_color or 'auto')
        if differs('underline'):
            add('u', val='single' if child_style.underline else 'none')
        if differs('dstrike'):
            # Written with its explicit value rather than as a toggle
            add('dstrike', val=bmap(child_style.dstrike))

        # Remaining toggle properties, in their original emission order
        for name, attr in (
            ('strike', 'strike'),
            ('caps', 'caps'),
            ('smallCaps', 'small_caps'),
            ('shadow', 'shadow'),
        ):
            if differs(attr):
                add(name, val='on')

        if differs('spacing'):
            add('spacing', val=str(child_style.spacing or 0))

        if differs('vertical_align'):
            valign = child_style.vertical_align
            if valign in {'superscript', 'subscript', 'baseline'}:
                add('vertAlign', val=valign)
            else:
                # Any other value is passed through unchanged as a position
                add('position', val=valign)

        # NOTE(review): only the border attributes that differ are written, so
        # the resulting bdr element may carry a subset of space/sz/val/color.
        # Confirm that this is what consumers of the style expect.
        border = {}
        for key, attr, convert in (
            ('space', 'padding', str),
            ('sz', 'border_width', str),
            ('val', 'border_style', None),
            ('color', 'border_color', None),
        ):
            if differs(attr):
                value = getattr(child_style, attr)
                border[key] = value if convert is None else convert(value)
        if border:
            add('bdr', **border)

        self.properties = tuple(collected)
        # Computed once: properties never change after construction
        self._hash = hash(self.properties)

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        # Defer to Python's default handling for foreign types instead of
        # failing with AttributeError on other.properties.
        if not isinstance(other, DescendantTextStyle):
            return NotImplemented
        return self.properties == other.properties

    def __ne__(self, other):
        if not isinstance(other, DescendantTextStyle):
            return NotImplemented
        return self.properties != other.properties

    def serialize(self, styles):
        """Build the character style element, append it to ``styles`` and
        return it.

        The element gets ``styleId``/``type`` attributes, a ``name`` child and
        an ``rPr`` child holding one element per recorded property.
        """
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type='character')
        style.append(makeelement(style, 'name', val=self.name))
        rpr = makeelement(style, 'rPr')
        style.append(rpr)
        for name, attrs in self.properties:
            rpr.append(makeelement(style, name, **dict(attrs)))
        styles.append(style)
        return style
def read_css_block_borders(self, css, store_css_style=False): def read_css_block_borders(self, css, store_css_style=False):
for edge in border_edges: for edge in border_edges:
if css is None: if css is None:
@ -540,7 +627,7 @@ class StylesManager(object):
ans = existing ans = existing
return ans return ans
def finalize(self, blocks): def finalize(self, all_blocks):
block_counts, run_counts = Counter(), Counter() block_counts, run_counts = Counter(), Counter()
block_rmap, run_rmap = defaultdict(list), defaultdict(list) block_rmap, run_rmap = defaultdict(list), defaultdict(list)
used_pairs = defaultdict(list) used_pairs = defaultdict(list)
@ -548,7 +635,7 @@ class StylesManager(object):
headings = frozenset('h1 h2 h3 h4 h5 h6'.split()) headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
pure_block_styles = set() pure_block_styles = set()
for block in blocks: for block in all_blocks:
bs = block.style bs = block.style
block_counts[bs] += 1 block_counts[bs] += 1
block_rmap[block.style].append(block) block_rmap[block.style].append(block)
@ -574,17 +661,6 @@ class StylesManager(object):
if i == 0: if i == 0:
self.normal_pure_block_style = bs self.normal_pure_block_style = bs
rnum = len(str(max(1, len(run_counts) - 1)))
for i, (text_style, count) in enumerate(run_counts.most_common()):
text_style.id = 'Text%d' % i
text_style.name = '%0{}d Text'.format(rnum) % i
text_style.seq = i
if i == 0:
self.normal_text_style = text_style
for s in tuple(self.text_styles):
if s.id is None:
self.text_styles.pop(s)
counts = Counter() counts = Counter()
smap = {} smap = {}
for (bs, rs), blocks in used_pairs.iteritems(): for (bs, rs), blocks in used_pairs.iteritems():
@ -614,8 +690,27 @@ class StylesManager(object):
style.seq = i style.seq = i
self.combined_styles = sorted(counts.iterkeys(), key=attrgetter('seq')) self.combined_styles = sorted(counts.iterkeys(), key=attrgetter('seq'))
[ls.apply() for ls in self.combined_styles] [ls.apply() for ls in self.combined_styles]
descendant_style_map = {}
ds_counts = Counter()
for block in all_blocks:
for run in block.runs:
if run.parent_style is not run.style:
ds = DescendantTextStyle(run.parent_style, run.style)
if ds.properties:
run.descendant_style = descendant_style_map.get(ds)
if run.descendant_style is None:
run.descendant_style = descendant_style_map[ds] = ds
ds_counts[run.descendant_style] += run.style_weight
rnum = len(str(max(1, len(ds_counts) - 1)))
for i, (text_style, count) in enumerate(ds_counts.most_common()):
text_style.id = 'Text%d' % i
text_style.name = '%0{}d Text'.format(rnum) % i
text_style.seq = i
self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq'))
self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, ( self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
self.text_styles, self.combined_styles)))) self.descendant_text_styles, self.combined_styles))))
self.primary_heading_style = None self.primary_heading_style = None
if heading_styles: if heading_styles:
@ -634,7 +729,7 @@ class StylesManager(object):
lang.attrib[k] = self.document_lang lang.attrib[k] = self.document_lang
for style in self.combined_styles: for style in self.combined_styles:
style.serialize(styles, self.normal_style) style.serialize(styles, self.normal_style)
for style in sorted(self.text_styles, key=attrgetter('seq')): for style in self.descendant_text_styles:
style.serialize(styles, self.normal_text_style) style.serialize(styles)
for style in sorted(self.pure_block_styles, key=attrgetter('seq')): for style in sorted(self.pure_block_styles, key=attrgetter('seq')):
style.serialize(styles, self.normal_pure_block_style) style.serialize(styles, self.normal_pure_block_style)