DOCX Input: Cascade the font css

This commit is contained in:
Kovid Goyal 2013-05-14 18:39:58 +05:30
parent 33793ff0d1
commit a597fe76bb
4 changed files with 81 additions and 15 deletions

View File

@ -208,7 +208,7 @@ class ParagraphStyle(object):
# Misc.
'text_indent', 'text_align', 'line_height', 'direction', 'background_color',
'numbering',
'numbering', 'font_family', 'font_size',
)
def __init__(self, pPr=None):
@ -232,6 +232,8 @@ class ParagraphStyle(object):
for s in XPath('./w:pStyle[@w:val]')(pPr):
self.linked_style = get(s, 'w:val')
self.font_family = self.font_size = inherit
self._css = None
def update(self, other):
@ -274,10 +276,13 @@ class ParagraphStyle(object):
if self.line_height not in {inherit, '1'}:
c['line-height'] = self.line_height
for x in ('text_indent', 'text_align', 'background_color'):
for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'):
val = getattr(self, x)
if val is not inherit:
if x == 'font_size':
val = '%.3gpt' % val
c[x.replace('_', '-')] = val
return self._css
# TODO: keepNext must be done at markup level

View File

@ -172,6 +172,18 @@ class RunStyle(object):
if val is inherit:
setattr(self, p, getattr(parent, p))
def get_border_css(self, ans):
    '''
    Copy this style's border properties into the CSS mapping ``ans``.

    For each of ``border_color``, ``border_style`` and ``border_width``
    that is not ``inherit``, the corresponding ``border-color``,
    ``border-style`` or ``border-width`` key is set in ``ans``. The width
    is rendered as a point value using ``'%.3gpt'``; the other two values
    are stored unchanged. ``ans`` is modified in place and nothing is
    returned.
    '''
    for part in ('color', 'style', 'width'):
        value = getattr(self, 'border_' + part)
        if value is inherit:
            # Nothing set for this property, leave ans untouched
            continue
        if part == 'width':
            value = '%.3gpt' % value
        ans['border-%s' % part] = value
def clear_border_css(self):
    '''
    Reset ``border_color``, ``border_style`` and ``border_width`` on this
    style to ``inherit``.
    '''
    for part in ('color', 'style', 'width'):
        attr_name = 'border_%s' % part
        setattr(self, attr_name, inherit)
@property
def css(self):
if self._css is None:
@ -196,12 +208,7 @@ class RunStyle(object):
if self.vanish is True:
c['display'] = 'none'
for x in ('color', 'style', 'width'):
val = getattr(self, 'border_'+x)
if x == 'width' and val is not inherit:
val = '%.3gpt' % val
if val is not inherit:
c['border-%s' % x] = val
self.get_border_css(c)
if self.padding is not inherit:
c['padding'] = '%.3gpt' % self.padding
@ -223,6 +230,7 @@ class RunStyle(object):
if self.font_family is not inherit:
c['font-family'] = self.font_family
return self._css
def same_border(self, other):

View File

@ -258,6 +258,55 @@ class Styles(object):
if obj.tag.endswith('}r'):
return self.resolve_run(obj)
def cascade(self, layers):
    '''
    Hoist font settings from runs up to paragraphs, and from paragraphs up
    to the body.

    ``layers`` maps each paragraph object to the list of its run objects.
    Styles are looked up via ``self.resolve_paragraph()`` and
    ``self.resolve_run()`` and are modified in place.

    After this call ``self.body_font_family`` and ``self.body_font_size``
    hold the body level values (defaulting to ``'serif'`` and ``'10pt'``
    when no paragraph supplies one).
    '''
    # Fallbacks, used when no paragraph ends up with an explicit value
    self.body_font_family = 'serif'
    self.body_font_size = '10pt'

    # Pass 1: move font settings from the runs onto their paragraph
    for para, runs in layers.iteritems():
        run_styles = [self.resolve_run(run) for run in runs]
        para_style = self.resolve_paragraph(para)

        # The paragraph gets the most common explicit family of its runs
        family_counts = Counter(
            rs.font_family for rs in run_styles
            if rs.font_family is not inherit)
        if family_counts:
            dominant_family = family_counts.most_common(1)[0][0]
            para_style.font_family = dominant_family
            for rs in run_styles:
                if rs.font_family == dominant_family:
                    rs.font_family = inherit

        # The paragraph gets the first explicit size found among its runs
        explicit_sizes = [
            rs.font_size for rs in run_styles
            if rs.font_size is not inherit]
        if explicit_sizes:
            first_size = explicit_sizes[0]
            para_style.font_size = first_size
            for rs in run_styles:
                if rs.font_size == first_size:
                    rs.font_size = inherit

    # Pass 2: move the most common paragraph settings onto the body
    para_styles = [self.resolve_paragraph(para) for para in layers]

    family_counts = Counter(
        ps.font_family for ps in para_styles
        if ps.font_family is not inherit)
    if family_counts:
        body_family = family_counts.most_common(1)[0][0]
        self.body_font_family = body_family
        for ps in para_styles:
            if ps.font_family == body_family:
                ps.font_family = inherit

    size_counts = Counter(
        ps.font_size for ps in para_styles
        if ps.font_size is not inherit)
    if size_counts:
        body_size = size_counts.most_common(1)[0][0]
        for ps in para_styles:
            if ps.font_size == body_size:
                ps.font_size = inherit
        self.body_font_size = '%.3gpt' % body_size
def resolve_numbering(self, numbering):
# When a numPr element appears inside a paragraph style, the lvl info
# must be discarded and pStyle used instead.
@ -298,12 +347,14 @@ class Styles(object):
ef = self.fonts.embed_fonts(dest_dir, docx)
prefix = textwrap.dedent(
'''\
body { font-family: %s; font-size: %s }
p { text-indent: 1.5em }
ul, ol, p { margin: 0; padding: 0 }
''')
''') % (self.body_font_family, self.body_font_size)
if ef:
prefix += '\n' + ef
prefix = ef + '\n' + prefix
ans = []
for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]):

View File

@ -64,12 +64,15 @@ class Convert(object):
doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type)
self.layers = OrderedDict()
for wp in XPath('//w:p')(doc):
p = self.convert_p(wp)
self.body.append(p)
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
# TODO: Last section properties <w:sectPr> child of <w:body>
self.styles.cascade(self.layers)
numbered = []
for html_obj, obj in self.object_map.iteritems():
raw = obj.get('calibre_num_id', None)
@ -156,9 +159,11 @@ class Convert(object):
dest = P()
self.object_map[dest] = p
style = self.styles.resolve_paragraph(p)
self.layers[p] = []
for run in XPath('descendant::w:r')(p):
span = self.convert_run(run)
dest.append(span)
self.layers[p].append(run)
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
if m is not None:
@ -184,12 +189,9 @@ class Convert(object):
spans = []
bs = {}
for span, style in border_run:
c = style.css
style.get_border_css(bs)
style.clear_border_css()
spans.append(span)
for x in ('width', 'color', 'style'):
val = c.pop('border-%s' % x, None)
if val is not None:
bs['border-%s' % x] = val
if bs:
cls = self.styles.register(bs, 'text_border')
wrapper = self.wrap_elems(spans, SPAN())