From a597fe76bb40aa170af740b269e5cc48f8e5e633 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 18:39:58 +0530 Subject: [PATCH] DOCX Input: Cascade the font css --- src/calibre/ebooks/docx/block_styles.py | 9 +++- src/calibre/ebooks/docx/char_styles.py | 20 ++++++--- src/calibre/ebooks/docx/styles.py | 55 ++++++++++++++++++++++++- src/calibre/ebooks/docx/to_html.py | 12 +++--- 4 files changed, 81 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index eef68a184f..10dc416eec 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -208,7 +208,7 @@ class ParagraphStyle(object): # Misc. 'text_indent', 'text_align', 'line_height', 'direction', 'background_color', - 'numbering', + 'numbering', 'font_family', 'font_size', ) def __init__(self, pPr=None): @@ -232,6 +232,8 @@ class ParagraphStyle(object): for s in XPath('./w:pStyle[@w:val]')(pPr): self.linked_style = get(s, 'w:val') + self.font_family = self.font_size = inherit + self._css = None def update(self, other): @@ -274,10 +276,13 @@ class ParagraphStyle(object): if self.line_height not in {inherit, '1'}: c['line-height'] = self.line_height - for x in ('text_indent', 'text_align', 'background_color'): + for x in ('text_indent', 'text_align', 'background_color', 'font_family', 'font_size'): val = getattr(self, x) if val is not inherit: + if x == 'font_size': + val = '%.3gpt' % val c[x.replace('_', '-')] = val + return self._css # TODO: keepNext must be done at markup level diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index b65766e494..ca023e23af 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -172,6 +172,18 @@ class RunStyle(object): if val is inherit: setattr(self, p, getattr(parent, p)) + def get_border_css(self, ans): + for x in ('color', 'style', 'width'): + val = getattr(self, 
'border_'+x) + if x == 'width' and val is not inherit: + val = '%.3gpt' % val + if val is not inherit: + ans['border-%s' % x] = val + + def clear_border_css(self): + for x in ('color', 'style', 'width'): + setattr(self, 'border_'+x, inherit) + @property def css(self): if self._css is None: @@ -196,12 +208,7 @@ class RunStyle(object): if self.vanish is True: c['display'] = 'none' - for x in ('color', 'style', 'width'): - val = getattr(self, 'border_'+x) - if x == 'width' and val is not inherit: - val = '%.3gpt' % val - if val is not inherit: - c['border-%s' % x] = val + self.get_border_css(c) if self.padding is not inherit: c['padding'] = '%.3gpt' % self.padding @@ -223,6 +230,7 @@ class RunStyle(object): if self.font_family is not inherit: c['font-family'] = self.font_family + return self._css def same_border(self, other): diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 13b9ebe58f..c17418d0dd 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -258,6 +258,55 @@ class Styles(object): if obj.tag.endswith('}r'): return self.resolve_run(obj) + def cascade(self, layers): + self.body_font_family = 'serif' + self.body_font_size = '10pt' + + for p, runs in layers.iteritems(): + char_styles = [self.resolve_run(r) for r in runs] + block_style = self.resolve_paragraph(p) + c = Counter() + for s in char_styles: + if s.font_family is not inherit: + c[s.font_family] += 1 + if c: + family = c.most_common(1)[0][0] + block_style.font_family = family + for s in char_styles: + if s.font_family == family: + s.font_family = inherit + + sizes = [s.font_size for s in char_styles if s.font_size is not inherit] + if sizes: + sz = block_style.font_size = sizes[0] + for s in char_styles: + if s.font_size == sz: + s.font_size = inherit + + block_styles = [self.resolve_paragraph(p) for p in layers] + c = Counter() + for s in block_styles: + if s.font_family is not inherit: + c[s.font_family] += 1 + + if c: + 
self.body_font_family = family = c.most_common(1)[0][0] + for s in block_styles: + if s.font_family == family: + s.font_family = inherit + + c = Counter() + for s in block_styles: + if s.font_size is not inherit: + c[s.font_size] += 1 + + if c: + sz = c.most_common(1)[0][0] + for s in block_styles: + if s.font_size == sz: + s.font_size = inherit + self.body_font_size = '%.3gpt' % sz + def resolve_numbering(self, numbering): # When a numPr element appears inside a paragraph style, the lvl info # must be discarder and pStyle used instead. @@ -298,12 +347,14 @@ class Styles(object): ef = self.fonts.embed_fonts(dest_dir, docx) prefix = textwrap.dedent( '''\ + body { font-family: %s; font-size: %s } + p { text-indent: 1.5em } ul, ol, p { margin: 0; padding: 0 } - ''') + ''') % (self.body_font_family, self.body_font_size) if ef: - prefix += '\n' + ef + prefix = ef + '\n' + prefix ans = [] for (cls, css) in sorted(self.classes.itervalues(), key=lambda x:x[0]): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index b4e5b0e5f7..902952ca4a 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -64,12 +64,15 @@ class Convert(object): doc = self.docx.document relationships_by_id, relationships_by_type = self.docx.document_relationships self.read_styles(relationships_by_type) + self.layers = OrderedDict() for wp in XPath('//w:p')(doc): p = self.convert_p(wp) self.body.append(p) # TODO: tables <w:tbl> child of <w:body> (nested tables?)
# TODO: Last section properties <w:sectPr> child of <w:body> + self.styles.cascade(self.layers) + numbered = [] for html_obj, obj in self.object_map.iteritems(): raw = obj.get('calibre_num_id', None) @@ -156,9 +159,11 @@ class Convert(object): dest = P() self.object_map[dest] = p style = self.styles.resolve_paragraph(p) + self.layers[p] = [] for run in XPath('descendant::w:r')(p): span = self.convert_run(run) dest.append(span) + self.layers[p].append(run) m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE) if m is not None: @@ -184,12 +189,9 @@ class Convert(object): spans = [] bs = {} for span, style in border_run: - c = style.css + style.get_border_css(bs) + style.clear_border_css() spans.append(span) - for x in ('width', 'color', 'style'): - val = c.pop('border-%s' % x, None) - if val is not None: - bs['border-%s' % x] = val if bs: cls = self.styles.register(bs, 'text_border') wrapper = self.wrap_elems(spans, SPAN())