Refactor styles code into its own module

This commit is contained in:
Kovid Goyal 2015-02-15 18:26:55 +05:30
parent 957ce7604e
commit 51a08167db
2 changed files with 281 additions and 266 deletions

View File

@ -11,14 +11,10 @@ import re
from lxml import etree
from lxml.builder import ElementMaker
from calibre.ebooks import parse_css_length
from calibre.ebooks.docx.names import namespaces
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
from calibre.ebooks.docx.styles import w, BlockStyle, TextStyle
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from calibre.ebooks.oeb.base import XPath, barename
from tinycss.css21 import CSS21Parser
# Module-level CSS 2.1 parser instance; parse_css_font_family() uses it to
# tokenize font-family values.
css_parser = CSS21Parser()
class Style(St):
@ -44,267 +40,6 @@ class Stylizer(Sz):
except KeyError:
return Style(element, self)
# The four box sides, in the order per-edge properties are enumerated.
border_edges = tuple('left top right bottom'.split())
# %-templates that become per-edge attribute names on style objects once an
# edge name is substituted in.
border_props = ('padding_%s',) + tuple(
    'border_%s_' + suffix for suffix in ('width', 'style', 'color'))
def parse_css_font_family(raw):
    """Yield the font family names found in a CSS ``font-family`` value.

    ``raw`` is the text of the property value. It is parsed with the
    module-level ``css_parser`` as a style attribute and the value of the
    first resulting declaration is scanned. Only string and identifier
    tokens are yielded; iteration stops at the ``inherit`` keyword.
    Nothing is yielded when no declaration could be parsed.
    """
    decl = css_parser.parse_style_attr('font-family:' + raw)[0]
    if not decl:
        return
    for token in decl[0].value:
        # Compare against the exact type names. A substring test on a
        # string such as 'STRING IDENT' would also accept any token whose
        # type is a fragment of it (e.g. a whitespace token of type 'S').
        if token.type in ('STRING', 'IDENT'):
            val = token.value
            if val == 'inherit':
                break
            yield val
def css_font_family_to_docx(raw):
    """Reduce a CSS ``font-family`` value to a single font name.

    Only the first family produced by parse_css_font_family() is used. If
    it is one of the generic families listed below (compared in lower
    case) the associated concrete font name is returned, otherwise the
    family name itself is returned unchanged. Returns None when no family
    is found.
    """
    substitutes = {'serif':'Cambria', 'sansserif':'Candara', 'sans-serif':'Candara', 'fantasy':'Comic Sans', 'cursive':'Segoe Script'}
    families = iter(parse_css_font_family(raw))
    try:
        first = next(families)
    except StopIteration:
        return None
    return substitutes.get(first.lower(), first)
class DOCXStyle(object):
    """Base class for hashable style records that serialize to DOCX XML.

    Subclasses list the attribute names that define a style in ALL_PROPS,
    assign those attributes in their own ``__init__`` and then call
    ``DOCXStyle.__init__`` so the hash gets cached. They must also provide
    ``serialize(element)``, which ``__repr__`` relies on.
    """

    # Names of the attributes that make up the identity of a style.
    ALL_PROPS = ()

    def __init__(self):
        self.update_hash()

    def __hash__(self):
        return self._hash

    def update_hash(self):
        """Recompute the cached hash from the current ALL_PROPS values.

        Must be called again after changing any attribute named in
        ALL_PROPS, otherwise the hash no longer matches the values used
        by ``__eq__``.
        """
        self._hash = hash(tuple(
            getattr(self, x) for x in self.ALL_PROPS))

    def __eq__(self, other):
        # Compare the actual property values. Comparing only the hashes
        # would report two different styles as equal whenever their hashes
        # happen to collide, and would raise for unhashable operands.
        if not isinstance(other, DOCXStyle):
            return NotImplemented
        if self.ALL_PROPS != other.ALL_PROPS:
            return False
        return all(getattr(self, x) == getattr(other, x) for x in self.ALL_PROPS)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':namespaces['w']})), pretty_print=True)
    __str__ = __repr__

    def serialize_borders(self, bdr):
        """Append one child per edge that has padding or a visible border.

        ``bdr`` is the container element; it is returned after any edge
        elements have been appended. An edge gets a ``space`` attribute
        when its padding is positive, and ``val``/``sz``/``color``
        attributes when its width is positive and its style is not
        ``none``. Edges that end up with no attributes are left out.
        """
        for edge in border_edges:
            e = bdr.makeelement(w(edge))
            padding = getattr(self, 'padding_' + edge)
            if padding > 0:
                e.set(w('space'), str(padding))
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            if width > 0 and bstyle != 'none':
                e.set(w('val'), bstyle)
                e.set(w('sz'), str(width))
                # Attribute values must be strings; use the automatic
                # color when no border color was resolved.
                e.set(w('color'), getattr(self, 'border_%s_color' % edge) or 'auto')
            if e.attrib:
                bdr.append(e)
        return bdr
# Maps a CSS border-style keyword to the border type name written to DOCX.
# Keywords that are absent here are treated as 'none' by the style classes.
LINE_STYLES = dict([
    ('none', 'none'),
    ('hidden', 'none'),
    ('dotted', 'dotted'),
    ('dashed', 'dashed'),
    ('solid', 'single'),
    ('double', 'double'),
    ('groove', 'threeDEngrave'),
    ('ridge', 'threeDEmboss'),
    ('inset', 'inset'),
    ('outset', 'outset'),
])
def w(x):
    """Return ``x`` qualified with the ``w`` namespace as ``{uri}name``."""
    return '{%(uri)s}%(name)s' % {'uri': namespaces['w'], 'name': x}
class TextStyle(DOCXStyle):
    """Character-level (run) formatting computed from a resolved CSS style.

    The constructor reads the CSS properties it understands from ``css``
    and stores them as plain attributes; serialize() writes them out as
    child elements of a DOCX style element.
    """

    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
                 'background_color', 'underline', 'strike', 'dstrike', 'caps',
                 'shadow', 'small_caps', 'spacing', 'vertical_align') + tuple(
        x%edge for edge in border_edges for x in border_props)

    def __init__(self, css):
        """Read the text formatting from ``css``.

        ``css`` is indexed by CSS property name and also provides the
        ``backgroundColor`` and ``effective_text_decoration`` attributes.
        """
        self.font_family = css_font_family_to_docx(css['font-family'])
        try:
            # stylizer normalizes all font sizes into pts; stored in half-points
            self.font_size = max(0, int(float(css['font-size']) * 2))
        except (ValueError, TypeError, AttributeError):
            self.font_size = None
        fw = css['font-weight']
        self.bold = fw.lower() in {'bold', 'bolder'} or int_or_zero(fw) >= 700
        self.italic = css['font-style'].lower() in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        self.background_color = convert_color(css.backgroundColor)
        td = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in td
        self.dstrike = 'line-through' in td and 'overline' in td
        self.strike = not self.dstrike and 'line-through' in td
        self.text_transform = css['text-transform']  # TODO: If lowercase or capitalize, transform the actual text
        self.caps = self.text_transform == 'uppercase'
        self.small_caps = css['font-variant'].lower() in {'small-caps', 'smallcaps'}
        self.shadow = css['text-shadow'] not in {'none', None}
        try:
            # stored in twentieths of a point
            self.spacing = int(float(css['letter-spacing']) * 20)
        except (ValueError, TypeError, AttributeError):
            self.spacing = None
        self.vertical_align = css['vertical-align']

        for edge in border_edges:
            # In DOCX padding can only be a positive integer
            try:
                padding = max(0, int(css['padding-' + edge]))
            except (ValueError, TypeError):
                padding = 0  # invalid value for padding
            setattr(self, 'padding_' + edge, padding)
            width = css['border-%s-width' % edge]
            try:
                # presumably already a number of points when the stylizer
                # resolved a length -- honour it instead of discarding it
                width = float(width)
            except (ValueError, TypeError):
                width = {'thin':0.2, 'medium':1, 'thick':2}.get(width, 0)
            # Border sizes are written in eighths of a point, clamped to 2..96
            setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
            # Attribute values must be strings, so never store an empty color
            setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')
            setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none'))

        DOCXStyle.__init__(self)

    def serialize(self, style):
        """Append the run properties to ``style`` and return ``style``."""

        def add(name, val):
            # Append <w:name w:val="val"/> to the style element
            style.append(style.makeelement(w(name), **{w('val'): val}))

        if self.font_family:
            # No font element at all when no family could be determined;
            # a None attribute value cannot be serialized.
            style.append(style.makeelement(w('rFonts'), **{
                w(k):self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))
        for suffix in ('', 'Cs'):
            if self.font_size is not None:
                # Skip the size when unknown rather than writing 'None'
                add('sz' + suffix, str(self.font_size))
            add('b' + suffix, 'on' if self.bold else 'off')
            add('i' + suffix, 'on' if self.italic else 'off')
        if self.color:
            add('color', str(self.color))
        if self.background_color:
            # The shading color belongs in w:fill; w:val names the pattern
            # (same form as used for paragraph shading in BlockStyle).
            shd = style.makeelement(w('shd'))
            style.append(shd)
            shd.set(w('val'), 'clear')
            shd.set(w('fill'), str(self.background_color))
            shd.set(w('color'), 'auto')
        if self.underline:
            add('u', 'single')
        if self.dstrike:
            add('dstrike', 'on')
        elif self.strike:
            add('strike', 'on')
        if self.caps:
            add('caps', 'on')
        if self.small_caps:
            add('smallCaps', 'on')
        if self.shadow:
            add('shadow', 'on')
        if self.spacing is not None:
            add('spacing', str(self.spacing))
        if isinstance(self.vertical_align, (int, float)):
            # numeric offsets are written as a position in half-points
            add('position', str(int(self.vertical_align * 2)))
        elif isinstance(self.vertical_align, basestring):
            val = {'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(
                self.vertical_align.lower())
            if val:
                add('vertAlign', val)

        bdr = self.serialize_borders(style.makeelement(w('bdr')))
        if len(bdr):
            style.append(bdr)

        return style
class BlockStyle(DOCXStyle):
    """Paragraph-level formatting computed from a resolved CSS style.

    The constructor reads margins, padding, borders, indentation, line
    height, alignment and page-break hints from ``css``; serialize()
    writes them out as child elements of a DOCX style element.
    """

    ALL_PROPS = tuple(
        'text_align page_break_before keep_lines keep_next css_text_indent text_indent line_height css_line_height background_color'.split()
        + ['margin_' + edge for edge in border_edges]
        + ['css_margin_' + edge for edge in border_edges]
        + [x%edge for edge in border_edges for x in border_props]
    )

    def __init__(self, css, html_block, is_first_block=False):
        """Read the block formatting from ``css``.

        ``html_block`` is the source element; a page break is always
        requested when its tag ends with ``}body``. ``is_first_block``
        suppresses a CSS-requested page break before the block.
        """
        self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always')
        self.keep_lines = css['page-break-inside'] == 'avoid'
        # TODO: Ensure that only the last docx block for this html block has the correct value for keep next
        self.keep_next = css['page-break-after'] == 'avoid'
        for edge in border_edges:
            # In DOCX padding can only be a positive integer
            try:
                padding = max(0, int(css['padding-' + edge]))
            except (ValueError, TypeError):
                padding = 0  # invalid value for padding
            setattr(self, 'padding_' + edge, padding)
            # In DOCX margin must be a positive integer in twips (twentieth of a point)
            try:
                margin = max(0, int(css['margin-' + edge] * 20))
            except (ValueError, TypeError):
                margin = 0  # non-numeric margin, e.g. margin: auto
            setattr(self, 'margin_' + edge, margin)
            setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))
            width = css['border-%s-width' % edge]
            try:
                # presumably already a number of points when the stylizer
                # resolved a length -- honour it instead of discarding it
                width = float(width)
            except (ValueError, TypeError):
                width = {'thin':0.2, 'medium':1, 'thick':2}.get(width, 0)
            # Border sizes are written in eighths of a point, clamped to 2..96
            setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
            # Attribute values must be strings, so never store an empty color
            setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')
            setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none'))
        self.text_indent = max(0, int(css['text-indent'] * 20))
        self.css_text_indent = css._get('text-indent')
        self.line_height = max(0, int(css['line-height'] * 20))
        self.css_line_height = css._get('line-height')
        self.background_color = convert_color(css['background-color'])
        self.text_align = {'start':'left', 'left':'left', 'end':'right', 'right':'right', 'center':'center', 'justify':'both', 'centre':'center'}.get(
            css['text-align'].lower(), 'left')

        DOCXStyle.__init__(self)

    def serialize(self, style):
        """Append the paragraph properties to ``style`` and return ``style``."""
        spacing = style.makeelement(w('spacing'))
        # Vertical margins become spacing before/after the paragraph. A
        # tuple keeps the order of the two attributes deterministic.
        for edge, attr in (('top', 'before'), ('bottom', 'after')):
            css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
            if css_unit in ('em', 'ex'):
                # font-relative margins are written in hundredths of a line
                lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if lines > 0:
                    spacing.set(w(attr + 'Lines'), str(lines))
            else:
                val = getattr(self, 'margin_' + edge)
                if val > 0:
                    spacing.set(w(attr), str(val))
        if self.css_line_height != 'normal':
            try:
                css_val, css_unit = float(self.css_line_height), 'ratio'
            except Exception:
                css_val, css_unit = parse_css_length(self.css_line_height)
            if css_unit in {'em', 'ex', '%', 'ratio'}:
                # relative line heights: 240 corresponds to a single line
                mult = {'ex':0.5, '%':0.01}.get(css_unit, 1)
                val = int(css_val * 240 * mult)
                spacing.set(w('line'), str(val))
            else:
                spacing.set(w('line'), str(self.line_height))
                # 'exact' is the line spacing rule name for a fixed height
                spacing.set(w('lineRule'), 'exact')

        if spacing.attrib:
            style.append(spacing)

        ind = style.makeelement(w('ind'))
        # Horizontal margins become paragraph indentation
        for edge in ('left', 'right'):
            css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
            if css_unit in ('em', 'ex'):
                chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if chars > 0:
                    ind.set(w(edge + 'Chars'), str(chars))
            else:
                val = getattr(self, 'margin_' + edge)
                if val > 0:
                    # the attribute is named after the edge being indented
                    ind.set(w(edge), str(val))
        css_val, css_unit = parse_css_length(self.css_text_indent)
        if css_unit in ('em', 'ex'):
            chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
            if chars > 0:
                ind.set(w('firstLineChars'), str(chars))
        else:
            val = self.text_indent
            if val > 0:
                ind.set(w('firstLine'), str(val))
        if ind.attrib:
            style.append(ind)

        if self.background_color:
            shd = style.makeelement(w('shd'))
            style.append(shd)
            shd.set(w('val'), 'clear')
            shd.set(w('fill'), self.background_color)
            shd.set(w('color'), 'auto')

        pbdr = self.serialize_borders(style.makeelement(w('pBdr')))
        if len(pbdr):
            style.append(pbdr)
        jc = style.makeelement(w('jc'))
        jc.set(w('val'), self.text_align)
        style.append(jc)
        for enabled, name in (
                (self.page_break_before, 'pageBreakBefore'),
                (self.keep_lines, 'keepLines'),
                (self.keep_next, 'keepNext')):
            if enabled:
                style.append(style.makeelement(w(name), **{w('val'):'on'}))
        return style
class LineBreak(object):

View File

@ -0,0 +1,280 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml import etree
from calibre.ebooks import parse_css_length
from calibre.ebooks.docx.names import namespaces
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
from tinycss.css21 import CSS21Parser
# Module-level CSS 2.1 parser instance; parse_css_font_family() uses it to
# tokenize font-family values.
css_parser = CSS21Parser()
# The four box sides, in the order per-edge properties are enumerated.
border_edges = tuple('left top right bottom'.split())
# %-templates that become per-edge attribute names on style objects once an
# edge name is substituted in.
border_props = ('padding_%s',) + tuple(
    'border_%s_' + suffix for suffix in ('width', 'style', 'color'))
def parse_css_font_family(raw):
    """Yield the font family names found in a CSS ``font-family`` value.

    ``raw`` is the text of the property value. It is parsed with the
    module-level ``css_parser`` as a style attribute and the value of the
    first resulting declaration is scanned. Only string and identifier
    tokens are yielded; iteration stops at the ``inherit`` keyword.
    Nothing is yielded when no declaration could be parsed.
    """
    decl = css_parser.parse_style_attr('font-family:' + raw)[0]
    if not decl:
        return
    for token in decl[0].value:
        # Compare against the exact type names. A substring test on a
        # string such as 'STRING IDENT' would also accept any token whose
        # type is a fragment of it (e.g. a whitespace token of type 'S').
        if token.type in ('STRING', 'IDENT'):
            val = token.value
            if val == 'inherit':
                break
            yield val
def css_font_family_to_docx(raw):
    """Reduce a CSS ``font-family`` value to a single font name.

    Only the first family produced by parse_css_font_family() is used. If
    it is one of the generic families listed below (compared in lower
    case) the associated concrete font name is returned, otherwise the
    family name itself is returned unchanged. Returns None when no family
    is found.
    """
    substitutes = {'serif':'Cambria', 'sansserif':'Candara', 'sans-serif':'Candara', 'fantasy':'Comic Sans', 'cursive':'Segoe Script'}
    families = iter(parse_css_font_family(raw))
    try:
        first = next(families)
    except StopIteration:
        return None
    return substitutes.get(first.lower(), first)
class DOCXStyle(object):
    """Base class for hashable style records that serialize to DOCX XML.

    Subclasses list the attribute names that define a style in ALL_PROPS,
    assign those attributes in their own ``__init__`` and then call
    ``DOCXStyle.__init__`` so the hash gets cached. They must also provide
    ``serialize(element)``, which ``__repr__`` relies on.
    """

    # Names of the attributes that make up the identity of a style.
    ALL_PROPS = ()

    def __init__(self):
        self.update_hash()

    def __hash__(self):
        return self._hash

    def update_hash(self):
        """Recompute the cached hash from the current ALL_PROPS values.

        Must be called again after changing any attribute named in
        ALL_PROPS, otherwise the hash no longer matches the values used
        by ``__eq__``.
        """
        self._hash = hash(tuple(
            getattr(self, x) for x in self.ALL_PROPS))

    def __eq__(self, other):
        # Compare the actual property values. Comparing only the hashes
        # would report two different styles as equal whenever their hashes
        # happen to collide, and would raise for unhashable operands.
        if not isinstance(other, DOCXStyle):
            return NotImplemented
        if self.ALL_PROPS != other.ALL_PROPS:
            return False
        return all(getattr(self, x) == getattr(other, x) for x in self.ALL_PROPS)

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':namespaces['w']})), pretty_print=True)
    __str__ = __repr__

    def serialize_borders(self, bdr):
        """Append one child per edge that has padding or a visible border.

        ``bdr`` is the container element; it is returned after any edge
        elements have been appended. An edge gets a ``space`` attribute
        when its padding is positive, and ``val``/``sz``/``color``
        attributes when its width is positive and its style is not
        ``none``. Edges that end up with no attributes are left out.
        """
        for edge in border_edges:
            e = bdr.makeelement(w(edge))
            padding = getattr(self, 'padding_' + edge)
            if padding > 0:
                e.set(w('space'), str(padding))
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            if width > 0 and bstyle != 'none':
                e.set(w('val'), bstyle)
                e.set(w('sz'), str(width))
                # Attribute values must be strings; use the automatic
                # color when no border color was resolved.
                e.set(w('color'), getattr(self, 'border_%s_color' % edge) or 'auto')
            if e.attrib:
                bdr.append(e)
        return bdr
# Maps a CSS border-style keyword to the border type name written to DOCX.
# Keywords that are absent here are treated as 'none' by the style classes.
LINE_STYLES = dict([
    ('none', 'none'),
    ('hidden', 'none'),
    ('dotted', 'dotted'),
    ('dashed', 'dashed'),
    ('solid', 'single'),
    ('double', 'double'),
    ('groove', 'threeDEngrave'),
    ('ridge', 'threeDEmboss'),
    ('inset', 'inset'),
    ('outset', 'outset'),
])
def w(x):
    """Return ``x`` qualified with the ``w`` namespace as ``{uri}name``."""
    return '{%(uri)s}%(name)s' % {'uri': namespaces['w'], 'name': x}
class TextStyle(DOCXStyle):
    """Character-level (run) formatting computed from a resolved CSS style.

    The constructor reads the CSS properties it understands from ``css``
    and stores them as plain attributes; serialize() writes them out as
    child elements of a DOCX style element.
    """

    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
                 'background_color', 'underline', 'strike', 'dstrike', 'caps',
                 'shadow', 'small_caps', 'spacing', 'vertical_align') + tuple(
        x%edge for edge in border_edges for x in border_props)

    def __init__(self, css):
        """Read the text formatting from ``css``.

        ``css`` is indexed by CSS property name and also provides the
        ``backgroundColor`` and ``effective_text_decoration`` attributes.
        """
        self.font_family = css_font_family_to_docx(css['font-family'])
        try:
            # stylizer normalizes all font sizes into pts; stored in half-points
            self.font_size = max(0, int(float(css['font-size']) * 2))
        except (ValueError, TypeError, AttributeError):
            self.font_size = None
        fw = css['font-weight']
        self.bold = fw.lower() in {'bold', 'bolder'} or int_or_zero(fw) >= 700
        self.italic = css['font-style'].lower() in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        self.background_color = convert_color(css.backgroundColor)
        td = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in td
        self.dstrike = 'line-through' in td and 'overline' in td
        self.strike = not self.dstrike and 'line-through' in td
        self.text_transform = css['text-transform']  # TODO: If lowercase or capitalize, transform the actual text
        self.caps = self.text_transform == 'uppercase'
        self.small_caps = css['font-variant'].lower() in {'small-caps', 'smallcaps'}
        self.shadow = css['text-shadow'] not in {'none', None}
        try:
            # stored in twentieths of a point
            self.spacing = int(float(css['letter-spacing']) * 20)
        except (ValueError, TypeError, AttributeError):
            self.spacing = None
        self.vertical_align = css['vertical-align']

        for edge in border_edges:
            # In DOCX padding can only be a positive integer
            try:
                padding = max(0, int(css['padding-' + edge]))
            except (ValueError, TypeError):
                padding = 0  # invalid value for padding
            setattr(self, 'padding_' + edge, padding)
            width = css['border-%s-width' % edge]
            try:
                # presumably already a number of points when the stylizer
                # resolved a length -- honour it instead of discarding it
                width = float(width)
            except (ValueError, TypeError):
                width = {'thin':0.2, 'medium':1, 'thick':2}.get(width, 0)
            # Border sizes are written in eighths of a point, clamped to 2..96
            setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
            # Attribute values must be strings, so never store an empty color
            setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')
            setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none'))

        DOCXStyle.__init__(self)

    def serialize(self, style):
        """Append the run properties to ``style`` and return ``style``."""

        def add(name, val):
            # Append <w:name w:val="val"/> to the style element
            style.append(style.makeelement(w(name), **{w('val'): val}))

        if self.font_family:
            # No font element at all when no family could be determined;
            # a None attribute value cannot be serialized.
            style.append(style.makeelement(w('rFonts'), **{
                w(k):self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))
        for suffix in ('', 'Cs'):
            if self.font_size is not None:
                # Skip the size when unknown rather than writing 'None'
                add('sz' + suffix, str(self.font_size))
            add('b' + suffix, 'on' if self.bold else 'off')
            add('i' + suffix, 'on' if self.italic else 'off')
        if self.color:
            add('color', str(self.color))
        if self.background_color:
            # The shading color belongs in w:fill; w:val names the pattern
            # (same form as used for paragraph shading in BlockStyle).
            shd = style.makeelement(w('shd'))
            style.append(shd)
            shd.set(w('val'), 'clear')
            shd.set(w('fill'), str(self.background_color))
            shd.set(w('color'), 'auto')
        if self.underline:
            add('u', 'single')
        if self.dstrike:
            add('dstrike', 'on')
        elif self.strike:
            add('strike', 'on')
        if self.caps:
            add('caps', 'on')
        if self.small_caps:
            add('smallCaps', 'on')
        if self.shadow:
            add('shadow', 'on')
        if self.spacing is not None:
            add('spacing', str(self.spacing))
        if isinstance(self.vertical_align, (int, float)):
            # numeric offsets are written as a position in half-points
            add('position', str(int(self.vertical_align * 2)))
        elif isinstance(self.vertical_align, basestring):
            val = {'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(
                self.vertical_align.lower())
            if val:
                add('vertAlign', val)

        bdr = self.serialize_borders(style.makeelement(w('bdr')))
        if len(bdr):
            style.append(bdr)

        return style
class BlockStyle(DOCXStyle):
    """Paragraph-level formatting computed from a resolved CSS style.

    The constructor reads margins, padding, borders, indentation, line
    height, alignment and page-break hints from ``css``; serialize()
    writes them out as child elements of a DOCX style element.
    """

    ALL_PROPS = tuple(
        'text_align page_break_before keep_lines keep_next css_text_indent text_indent line_height css_line_height background_color'.split()
        + ['margin_' + edge for edge in border_edges]
        + ['css_margin_' + edge for edge in border_edges]
        + [x%edge for edge in border_edges for x in border_props]
    )

    def __init__(self, css, html_block, is_first_block=False):
        """Read the block formatting from ``css``.

        ``html_block`` is the source element; a page break is always
        requested when its tag ends with ``}body``. ``is_first_block``
        suppresses a CSS-requested page break before the block.
        """
        self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always')
        self.keep_lines = css['page-break-inside'] == 'avoid'
        # TODO: Ensure that only the last docx block for this html block has the correct value for keep next
        self.keep_next = css['page-break-after'] == 'avoid'
        for edge in border_edges:
            # In DOCX padding can only be a positive integer
            try:
                padding = max(0, int(css['padding-' + edge]))
            except (ValueError, TypeError):
                padding = 0  # invalid value for padding
            setattr(self, 'padding_' + edge, padding)
            # In DOCX margin must be a positive integer in twips (twentieth of a point)
            try:
                margin = max(0, int(css['margin-' + edge] * 20))
            except (ValueError, TypeError):
                margin = 0  # non-numeric margin, e.g. margin: auto
            setattr(self, 'margin_' + edge, margin)
            setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))
            width = css['border-%s-width' % edge]
            try:
                # presumably already a number of points when the stylizer
                # resolved a length -- honour it instead of discarding it
                width = float(width)
            except (ValueError, TypeError):
                width = {'thin':0.2, 'medium':1, 'thick':2}.get(width, 0)
            # Border sizes are written in eighths of a point, clamped to 2..96
            setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
            # Attribute values must be strings, so never store an empty color
            setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')
            setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none'))
        self.text_indent = max(0, int(css['text-indent'] * 20))
        self.css_text_indent = css._get('text-indent')
        self.line_height = max(0, int(css['line-height'] * 20))
        self.css_line_height = css._get('line-height')
        self.background_color = convert_color(css['background-color'])
        self.text_align = {'start':'left', 'left':'left', 'end':'right', 'right':'right', 'center':'center', 'justify':'both', 'centre':'center'}.get(
            css['text-align'].lower(), 'left')

        DOCXStyle.__init__(self)

    def serialize(self, style):
        """Append the paragraph properties to ``style`` and return ``style``."""
        spacing = style.makeelement(w('spacing'))
        # Vertical margins become spacing before/after the paragraph. A
        # tuple keeps the order of the two attributes deterministic.
        for edge, attr in (('top', 'before'), ('bottom', 'after')):
            css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
            if css_unit in ('em', 'ex'):
                # font-relative margins are written in hundredths of a line
                lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if lines > 0:
                    spacing.set(w(attr + 'Lines'), str(lines))
            else:
                val = getattr(self, 'margin_' + edge)
                if val > 0:
                    spacing.set(w(attr), str(val))
        if self.css_line_height != 'normal':
            try:
                css_val, css_unit = float(self.css_line_height), 'ratio'
            except Exception:
                css_val, css_unit = parse_css_length(self.css_line_height)
            if css_unit in {'em', 'ex', '%', 'ratio'}:
                # relative line heights: 240 corresponds to a single line
                mult = {'ex':0.5, '%':0.01}.get(css_unit, 1)
                val = int(css_val * 240 * mult)
                spacing.set(w('line'), str(val))
            else:
                spacing.set(w('line'), str(self.line_height))
                # 'exact' is the line spacing rule name for a fixed height
                spacing.set(w('lineRule'), 'exact')

        if spacing.attrib:
            style.append(spacing)

        ind = style.makeelement(w('ind'))
        # Horizontal margins become paragraph indentation
        for edge in ('left', 'right'):
            css_val, css_unit = parse_css_length(getattr(self, 'css_margin_' + edge))
            if css_unit in ('em', 'ex'):
                chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if chars > 0:
                    ind.set(w(edge + 'Chars'), str(chars))
            else:
                val = getattr(self, 'margin_' + edge)
                if val > 0:
                    # the attribute is named after the edge being indented
                    ind.set(w(edge), str(val))
        css_val, css_unit = parse_css_length(self.css_text_indent)
        if css_unit in ('em', 'ex'):
            chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
            if chars > 0:
                ind.set(w('firstLineChars'), str(chars))
        else:
            val = self.text_indent
            if val > 0:
                ind.set(w('firstLine'), str(val))
        if ind.attrib:
            style.append(ind)

        if self.background_color:
            shd = style.makeelement(w('shd'))
            style.append(shd)
            shd.set(w('val'), 'clear')
            shd.set(w('fill'), self.background_color)
            shd.set(w('color'), 'auto')

        pbdr = self.serialize_borders(style.makeelement(w('pBdr')))
        if len(pbdr):
            style.append(pbdr)
        jc = style.makeelement(w('jc'))
        jc.set(w('val'), self.text_align)
        style.append(jc)
        for enabled, name in (
                (self.page_break_before, 'pageBreakBefore'),
                (self.keep_lines, 'keepLines'),
                (self.keep_next, 'keepNext')):
            if enabled:
                style.append(style.makeelement(w(name), **{w('val'):'on'}))
        return style