mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Mapping from CSS to DOCX for block styles
This commit is contained in:
parent
bbbcc7774e
commit
4e8f148e59
@ -277,6 +277,17 @@ def unit_convert(value, base, font, dpi, body_font_size=12):
|
||||
result = value * body_font_size
|
||||
return result
|
||||
|
||||
def parse_css_length(value):
    '''
    Split a CSS length into its numeric part and its unit.

    :param value: The raw CSS value. It is matched against the module level
                  ``UNIT_RE`` pattern; anything ``UNIT_RE.match`` rejects with
                  a ``TypeError`` (for example ``None``) is treated as
                  unparseable.
    :return: ``(number, unit)`` where ``number`` is a float built from the
             first regex group and ``unit`` is the lower-cased second group,
             or ``(None, None)`` when the value does not match or the numeric
             group is empty.
    '''
    try:
        match = UNIT_RE.match(value)
    except TypeError:
        # Not something a regex can be run against
        match = None
    if match is None or not match.group(1):
        return None, None
    number = float(match.group(1))
    return number, match.group(2).lower()
|
||||
|
||||
def generate_masthead(title, output_path=None, width=600, height=60):
|
||||
from calibre.ebooks.conversion.config import load_defaults
|
||||
recs = load_defaults('mobi_output')
|
||||
|
@ -11,10 +11,12 @@ import re
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
|
||||
from calibre.ebooks import parse_css_length
|
||||
from calibre.ebooks.docx.names import namespaces
|
||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
from tinycss.color3 import parse_color_string
|
||||
|
||||
class Style(St):
|
||||
|
||||
@ -40,7 +42,62 @@ class Stylizer(Sz):
|
||||
except KeyError:
|
||||
return Style(element, self)
|
||||
|
||||
class TextStyle(object):
|
||||
# The four box edges, in the order used whenever per-edge properties are
# generated or serialized.
border_edges = (
    'left',
    'top',
    'right',
    'bottom',
)
# Attribute name templates, one attribute per edge is created from each
# template by substituting the edge name for %s.
border_props = (
    'padding_%s',
    'border_%s_width',
    'border_%s_style',
    'border_%s_color',
)
|
||||
|
||||
def css_color_to_rgb(value):
    '''
    Convert a CSS color value into the form used for DOCX color attributes.

    :param value: A CSS color string (or a false value).
    :return: ``None`` for an empty value, for a value that
             ``parse_color_string`` cannot parse and for a color whose alpha
             is below 0.01; ``'auto'`` for ``currentColor`` (any case);
             otherwise a six digit upper-case hex string ``RRGGBB``.
    '''
    if not value:
        return None
    if value.lower() == 'currentcolor':
        return 'auto'
    parsed = parse_color_string(value)
    # Unparseable and (almost) fully transparent colors are both reported as
    # "no color"
    if parsed is None or parsed.alpha < 0.01:
        return None
    channels = (parsed.red, parsed.green, parsed.blue)
    return '%02X%02X%02X' % tuple(int(channel * 255) for channel in channels)
|
||||
|
||||
class DOCXStyle(object):
    '''
    Base class for style objects that can be serialized into a DOCX
    ``w:style`` element.

    Sub-classes list the names of the attributes that define the style in
    ``ALL_PROPS``, set those attributes and then call
    ``DOCXStyle.__init__(self)``. They must also provide a
    ``serialize(style_element)`` method, which is used by ``__repr__``.

    The hash is computed once, from the values of ``ALL_PROPS``, and cached.
    If any of those attributes is changed afterwards, ``update_hash()`` must
    be called again.
    '''

    ALL_PROPS = ()

    def __init__(self):
        self.update_hash()

    def _prop_values(self):
        ' The current values of all the properties that define this style '
        return tuple(getattr(self, x) for x in self.ALL_PROPS)

    def __hash__(self):
        return self._hash

    def update_hash(self):
        ' Recompute the cached hash from the current property values '
        self._hash = hash(self._prop_values())

    def __eq__(self, other):
        # Compare the actual property values. Comparing only the hashes
        # reports distinct styles as equal whenever their hashes collide
        # (for instance hash(-1) == hash(-2) in CPython) and fails outright
        # for unhashable objects.
        if type(other) is not type(self):
            return False
        return self._prop_values() == other._prop_values()

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # Serialize into a throwaway <w:style> element, for debugging
        return etree.tostring(self.serialize(etree.Element(w('style'), nsmap={'w':namespaces['w']})), pretty_print=True)
    __str__ = __repr__
|
||||
|
||||
# Map of CSS border-style keywords to the border type names written into the
# w:val attribute of DOCX border elements. Keywords missing from this map are
# treated as 'none' by the code that looks them up.
LINE_STYLES = dict(
    # No visible border
    none='none',
    hidden='none',
    # Plain lines
    dotted='dotted',
    dashed='dashed',
    solid='single',
    double='double',
    # 3D effects
    groove='threeDEngrave',
    ridge='threeDEmboss',
    inset='inset',
    outset='outset',
)
|
||||
|
||||
def w(x):
    ' Return the name x qualified with the "w" namespace, in the {uri}name form used by lxml '
    uri = namespaces['w']
    return '{%s}%s' % (uri, x)
|
||||
|
||||
class TextStyle(DOCXStyle):
|
||||
|
||||
ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
|
||||
'background_color', 'underline', 'strike', 'dstrike', 'caps',
|
||||
@ -72,21 +129,127 @@ class TextStyle(object):
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
self.spacing = None
|
||||
self.vertical_align = {'sub':'subscript', 'super':'superscript'}.get((css['vertical-align'] or '').lower(), 'baseline')
|
||||
|
||||
# TODO: Borders and padding
|
||||
|
||||
def __hash__(self):
|
||||
return hash(tuple(
|
||||
getattr(self, x) for x in self.ALL_PROPS))
|
||||
DOCXStyle.__init__(self)
|
||||
|
||||
def __eq__(self, other):
|
||||
for x in self.ALL_PROPS:
|
||||
if getattr(self, x) != getattr(other, x, None):
|
||||
return False
|
||||
return True
|
||||
def _css_number(value, default=0):
    ' Coerce a computed CSS value to a float, using default for non-numeric values such as "auto" or None '
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


class BlockStyle(DOCXStyle):
    '''
    The paragraph level (block) style of an HTML block, mapped from CSS to
    DOCX paragraph properties.

    :param css: The resolved style of the HTML block. It is indexed by CSS
                property name to get computed values, and its ``_style``
                mapping and ``_get()`` method are used to get the values as
                they were specified.
    :param html_block: The HTML element that the block was created from.
    :param is_first_block: True if this is the first block of the HTML file,
                           in which case ``page-break-before: always`` is
                           ignored.
    '''

    ALL_PROPS = tuple(
        'text_align page_break_before keep_lines keep_next css_text_indent text_indent line_height css_line_height background_color'.split()
        + ['margin_' + edge for edge in border_edges]
        + ['css_margin_' + edge for edge in border_edges]
        + [x%edge for edge in border_edges for x in border_props]
    )

    # Widths used for the CSS border width keywords, multiplied by eight
    # before being written out, just like numeric widths
    BORDER_WIDTH_KEYWORDS = {'thin':0.2, 'medium':1, 'thick':2}

    TEXT_ALIGN_MAP = {
        'start':'left', 'left':'left', 'end':'right', 'right':'right',
        'center':'center', 'justify':'both', 'centre':'center'}

    def __init__(self, css, html_block, is_first_block=False):
        self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always')
        self.keep_lines = css['page-break-inside'] == 'avoid'
        # TODO: Ensure that only the last docx block for this html block has the correct value for keep next
        self.keep_next = css['page-break-after'] == 'avoid'
        for edge in border_edges:
            # In DOCX padding can only be a positive integer
            setattr(self, 'padding_' + edge, max(0, int(_css_number(css['padding-' + edge]))))
            # In DOCX margin must be a positive integer in twips (twentieth of a point)
            setattr(self, 'margin_' + edge, max(0, int(_css_number(css['margin-' + edge]) * 20)))
            setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))
            # The width is either one of the CSS keywords or a number.
            # Numeric widths must not be discarded in favor of the keyword
            # table's default.
            raw_width = css['border-%s-width' % edge]
            width = self.BORDER_WIDTH_KEYWORDS.get(raw_width)
            if width is None:
                width = _css_number(raw_width)
            setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
            setattr(self, 'border_%s_color' % edge, css_color_to_rgb(css['border-%s-color' % edge]))
            setattr(self, 'border_%s_style' % edge, LINE_STYLES.get((css['border-%s-style' % edge] or '').lower(), 'none'))
        self.text_indent = max(0, int(_css_number(css['text-indent']) * 20))
        self.css_text_indent = css._get('text-indent')
        self.line_height = max(0, int(_css_number(css['line-height']) * 20))
        self.css_line_height = css._get('line-height')
        self.background_color = css_color_to_rgb(css['background-color'])
        self.text_align = self.TEXT_ALIGN_MAP.get((css['text-align'] or '').lower(), 'left')

        DOCXStyle.__init__(self)

    @staticmethod
    def _relative_length(css_length):
        '''
        Return the length in hundredths of an em (an ex counts as half an
        em), or None if the length was not specified in em or ex units.
        '''
        css_val, css_unit = parse_css_length(css_length)
        if css_unit not in ('em', 'ex'):
            return None
        return max(0, int(css_val * (50 if css_unit == 'ex' else 100)))

    def _set_length(self, elem, css_length, absolute, relative_attr, absolute_attr):
        '''
        Set a length on elem. Lengths specified in em/ex are written to
        relative_attr, all others are written, in twips, to absolute_attr.
        Lengths that are not positive are not written at all.
        '''
        relative = self._relative_length(css_length)
        if relative is None:
            if absolute > 0:
                elem.set(w(absolute_attr), str(absolute))
        elif relative > 0:
            elem.set(w(relative_attr), str(relative))

    def _serialize_spacing(self, style):
        ' Vertical margins and line height '
        spacing = style.makeelement(w('spacing'))
        for edge, attr in (('top', 'before'), ('bottom', 'after')):
            self._set_length(
                spacing, getattr(self, 'css_margin_' + edge), getattr(self, 'margin_' + edge), attr + 'Lines', attr)
        if self.css_line_height != 'normal':
            try:
                # A unitless line-height is a multiple of the font size
                css_val, css_unit = float(self.css_line_height), 'ratio'
            except (TypeError, ValueError):
                css_val, css_unit = parse_css_length(self.css_line_height)
            if css_unit in {'em', 'ex', '%', 'ratio'}:
                mult = {'ex':0.5, '%':0.01}.get(css_unit, 1)
                spacing.set(w('line'), str(int(css_val * 240 * mult)))
            else:
                spacing.set(w('line'), str(self.line_height))
                spacing.set(w('lineRule'), 'exactly')
        if spacing.attrib:
            style.append(spacing)

    def _serialize_indent(self, style):
        ' Horizontal margins and first line indent '
        ind = style.makeelement(w('ind'))
        for edge in ('left', 'right'):
            self._set_length(
                ind, getattr(self, 'css_margin_' + edge), getattr(self, 'margin_' + edge), edge + 'Chars', edge)
        self._set_length(ind, self.css_text_indent, self.text_indent, 'firstLineChars', 'firstLine')
        if ind.attrib:
            style.append(ind)

    def _serialize_borders(self, style):
        ' Borders and padding '
        pbdr = style.makeelement(w('pBdr'))
        for edge in border_edges:
            e = pbdr.makeelement(w(edge))
            padding = getattr(self, 'padding_' + edge)
            if padding > 0:
                e.set(w('space'), str(padding))
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            if width > 0 and bstyle != 'none':
                e.set(w('val'), bstyle)
                e.set(w('sz'), str(width))
                # The color is None when it could not be parsed or is
                # transparent, and attribute values must be strings
                e.set(w('color'), getattr(self, 'border_%s_color' % edge) or 'auto')
            if e.attrib:
                pbdr.append(e)
        if len(pbdr):
            style.append(pbdr)

    def serialize(self, style):
        '''
        Append the DOCX paragraph properties for this style to the element
        style and return it.
        '''
        self._serialize_spacing(style)
        self._serialize_indent(style)

        if self.background_color:
            shd = style.makeelement(w('shd'))
            style.append(shd)
            shd.set(w('val'), 'clear')
            shd.set(w('fill'), self.background_color)
            shd.set(w('color'), 'auto')

        self._serialize_borders(style)

        jc = style.makeelement(w('jc'))
        jc.set(w('val'), self.text_align)
        style.append(jc)

        for flag, tag in (
                (self.page_break_before, 'pageBreakBefore'),
                (self.keep_lines, 'keepLines'),
                (self.keep_next, 'keepNext')):
            if flag:
                style.append(style.makeelement(w(tag), **{w('val'):'on'}))
        return style
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
class LineBreak(object):
|
||||
|
||||
@ -97,7 +260,8 @@ class TextRun(object):
|
||||
|
||||
ws_pat = None
|
||||
|
||||
def __init__(self, style):
|
||||
def __init__(self, style, first_html_parent):
|
||||
self.first_html_parent = first_html_parent
|
||||
if self.ws_pat is None:
|
||||
TextRun.ws_pat = self.ws_pat = re.compile(r'\s+')
|
||||
self.style = style
|
||||
@ -125,20 +289,20 @@ class TextRun(object):
|
||||
if preserve_whitespace:
|
||||
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
|
||||
|
||||
style_cache = {}
|
||||
|
||||
class Block(object):
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, html_block, style, is_first_block=False):
|
||||
self.html_block = html_block
|
||||
self.style = BlockStyle(style, html_block, is_first_block=is_first_block)
|
||||
self.runs = []
|
||||
|
||||
def add_text(self, text, style, ignore_leading_whitespace=False):
|
||||
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None):
|
||||
ts = TextStyle(style)
|
||||
ws = style['white-space']
|
||||
if self.runs and ts == self.runs[-1].style:
|
||||
run = self.runs[-1]
|
||||
else:
|
||||
run = TextRun(ts)
|
||||
run = TextRun(ts, html_parent or self.html_block)
|
||||
self.runs.append(run)
|
||||
preserve_whitespace = ws in {'pre', 'pre-wrap'}
|
||||
if ignore_leading_whitespace and not preserve_whitespace:
|
||||
@ -176,9 +340,11 @@ class Convert(object):
|
||||
def process_item(self, item):
|
||||
stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile)
|
||||
|
||||
is_first_block = True
|
||||
for body in XPath('//h:body')(item.data):
|
||||
b = Block()
|
||||
b = Block(body, stylizer.style(body), is_first_block=is_first_block)
|
||||
self.blocks.append(b)
|
||||
is_first_block = False
|
||||
self.process_block(body, b, stylizer, ignore_tail=True)
|
||||
|
||||
def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
|
||||
@ -192,7 +358,7 @@ class Convert(object):
|
||||
if tag == 'img':
|
||||
return # TODO: Handle images
|
||||
if display == 'block':
|
||||
b = Block()
|
||||
b = Block(child, style)
|
||||
self.blocks.append(b)
|
||||
self.process_block(child, b, stylizer)
|
||||
else:
|
||||
@ -201,7 +367,7 @@ class Convert(object):
|
||||
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
||||
b = docx_block
|
||||
if b is not self.blocks[-1]:
|
||||
b = Block()
|
||||
b = Block(html_block, stylizer.style(html_block))
|
||||
self.blocks.append(b)
|
||||
b.add_text(html_block.tail, stylizer.style(html_block.getparent()))
|
||||
|
||||
@ -211,19 +377,19 @@ class Convert(object):
|
||||
return # TODO: Handle images
|
||||
style = stylizer.style(html_child)
|
||||
if html_child.text:
|
||||
docx_block.add_text(html_child.text, style)
|
||||
docx_block.add_text(html_child.text, style, html_parent=html_child)
|
||||
for child in html_child.iterchildren(etree.Element):
|
||||
style = stylizer.style(child)
|
||||
display = style.get('display', 'inline')
|
||||
if display == 'block':
|
||||
b = Block()
|
||||
b = Block(child, style)
|
||||
self.blocks.append(b)
|
||||
self.process_block(child, b, stylizer)
|
||||
else:
|
||||
self.process_inline(child, self.blocks[-1], stylizer)
|
||||
|
||||
if html_child.tail:
|
||||
docx_block.add_text(html_child.tail, stylizer.style(html_child.getparent()))
|
||||
self.blocks[-1].add_text(html_child.tail, stylizer.style(html_child.getparent()), html_parent=html_child.getparent())
|
||||
|
||||
def write(self):
|
||||
dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne'}}
|
||||
@ -240,11 +406,15 @@ class Convert(object):
|
||||
E.docDefaults(
|
||||
E.rPrDefault(
|
||||
E.rPr(
|
||||
E.rFonts(),
|
||||
E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
|
||||
E.sz(**{w('val'):'22'}),
|
||||
E.szCs(**{w('val'):'22'}),
|
||||
E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
|
||||
)
|
||||
),
|
||||
E.pPrDefault(
|
||||
E.pPr(
|
||||
E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
|
||||
)
|
||||
)
|
||||
)
|
||||
|
Loading…
x
Reference in New Issue
Block a user