DOCX Input: Add support for framed text created by setting the same border style on neighboring paragraphs. Fixes #1526663 [DOCX Conversion- Border to a bunch of paras](https://bugs.launchpad.net/calibre/+bug/1526663)

This commit is contained in:
Kovid Goyal 2016-01-27 18:29:16 +05:30
parent 5a500f7e1c
commit 1127fc805c
4 changed files with 126 additions and 22 deletions

View File

@ -74,6 +74,7 @@ LINE_STYLES = { # {{{
# Read from XML {{{ # Read from XML {{{
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color') border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
border_edges = ('left', 'top', 'right', 'bottom', 'between')
def read_single_border(parent, edge, XPath, get): def read_single_border(parent, edge, XPath, get):
color = style = width = padding = None color = style = width = padding = None
@ -94,15 +95,12 @@ def read_single_border(parent, edge, XPath, get):
if sz is not None: if sz is not None:
# we dont care about art borders (they are only used for page borders) # we dont care about art borders (they are only used for page borders)
try: try:
# WebKit needs at least 1pt to render borders width = min(96, max(2, float(sz))) / 8
width = min(96, max(8, float(sz))) / 8
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
if style == 'double' and width is not None and 0 < width < 3:
width = 3 # WebKit needs 3pts to render double borders
return {p:v for p, v in zip(border_props, (padding, width, style, color))} return {p:v for p, v in zip(border_props, (padding, width, style, color))}
def read_border(parent, dest, XPath, get, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'): def read_border(parent, dest, XPath, get, border_edges=border_edges, name='pBdr'):
vals = {k % edge:inherit for edge in border_edges for k in border_props} vals = {k % edge:inherit for edge in border_edges for k in border_props}
for border in XPath('./w:' + name)(parent): for border in XPath('./w:' + name)(parent):
@ -114,6 +112,22 @@ def read_border(parent, dest, XPath, get, border_edges=('left', 'top', 'right',
for key, val in vals.iteritems(): for key, val in vals.iteritems():
setattr(dest, key, val) setattr(dest, key, val)
def border_to_css(edge, style, css):
    """Copy the border settings for one edge of *style* into the *css* mapping.

    :param edge: Edge name used to build the attribute and CSS property
        names, e.g. ``'left'`` reads ``style.border_left_style`` and writes
        ``css['border-left-style']``.
    :param style: Object exposing ``border_<edge>_style``,
        ``border_<edge>_color`` and ``border_<edge>_width`` attributes.
    :param css: Mapping that is updated in place. Values that are
        ``inherit`` or ``None`` are not written.
    """
    line_style = getattr(style, 'border_%s_style' % edge)
    colour = getattr(style, 'border_%s_color' % edge)
    width = getattr(style, 'border_%s_width' % edge)

    if isinstance(width, (float, int, long)):
        # WebKit needs at least 1pt to render borders and 3pt to render double borders
        minimum = 3 if line_style == 'double' else 1
        # Numeric widths are emitted as a length in points
        width = '%.3gpt' % max(width, minimum)

    # Same write order as before: style, then color, then width
    for suffix, value in (('style', line_style), ('color', colour), ('width', width)):
        if value is not inherit and value is not None:
            css['border-%s-%s' % (edge, suffix)] = value
def read_indent(parent, dest, XPath, get): def read_indent(parent, dest, XPath, get):
padding_left = padding_right = text_indent = inherit padding_left = padding_right = text_indent = inherit
for indent in XPath('./w:ind')(parent): for indent in XPath('./w:ind')(parent):
@ -304,6 +318,7 @@ class ParagraphStyle(object):
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top', 'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right', 'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom', 'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
'border_between_width', 'border_between_style', 'border_between_color', 'padding_between',
'margin_left', 'margin_top', 'margin_right', 'margin_bottom', 'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
# Misc. # Misc.
@ -336,6 +351,7 @@ class ParagraphStyle(object):
self.font_family = self.font_size = self.color = inherit self.font_family = self.font_size = self.color = inherit
self._css = None self._css = None
self._border_key = None
def update(self, other): def update(self, other):
for prop in self.all_properties: for prop in self.all_properties:
@ -362,13 +378,7 @@ class ParagraphStyle(object):
if self.keepNext is True: if self.keepNext is True:
c['page-break-after'] = 'avoid' c['page-break-after'] = 'avoid'
for edge in ('left', 'top', 'right', 'bottom'): for edge in ('left', 'top', 'right', 'bottom'):
val = getattr(self, 'border_%s_width' % edge) border_to_css(edge, self, c)
if val is not inherit:
c['border-left-width'] = '%.3gpt' % val
for x in ('style', 'color'):
val = getattr(self, 'border_%s_%s' % (edge, x))
if val is not inherit:
c['border-%s-%s' % (edge, x)] = val
val = getattr(self, 'padding_%s' % edge) val = getattr(self, 'padding_%s' % edge)
if val is not inherit: if val is not inherit:
c['padding-%s' % edge] = '%.3gpt' % val c['padding-%s' % edge] = '%.3gpt' % val
@ -388,3 +398,40 @@ class ParagraphStyle(object):
return self._css return self._css
@property
def border_key(self):
    """Tuple of every border-related property value, computed once and cached.

    The tuple holds, for each edge in ``border_edges`` and each template in
    ``border_props``, the value of the correspondingly named attribute. The
    result is stored in ``self._border_key`` and reused on later accesses.
    """
    cached = self._border_key
    if cached is None:
        cached = self._border_key = tuple(
            getattr(self, template % edge)
            for edge in border_edges
            for template in border_props
        )
    return cached
def has_identical_borders(self, other_style):
    """Return True if *other_style* has the same ``border_key`` as this style.

    An *other_style* without a ``border_key`` attribute is compared as
    ``None``.
    """
    mine = self.border_key
    theirs = getattr(other_style, 'border_key', None)
    return mine == theirs
def clear_borders(self):
    """Reset width, color and style to ``inherit`` for every edge except the last.

    The last entry of ``border_edges`` (``'between'``) is deliberately left
    untouched; padding attributes are not modified either.
    """
    for edge in border_edges[:-1]:
        prefix = 'border_%s_' % edge
        for suffix in ('width', 'color', 'style'):
            setattr(self, prefix + suffix, inherit)
def clone_border_styles(self):
    """Return a new ParagraphStyle carrying only this style's outer borders.

    The new style is created from ``self.namespace`` and receives the
    width, color and style of every edge in ``border_edges`` except the
    last one (``'between'``).
    """
    clone = ParagraphStyle(self.namespace)
    attribute_names = (
        'border_%s_%s' % (edge, prop)
        for edge in border_edges[:-1]
        for prop in ('width', 'color', 'style')
    )
    for name in attribute_names:
        setattr(clone, name, getattr(self, name))
    return clone
def apply_between_border(self):
    """Overwrite the bottom border's width, color and style with the 'between' border's."""
    for suffix in ('width', 'color', 'style'):
        between_value = getattr(self, 'border_between_%s' % suffix)
        setattr(self, 'border_bottom_%s' % suffix, between_value)
def has_visible_border(self):
    """Return True if any edge other than 'between' has a border that would be drawn.

    An edge counts when its width is neither ``inherit`` nor falsy and its
    style is neither ``inherit`` nor ``'none'``.
    """
    for edge in border_edges[:-1]:
        width = getattr(self, 'border_%s_width' % edge)
        line_style = getattr(self, 'border_%s_style' % edge)
        if width is inherit or not width:
            continue
        if line_style is not inherit and line_style != 'none':
            return True
    return False

View File

@ -439,7 +439,7 @@ class Styles(object):
body { font-family: %s; font-size: %s; color: %s } body { font-family: %s; font-size: %s; color: %s }
/* In word all paragraphs have zero margins unless explicitly specified in a style */ /* In word all paragraphs have zero margins unless explicitly specified in a style */
p, h1, h2, h3, h4, h5, h6 { margin: 0; padding: 0 } p, h1, h2, h3, h4, h5, h6, div { margin: 0; padding: 0 }
/* In word headings only have bold font if explicitly specified */ /* In word headings only have bold font if explicitly specified */
h1, h2, h3, h4, h5, h6 { font-weight: normal } h1, h2, h3, h4, h5, h6 { font-weight: normal }
/* Setting padding-left to zero breaks rendering of lists, so we only set the other values to zero and leave padding-left for the user-agent */ /* Setting padding-left to zero breaks rendering of lists, so we only set the other values to zero and leave padding-left for the user-agent */

View File

@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml.html.builder import TABLE, TR, TD from lxml.html.builder import TABLE, TR, TD
from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.char_styles import RunStyle
# Read from XML {{{ # Read from XML {{{
@ -177,14 +177,10 @@ class Style(object):
def convert_border(self): def convert_border(self):
c = {} c = {}
for x in edges: for x in edges:
for prop in border_props: border_to_css(x, self, c)
prop = prop % x val = getattr(self, 'padding_%s' % x)
if prop.startswith('border'): if val is not inherit:
val = getattr(self, prop) c['padding-%s' % x] = '%.3gpt' % val
if val is not inherit:
if isinstance(val, (int, float)):
val = '%.3gpt' % val
c[prop.replace('_', '-')] = val
return c return c
class RowStyle(Style): class RowStyle(Style):

View File

@ -100,10 +100,12 @@ class Convert(object):
self.images(relationships_by_id) self.images(relationships_by_id)
self.layers = OrderedDict() self.layers = OrderedDict()
self.framed = [[]] self.framed = [[]]
self.frame_map = {}
self.framed_map = {} self.framed_map = {}
self.anchor_map = {} self.anchor_map = {}
self.link_map = defaultdict(list) self.link_map = defaultdict(list)
self.link_source_map = {} self.link_source_map = {}
self.block_runs = []
paras = [] paras = []
self.log.debug('Converting Word markup to HTML') self.log.debug('Converting Word markup to HTML')
@ -119,6 +121,7 @@ class Convert(object):
self.read_block_anchors(doc) self.read_block_anchors(doc)
self.styles.apply_contextual_spacing(paras) self.styles.apply_contextual_spacing(paras)
self.mark_block_runs(paras)
# Apply page breaks at the start of every section, except the first # Apply page breaks at the start of every section, except the first
# section (since that will be the start of the file) # section (since that will be the start of the file)
self.styles.apply_section_page_breaks(self.section_starts[1:]) self.styles.apply_section_page_breaks(self.section_starts[1:])
@ -147,6 +150,7 @@ class Convert(object):
dl[-1].append(p) dl[-1].append(p)
paras.append(wp) paras.append(wp)
self.styles.apply_contextual_spacing(paras) self.styles.apply_contextual_spacing(paras)
self.mark_block_runs(paras)
for p, wp in self.object_map.iteritems(): for p, wp in self.object_map.iteritems():
if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab': if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
@ -391,6 +395,7 @@ class Convert(object):
self.object_map[dest] = p self.object_map[dest] = p
style = self.styles.resolve_paragraph(p) style = self.styles.resolve_paragraph(p)
self.layers[p] = [] self.layers[p] = []
self.frame_map[p] = style.frame
self.add_frame(dest, style.frame) self.add_frame(dest, style.frame)
current_anchor = None current_anchor = None
@ -680,6 +685,62 @@ class Convert(object):
self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]]) self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
self.styles.register(css, 'frame') self.styles.register(css, 'frame')
if not self.block_runs:
return
rmap = {v:k for k, v in self.object_map.iteritems()}
for border_style, blocks in self.block_runs:
paras = tuple(rmap[p] for p in blocks)
parent = paras[0].getparent()
idx = parent.index(paras[0])
frame = DIV(*paras)
parent.insert(idx, frame)
self.framed_map[frame] = css = border_style.css
self.styles.register(css, 'frame')
def mark_block_runs(self, paras):
    """Group neighbouring paragraphs that share identical borders into runs.

    Consecutive paragraphs form a run when they map to the same entry in
    ``self.frame_map`` and their resolved styles report identical borders.
    Every run of two or more paragraphs has its paragraph styles adjusted
    (inner padding zeroed, own borders cleared, 'between' border applied
    between members). When the run has a visible border, a
    ``(border_style, run)`` tuple is appended to ``self.block_runs``.

    :param paras: Iterable of paragraph objects accepted by
        ``self.styles.resolve_paragraph``.
    """
    # Function-scope import keeps this block self-contained
    import numbers

    def process_run(run):
        max_left = max_right = 0
        has_visible_border = None
        for p in run:
            style = self.styles.resolve_paragraph(p)
            if has_visible_border is None:
                # Decided once, from the first paragraph of the run
                has_visible_border = style.has_visible_border()
            # Margins can hold the non-numeric ``inherit`` placeholder (it is
            # assigned just below). Feeding that to max() raises TypeError on
            # Python 3 and silently discards the real maximum on Python 2, so
            # only numeric margins take part in the running maximum.
            if isinstance(style.margin_left, numbers.Number):
                max_left = max(style.margin_left, max_left)
            if isinstance(style.margin_right, numbers.Number):
                max_right = max(style.margin_right, max_right)
            if has_visible_border:
                # Horizontal margins move to the enclosing border style
                style.margin_left = style.margin_right = inherit
            if p is not run[0]:
                style.padding_top = 0
            else:
                border_style = style.clone_border_styles()
                if has_visible_border:
                    # The top margin of the first paragraph moves to the border style
                    border_style.margin_top, style.margin_top = style.margin_top, inherit
            if p is not run[-1]:
                style.padding_bottom = 0
            else:
                if has_visible_border:
                    # The bottom margin of the last paragraph moves to the border style
                    border_style.margin_bottom, style.margin_bottom = style.margin_bottom, inherit
            style.clear_borders()
            if p is not run[-1]:
                style.apply_between_border()
        if has_visible_border:
            border_style.margin_left, border_style.margin_right = max_left, max_right
            self.block_runs.append((border_style, run))

    run = []
    for p in paras:
        if run and self.frame_map.get(p) == self.frame_map.get(run[-1]):
            style = self.styles.resolve_paragraph(p)
            last_style = self.styles.resolve_paragraph(run[-1])
            if style.has_identical_borders(last_style):
                run.append(p)
                continue
        # The current paragraph does not extend the run: flush it and start a new one
        if len(run) > 1:
            process_run(run)
        run = [p]
    if len(run) > 1:
        process_run(run)
if __name__ == '__main__': if __name__ == '__main__':
import shutil import shutil
from calibre.utils.logging import default_log from calibre.utils.logging import default_log