mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Add support for framed text created by setting the same border style on neighboring paragraphs. Fixes #1526663 [DOCX Conversion- Border to a bunch of paras](https://bugs.launchpad.net/calibre/+bug/1526663)
This commit is contained in:
parent
5a500f7e1c
commit
1127fc805c
@ -74,6 +74,7 @@ LINE_STYLES = { # {{{
|
|||||||
# Read from XML {{{
|
# Read from XML {{{
|
||||||
|
|
||||||
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
|
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
|
||||||
|
border_edges = ('left', 'top', 'right', 'bottom', 'between')
|
||||||
|
|
||||||
def read_single_border(parent, edge, XPath, get):
|
def read_single_border(parent, edge, XPath, get):
|
||||||
color = style = width = padding = None
|
color = style = width = padding = None
|
||||||
@ -94,15 +95,12 @@ def read_single_border(parent, edge, XPath, get):
|
|||||||
if sz is not None:
|
if sz is not None:
|
||||||
# we dont care about art borders (they are only used for page borders)
|
# we dont care about art borders (they are only used for page borders)
|
||||||
try:
|
try:
|
||||||
# WebKit needs at least 1pt to render borders
|
width = min(96, max(2, float(sz))) / 8
|
||||||
width = min(96, max(8, float(sz))) / 8
|
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
pass
|
pass
|
||||||
if style == 'double' and width is not None and 0 < width < 3:
|
|
||||||
width = 3 # WebKit needs 3pts to render double borders
|
|
||||||
return {p:v for p, v in zip(border_props, (padding, width, style, color))}
|
return {p:v for p, v in zip(border_props, (padding, width, style, color))}
|
||||||
|
|
||||||
def read_border(parent, dest, XPath, get, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'):
|
def read_border(parent, dest, XPath, get, border_edges=border_edges, name='pBdr'):
|
||||||
vals = {k % edge:inherit for edge in border_edges for k in border_props}
|
vals = {k % edge:inherit for edge in border_edges for k in border_props}
|
||||||
|
|
||||||
for border in XPath('./w:' + name)(parent):
|
for border in XPath('./w:' + name)(parent):
|
||||||
@ -114,6 +112,22 @@ def read_border(parent, dest, XPath, get, border_edges=('left', 'top', 'right',
|
|||||||
for key, val in vals.iteritems():
|
for key, val in vals.iteritems():
|
||||||
setattr(dest, key, val)
|
setattr(dest, key, val)
|
||||||
|
|
||||||
|
def border_to_css(edge, style, css):
    """Copy the border settings of one edge from a style object into a CSS dict.

    :param edge: Edge name; it is substituted into the attribute names
        ``border_<edge>_style``, ``border_<edge>_color`` and
        ``border_<edge>_width`` that are read from ``style``.
    :param style: Object carrying those three attributes.
    :param css: Mapping that receives the ``border-<edge>-style``,
        ``border-<edge>-color`` and ``border-<edge>-width`` entries. It is
        modified in place; nothing is returned.

    A value that is ``inherit`` or ``None`` produces no CSS entry. Numeric
    widths are written as points using the ``%.3gpt`` format; non-numeric
    widths are written unchanged.
    """
    bs = getattr(style, 'border_%s_style' % edge)
    bc = getattr(style, 'border_%s_color' % edge)
    bw = getattr(style, 'border_%s_width' % edge)
    if isinstance(bw, (float, int, long)):
        # WebKit needs at least 1pt to render borders and 3pt to render double borders
        bw = max(bw, (3 if bs == 'double' else 1))
    if bs is not inherit and bs is not None:
        css['border-%s-style' % edge] = bs
    if bc is not inherit and bc is not None:
        css['border-%s-color' % edge] = bc
    if bw is not inherit and bw is not None:
        if isinstance(bw, (int, float, long)):
            bw = '%.3gpt' % bw
        css['border-%s-width' % edge] = bw
|
||||||
|
|
||||||
def read_indent(parent, dest, XPath, get):
|
def read_indent(parent, dest, XPath, get):
|
||||||
padding_left = padding_right = text_indent = inherit
|
padding_left = padding_right = text_indent = inherit
|
||||||
for indent in XPath('./w:ind')(parent):
|
for indent in XPath('./w:ind')(parent):
|
||||||
@ -304,6 +318,7 @@ class ParagraphStyle(object):
|
|||||||
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
|
'border_top_width', 'border_top_style', 'border_top_color', 'padding_top',
|
||||||
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
|
'border_right_width', 'border_right_style', 'border_right_color', 'padding_right',
|
||||||
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
|
'border_bottom_width', 'border_bottom_style', 'border_bottom_color', 'padding_bottom',
|
||||||
|
'border_between_width', 'border_between_style', 'border_between_color', 'padding_between',
|
||||||
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
|
'margin_left', 'margin_top', 'margin_right', 'margin_bottom',
|
||||||
|
|
||||||
# Misc.
|
# Misc.
|
||||||
@ -336,6 +351,7 @@ class ParagraphStyle(object):
|
|||||||
self.font_family = self.font_size = self.color = inherit
|
self.font_family = self.font_size = self.color = inherit
|
||||||
|
|
||||||
self._css = None
|
self._css = None
|
||||||
|
self._border_key = None
|
||||||
|
|
||||||
def update(self, other):
|
def update(self, other):
|
||||||
for prop in self.all_properties:
|
for prop in self.all_properties:
|
||||||
@ -362,13 +378,7 @@ class ParagraphStyle(object):
|
|||||||
if self.keepNext is True:
|
if self.keepNext is True:
|
||||||
c['page-break-after'] = 'avoid'
|
c['page-break-after'] = 'avoid'
|
||||||
for edge in ('left', 'top', 'right', 'bottom'):
|
for edge in ('left', 'top', 'right', 'bottom'):
|
||||||
val = getattr(self, 'border_%s_width' % edge)
|
border_to_css(edge, self, c)
|
||||||
if val is not inherit:
|
|
||||||
c['border-left-width'] = '%.3gpt' % val
|
|
||||||
for x in ('style', 'color'):
|
|
||||||
val = getattr(self, 'border_%s_%s' % (edge, x))
|
|
||||||
if val is not inherit:
|
|
||||||
c['border-%s-%s' % (edge, x)] = val
|
|
||||||
val = getattr(self, 'padding_%s' % edge)
|
val = getattr(self, 'padding_%s' % edge)
|
||||||
if val is not inherit:
|
if val is not inherit:
|
||||||
c['padding-%s' % edge] = '%.3gpt' % val
|
c['padding-%s' % edge] = '%.3gpt' % val
|
||||||
@ -388,3 +398,40 @@ class ParagraphStyle(object):
|
|||||||
|
|
||||||
return self._css
|
return self._css
|
||||||
|
|
||||||
|
@property
def border_key(self):
    """Tuple of all border-related values of this style, used for comparison.

    The tuple holds, for every edge in ``border_edges`` and every template in
    ``border_props``, the value of the attribute named ``template % edge``.

    The tuple is computed on first access and stored in ``self._border_key``;
    later accesses return the stored tuple, so changes made to the border
    attributes after the first access are not reflected in it.
    """
    if self._border_key is None:
        self._border_key = tuple(
            getattr(self, prop % edge)
            for edge in border_edges
            for prop in border_props
        )
    return self._border_key
|
||||||
|
|
||||||
|
def has_identical_borders(self, other_style):
    """Return True when ``other_style`` has the same ``border_key`` as this style.

    :param other_style: Any object. If it has no ``border_key`` attribute it
        is compared as ``None``.
    """
    return self.border_key == getattr(other_style, 'border_key', None)
|
||||||
|
|
||||||
|
def clear_borders(self):
    """Reset the width, color and style of every border edge to ``inherit``.

    All entries of ``border_edges`` except the last one are reset. Padding
    attributes are not touched.
    """
    for edge in border_edges[:-1]:
        for prop in ('width', 'color', 'style'):
            setattr(self, 'border_%s_%s' % (edge, prop), inherit)
|
||||||
|
|
||||||
|
def clone_border_styles(self):
    """Return a new ``ParagraphStyle`` carrying only this style's borders.

    The new style is created with ``self.namespace`` and receives the width,
    color and style of every entry of ``border_edges`` except the last one.
    No other attribute is copied.
    """
    style = ParagraphStyle(self.namespace)
    for edge in border_edges[:-1]:
        for prop in ('width', 'color', 'style'):
            attr = 'border_%s_%s' % (edge, prop)
            setattr(style, attr, getattr(self, attr))
    return style
|
||||||
|
|
||||||
|
def apply_between_border(self):
    """Use the "between" border of this style as its bottom border.

    Copies ``border_between_width``, ``border_between_color`` and
    ``border_between_style`` onto the matching ``border_bottom_*`` attributes.
    """
    for prop in ('width', 'color', 'style'):
        setattr(self, 'border_bottom_%s' % prop, getattr(self, 'border_between_%s' % prop))
|
||||||
|
|
||||||
|
def has_visible_border(self):
    """Return True if at least one edge of this style has a visible border.

    An edge counts as visible when its width is neither ``inherit`` nor falsy
    and its style is neither ``inherit`` nor the string ``'none'``. The last
    entry of ``border_edges`` is not examined.
    """
    for edge in border_edges[:-1]:
        bw, bs = getattr(self, 'border_%s_width' % edge), getattr(self, 'border_%s_style' % edge)
        if bw is not inherit and bw and bs is not inherit and bs != 'none':
            return True
    return False
|
||||||
|
@ -439,7 +439,7 @@ class Styles(object):
|
|||||||
body { font-family: %s; font-size: %s; color: %s }
|
body { font-family: %s; font-size: %s; color: %s }
|
||||||
|
|
||||||
/* In word all paragraphs have zero margins unless explicitly specified in a style */
|
/* In word all paragraphs have zero margins unless explicitly specified in a style */
|
||||||
p, h1, h2, h3, h4, h5, h6 { margin: 0; padding: 0 }
|
p, h1, h2, h3, h4, h5, h6, div { margin: 0; padding: 0 }
|
||||||
/* In word headings only have bold font if explicitly specified */
|
/* In word headings only have bold font if explicitly specified */
|
||||||
h1, h2, h3, h4, h5, h6 { font-weight: normal }
|
h1, h2, h3, h4, h5, h6 { font-weight: normal }
|
||||||
/* Setting padding-left to zero breaks rendering of lists, so we only set the other values to zero and leave padding-left for the user-agent */
|
/* Setting padding-left to zero breaks rendering of lists, so we only set the other values to zero and leave padding-left for the user-agent */
|
||||||
|
@ -8,7 +8,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
|
|
||||||
from lxml.html.builder import TABLE, TR, TD
|
from lxml.html.builder import TABLE, TR, TD
|
||||||
|
|
||||||
from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle
|
from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
|
||||||
from calibre.ebooks.docx.char_styles import RunStyle
|
from calibre.ebooks.docx.char_styles import RunStyle
|
||||||
|
|
||||||
# Read from XML {{{
|
# Read from XML {{{
|
||||||
@ -177,14 +177,10 @@ class Style(object):
|
|||||||
def convert_border(self):
|
def convert_border(self):
|
||||||
c = {}
|
c = {}
|
||||||
for x in edges:
|
for x in edges:
|
||||||
for prop in border_props:
|
border_to_css(x, self, c)
|
||||||
prop = prop % x
|
val = getattr(self, 'padding_%s' % x)
|
||||||
if prop.startswith('border'):
|
|
||||||
val = getattr(self, prop)
|
|
||||||
if val is not inherit:
|
if val is not inherit:
|
||||||
if isinstance(val, (int, float)):
|
c['padding-%s' % x] = '%.3gpt' % val
|
||||||
val = '%.3gpt' % val
|
|
||||||
c[prop.replace('_', '-')] = val
|
|
||||||
return c
|
return c
|
||||||
|
|
||||||
class RowStyle(Style):
|
class RowStyle(Style):
|
||||||
|
@ -100,10 +100,12 @@ class Convert(object):
|
|||||||
self.images(relationships_by_id)
|
self.images(relationships_by_id)
|
||||||
self.layers = OrderedDict()
|
self.layers = OrderedDict()
|
||||||
self.framed = [[]]
|
self.framed = [[]]
|
||||||
|
self.frame_map = {}
|
||||||
self.framed_map = {}
|
self.framed_map = {}
|
||||||
self.anchor_map = {}
|
self.anchor_map = {}
|
||||||
self.link_map = defaultdict(list)
|
self.link_map = defaultdict(list)
|
||||||
self.link_source_map = {}
|
self.link_source_map = {}
|
||||||
|
self.block_runs = []
|
||||||
paras = []
|
paras = []
|
||||||
|
|
||||||
self.log.debug('Converting Word markup to HTML')
|
self.log.debug('Converting Word markup to HTML')
|
||||||
@ -119,6 +121,7 @@ class Convert(object):
|
|||||||
|
|
||||||
self.read_block_anchors(doc)
|
self.read_block_anchors(doc)
|
||||||
self.styles.apply_contextual_spacing(paras)
|
self.styles.apply_contextual_spacing(paras)
|
||||||
|
self.mark_block_runs(paras)
|
||||||
# Apply page breaks at the start of every section, except the first
|
# Apply page breaks at the start of every section, except the first
|
||||||
# section (since that will be the start of the file)
|
# section (since that will be the start of the file)
|
||||||
self.styles.apply_section_page_breaks(self.section_starts[1:])
|
self.styles.apply_section_page_breaks(self.section_starts[1:])
|
||||||
@ -147,6 +150,7 @@ class Convert(object):
|
|||||||
dl[-1].append(p)
|
dl[-1].append(p)
|
||||||
paras.append(wp)
|
paras.append(wp)
|
||||||
self.styles.apply_contextual_spacing(paras)
|
self.styles.apply_contextual_spacing(paras)
|
||||||
|
self.mark_block_runs(paras)
|
||||||
|
|
||||||
for p, wp in self.object_map.iteritems():
|
for p, wp in self.object_map.iteritems():
|
||||||
if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
|
if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
|
||||||
@ -391,6 +395,7 @@ class Convert(object):
|
|||||||
self.object_map[dest] = p
|
self.object_map[dest] = p
|
||||||
style = self.styles.resolve_paragraph(p)
|
style = self.styles.resolve_paragraph(p)
|
||||||
self.layers[p] = []
|
self.layers[p] = []
|
||||||
|
self.frame_map[p] = style.frame
|
||||||
self.add_frame(dest, style.frame)
|
self.add_frame(dest, style.frame)
|
||||||
|
|
||||||
current_anchor = None
|
current_anchor = None
|
||||||
@ -680,6 +685,62 @@ class Convert(object):
|
|||||||
self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
|
self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
|
||||||
self.styles.register(css, 'frame')
|
self.styles.register(css, 'frame')
|
||||||
|
|
||||||
|
if not self.block_runs:
|
||||||
|
return
|
||||||
|
rmap = {v:k for k, v in self.object_map.iteritems()}
|
||||||
|
for border_style, blocks in self.block_runs:
|
||||||
|
paras = tuple(rmap[p] for p in blocks)
|
||||||
|
parent = paras[0].getparent()
|
||||||
|
idx = parent.index(paras[0])
|
||||||
|
frame = DIV(*paras)
|
||||||
|
parent.insert(idx, frame)
|
||||||
|
self.framed_map[frame] = css = border_style.css
|
||||||
|
self.styles.register(css, 'frame')
|
||||||
|
|
||||||
|
def mark_block_runs(self, paras):
    """Find runs of neighboring paragraphs that share one border and frame.

    Consecutive entries of ``paras`` are put in the same run when their
    ``self.frame_map`` entries are equal and their resolved styles report
    identical borders. Every run of two or more paragraphs is then processed:

    * the top padding of every paragraph but the first, and the bottom
      padding of every paragraph but the last, is set to 0;
    * the borders of every paragraph are cleared, and every paragraph but the
      last gets its "between" border as bottom border;
    * when the first paragraph of the run has a visible border, the
      horizontal margins of all paragraphs, the top margin of the first and
      the bottom margin of the last are set to ``inherit``, the removed
      top/bottom margins and the largest numeric left/right margins are moved
      onto a cloned border style, and ``(border_style, run)`` is appended to
      ``self.block_runs``.

    :param paras: Sequence of paragraph objects accepted by
        ``self.styles.resolve_paragraph``.
    """
    import numbers

    def widest(margin, current):
        # A margin can hold the ``inherit`` sentinel instead of a number;
        # comparing that against a number either raises or lets the sentinel
        # win and discard the real margins, so only numbers are folded in.
        if isinstance(margin, numbers.Number):
            return max(margin, current)
        return current

    def process_run(run):
        max_left = max_right = 0
        has_visible_border = None
        for p in run:
            style = self.styles.resolve_paragraph(p)
            if has_visible_border is None:
                # Decided once, from the first paragraph of the run
                has_visible_border = style.has_visible_border()
            max_left = widest(style.margin_left, max_left)
            max_right = widest(style.margin_right, max_right)
            if has_visible_border:
                style.margin_left = style.margin_right = inherit
            if p is not run[0]:
                style.padding_top = 0
            else:
                border_style = style.clone_border_styles()
                if has_visible_border:
                    border_style.margin_top, style.margin_top = style.margin_top, inherit
            if p is not run[-1]:
                style.padding_bottom = 0
            else:
                if has_visible_border:
                    border_style.margin_bottom, style.margin_bottom = style.margin_bottom, inherit
            style.clear_borders()
            if p is not run[-1]:
                style.apply_between_border()
        if has_visible_border:
            border_style.margin_left, border_style.margin_right = max_left, max_right
            # NOTE(review): nesting of this append was reconstructed from a
            # listing without indentation -- confirm it belongs inside the if.
            self.block_runs.append((border_style, run))

    run = []
    for p in paras:
        if run and self.frame_map.get(p) == self.frame_map.get(run[-1]):
            style = self.styles.resolve_paragraph(p)
            last_style = self.styles.resolve_paragraph(run[-1])
            if style.has_identical_borders(last_style):
                run.append(p)
                continue
        if len(run) > 1:
            process_run(run)
        run = [p]
    if len(run) > 1:
        process_run(run)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import shutil
|
import shutil
|
||||||
from calibre.utils.logging import default_log
|
from calibre.utils.logging import default_log
|
||||||
|
Loading…
x
Reference in New Issue
Block a user