mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More work on DOCX Output
Use a styles manager and fix handling of page-break-after:avoid
This commit is contained in:
parent
69b15134df
commit
47841f1e0f
@ -12,7 +12,7 @@ from lxml import etree
|
|||||||
from lxml.builder import ElementMaker
|
from lxml.builder import ElementMaker
|
||||||
|
|
||||||
from calibre.ebooks.docx.names import namespaces
|
from calibre.ebooks.docx.names import namespaces
|
||||||
from calibre.ebooks.docx.writer.styles import w, BlockStyle, TextStyle
|
from calibre.ebooks.docx.writer.styles import w, StylesManager
|
||||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||||
from calibre.ebooks.oeb.base import XPath, barename
|
from calibre.ebooks.oeb.base import XPath, barename
|
||||||
|
|
||||||
@ -65,13 +65,13 @@ class TextRun(object):
|
|||||||
self.texts.append((None, clear))
|
self.texts.append((None, clear))
|
||||||
|
|
||||||
def serialize(self, p):
|
def serialize(self, p):
|
||||||
r = p.makeelement('{%s}r' % namespaces['w'])
|
r = p.makeelement(w('r'))
|
||||||
p.append(r)
|
p.append(r)
|
||||||
for text, preserve_whitespace in self.texts:
|
for text, preserve_whitespace in self.texts:
|
||||||
if text is None:
|
if text is None:
|
||||||
r.append(r.makeelement(w('br'), **{w('clear'):preserve_whitespace}))
|
r.append(r.makeelement(w('br'), **{w('clear'):preserve_whitespace}))
|
||||||
else:
|
else:
|
||||||
t = r.makeelement('{%s}t' % namespaces['w'])
|
t = r.makeelement(w('t'))
|
||||||
r.append(t)
|
r.append(t)
|
||||||
t.text = text or ''
|
t.text = text or ''
|
||||||
if preserve_whitespace:
|
if preserve_whitespace:
|
||||||
@ -79,14 +79,16 @@ class TextRun(object):
|
|||||||
|
|
||||||
class Block(object):
|
class Block(object):
|
||||||
|
|
||||||
def __init__(self, html_block, style, is_first_block=False):
|
def __init__(self, styles_manager, html_block, style, is_first_block=False):
|
||||||
self.html_block = html_block
|
self.html_block = html_block
|
||||||
self.html_style = style
|
self.html_style = style
|
||||||
self.style = BlockStyle(style, html_block, is_first_block=is_first_block)
|
self.style = styles_manager.create_block_style(style, html_block, is_first_block=is_first_block)
|
||||||
|
self.styles_manager = styles_manager
|
||||||
|
self.keep_next = False
|
||||||
self.runs = []
|
self.runs = []
|
||||||
|
|
||||||
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None):
|
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None):
|
||||||
ts = TextStyle(style)
|
ts = self.styles_manager.create_text_style(style)
|
||||||
ws = style['white-space']
|
ws = style['white-space']
|
||||||
if self.runs and ts == self.runs[-1].style:
|
if self.runs and ts == self.runs[-1].style:
|
||||||
run = self.runs[-1]
|
run = self.runs[-1]
|
||||||
@ -107,13 +109,17 @@ class Block(object):
|
|||||||
if self.runs:
|
if self.runs:
|
||||||
run = self.runs[-1]
|
run = self.runs[-1]
|
||||||
else:
|
else:
|
||||||
run = TextRun(TextStyle(self.html_style), self.html_block)
|
run = TextRun(self.styles_manager.create_text_style(self.html_style), self.html_block)
|
||||||
self.runs.append(run)
|
self.runs.append(run)
|
||||||
run.add_break(clear=clear)
|
run.add_break(clear=clear)
|
||||||
|
|
||||||
def serialize(self, body):
|
def serialize(self, body):
|
||||||
p = body.makeelement('{%s}p' % namespaces['w'])
|
p = body.makeelement(w('p'))
|
||||||
body.append(p)
|
body.append(p)
|
||||||
|
ppr = p.makeelement(w('pPr'))
|
||||||
|
p.append(ppr)
|
||||||
|
if self.keep_next:
|
||||||
|
ppr.append(ppr.makeelement(w('keepNext')))
|
||||||
for run in self.runs:
|
for run in self.runs:
|
||||||
run.serialize(p)
|
run.serialize(p)
|
||||||
|
|
||||||
@ -129,6 +135,8 @@ class Convert(object):
|
|||||||
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
|
||||||
SVGRasterizer()(self.oeb, self.opts)
|
SVGRasterizer()(self.oeb, self.opts)
|
||||||
|
|
||||||
|
self.styles_manager = StylesManager()
|
||||||
|
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
self.process_item(item)
|
self.process_item(item)
|
||||||
|
|
||||||
@ -139,7 +147,7 @@ class Convert(object):
|
|||||||
|
|
||||||
is_first_block = True
|
is_first_block = True
|
||||||
for body in XPath('//h:body')(item.data):
|
for body in XPath('//h:body')(item.data):
|
||||||
b = Block(body, stylizer.style(body), is_first_block=is_first_block)
|
b = Block(self.styles_manager, body, stylizer.style(body), is_first_block=is_first_block)
|
||||||
self.blocks.append(b)
|
self.blocks.append(b)
|
||||||
is_first_block = False
|
is_first_block = False
|
||||||
self.process_block(body, b, stylizer, ignore_tail=True)
|
self.process_block(body, b, stylizer, ignore_tail=True)
|
||||||
@ -158,7 +166,7 @@ class Convert(object):
|
|||||||
if tag == 'img':
|
if tag == 'img':
|
||||||
pass # TODO: Handle images
|
pass # TODO: Handle images
|
||||||
if display == 'block' and tag != 'br':
|
if display == 'block' and tag != 'br':
|
||||||
b = Block(child, style)
|
b = Block(self.styles_manager, child, style)
|
||||||
self.blocks.append(b)
|
self.blocks.append(b)
|
||||||
self.process_block(child, b, stylizer)
|
self.process_block(child, b, stylizer)
|
||||||
else:
|
else:
|
||||||
@ -167,9 +175,11 @@ class Convert(object):
|
|||||||
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
if ignore_tail is False and html_block.tail and html_block.tail.strip():
|
||||||
b = docx_block
|
b = docx_block
|
||||||
if b is not self.blocks[-1]:
|
if b is not self.blocks[-1]:
|
||||||
b = Block(html_block, block_style)
|
b = Block(self.styles_manager, html_block, block_style)
|
||||||
self.blocks.append(b)
|
self.blocks.append(b)
|
||||||
b.add_text(html_block.tail, stylizer.style(html_block.getparent()))
|
b.add_text(html_block.tail, stylizer.style(html_block.getparent()))
|
||||||
|
if block_style['page-break-after'] == 'avoid':
|
||||||
|
self.blocks[-1].keep_next = True
|
||||||
|
|
||||||
def process_inline(self, html_child, docx_block, stylizer):
|
def process_inline(self, html_child, docx_block, stylizer):
|
||||||
tag = barename(html_child.tag)
|
tag = barename(html_child.tag)
|
||||||
@ -188,7 +198,7 @@ class Convert(object):
|
|||||||
style = stylizer.style(child)
|
style = stylizer.style(child)
|
||||||
display = style.get('display', 'inline')
|
display = style.get('display', 'inline')
|
||||||
if display == 'block':
|
if display == 'block':
|
||||||
b = Block(child, style)
|
b = Block(self.styles_manager, child, style)
|
||||||
self.blocks.append(b)
|
self.blocks.append(b)
|
||||||
self.process_block(child, b, stylizer)
|
self.process_block(child, b, stylizer)
|
||||||
else:
|
else:
|
||||||
|
@ -38,17 +38,17 @@ class DOCXStyle(object):
|
|||||||
ALL_PROPS = ()
|
ALL_PROPS = ()
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.update_hash()
|
self._hash = hash(tuple(
|
||||||
|
getattr(self, x) for x in self.ALL_PROPS))
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return self._hash
|
return self._hash
|
||||||
|
|
||||||
def update_hash(self):
|
|
||||||
self._hash = hash(tuple(
|
|
||||||
getattr(self, x) for x in self.ALL_PROPS))
|
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return hash(self) == hash(other)
|
for x in self.ALL_PROPS:
|
||||||
|
if getattr(self, x) != getattr(other, x, None):
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not self == other
|
return not self == other
|
||||||
@ -175,7 +175,7 @@ class TextStyle(DOCXStyle):
|
|||||||
class BlockStyle(DOCXStyle):
|
class BlockStyle(DOCXStyle):
|
||||||
|
|
||||||
ALL_PROPS = tuple(
|
ALL_PROPS = tuple(
|
||||||
'text_align page_break_before keep_lines keep_next css_text_indent text_indent line_height css_line_height background_color'.split()
|
'text_align page_break_before keep_lines css_text_indent text_indent line_height css_line_height background_color'.split()
|
||||||
+ ['margin_' + edge for edge in border_edges]
|
+ ['margin_' + edge for edge in border_edges]
|
||||||
+ ['css_margin_' + edge for edge in border_edges]
|
+ ['css_margin_' + edge for edge in border_edges]
|
||||||
+ [x%edge for edge in border_edges for x in border_props]
|
+ [x%edge for edge in border_edges for x in border_props]
|
||||||
@ -184,8 +184,6 @@ class BlockStyle(DOCXStyle):
|
|||||||
def __init__(self, css, html_block, is_first_block=False):
|
def __init__(self, css, html_block, is_first_block=False):
|
||||||
self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always')
|
self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always')
|
||||||
self.keep_lines = css['page-break-inside'] == 'avoid'
|
self.keep_lines = css['page-break-inside'] == 'avoid'
|
||||||
# TODO: Ensure that only the last docx block for this html block has the correct value for keep next
|
|
||||||
self.keep_next = css['page-break-after'] == 'avoid'
|
|
||||||
for edge in border_edges:
|
for edge in border_edges:
|
||||||
# In DOCX padding can only be a positive integer
|
# In DOCX padding can only be a positive integer
|
||||||
setattr(self, 'padding_' + edge, max(0, int(css['padding-' + edge])))
|
setattr(self, 'padding_' + edge, max(0, int(css['padding-' + edge])))
|
||||||
@ -272,9 +270,28 @@ class BlockStyle(DOCXStyle):
|
|||||||
style.append(style.makeelement(w('pageBreakBefore'), **{w('val'):'on'}))
|
style.append(style.makeelement(w('pageBreakBefore'), **{w('val'):'on'}))
|
||||||
if self.keep_lines:
|
if self.keep_lines:
|
||||||
style.append(style.makeelement(w('keepLines'), **{w('val'):'on'}))
|
style.append(style.makeelement(w('keepLines'), **{w('val'):'on'}))
|
||||||
if self.keep_next:
|
|
||||||
style.append(style.makeelement(w('keepNext'), **{w('val'):'on'}))
|
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
|
||||||
|
class StylesManager(object):
    """Registry that hands out one shared object per distinct style.

    Each ``create_*`` method builds a fresh style object and looks it up in
    a dictionary keyed by the style object itself, so the lookup relies on
    the style's own hashing and equality. If an equal style was registered
    earlier, that earlier instance is returned; otherwise the new one is
    registered and returned.
    """

    def __init__(self):
        # Both maps use the style object as key and as value.
        self.block_styles = {}
        self.text_styles = {}

    def create_text_style(self, css_style):
        """Return the registered TextStyle equal to one built from css_style.

        The newly built style is registered (and returned) when no equal
        style has been seen before.
        """
        candidate = TextStyle(css_style)
        # setdefault returns the stored instance if an equal key exists,
        # otherwise stores and returns the candidate.
        return self.text_styles.setdefault(candidate, candidate)

    def create_block_style(self, css_style, html_block, is_first_block=False):
        """Return the registered BlockStyle equal to one built from the args.

        css_style, html_block and is_first_block are passed straight through
        to the BlockStyle constructor. The newly built style is registered
        (and returned) when no equal style has been seen before.
        """
        candidate = BlockStyle(
            css_style, html_block, is_first_block=is_first_block)
        return self.block_styles.setdefault(candidate, candidate)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user