DOCX Output: Do not put page-break properties in styles, so as to reduce the number of distinct styles. Also handle page-break-after on block-level elements.

This commit is contained in:
Kovid Goyal 2015-05-11 07:55:27 +05:30
parent 35699b1f7b
commit 51b3df2c29
2 changed files with 26 additions and 13 deletions

View File

@ -131,10 +131,12 @@ class Block(object):
self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell) self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell)
self.styles_manager, self.links_manager = styles_manager, links_manager self.styles_manager, self.links_manager = styles_manager, links_manager
self.keep_next = False self.keep_next = False
self.page_break_before = False
self.runs = [] self.runs = []
self.skipped = False self.skipped = False
self.linked_style = None self.linked_style = None
self.page_break_before = style['page-break-before'] == 'always'
self.keep_lines = style['page-break-inside'] == 'avoid'
self.page_break_after = False
def resolve_skipped(self, next_block): def resolve_skipped(self, next_block):
if not self.is_empty(): if not self.is_empty():
@ -189,8 +191,6 @@ class Block(object):
ppr = makeelement(p, 'w:pPr') ppr = makeelement(p, 'w:pPr')
if self.keep_next: if self.keep_next:
makeelement(ppr, 'w:keepNext') makeelement(ppr, 'w:keepNext')
if self.page_break_before:
makeelement(ppr, 'w:pageBreakBefore')
if self.float_spec is not None: if self.float_spec is not None:
self.float_spec.serialize(self, ppr) self.float_spec.serialize(self, ppr)
if self.numbering_id is not None: if self.numbering_id is not None:
@ -201,6 +201,10 @@ class Block(object):
makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id) makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id)
if self.is_first_block: if self.is_first_block:
makeelement(ppr, 'w:pageBreakBefore', w_val='off') makeelement(ppr, 'w:pageBreakBefore', w_val='off')
elif self.page_break_before:
makeelement(ppr, 'w:pageBreakBefore', w_val='on')
if self.keep_lines:
makeelement(ppr, 'w:keepLines', w_val='on')
for run in self.runs: for run in self.runs:
run.serialize(p, self.links_manager) run.serialize(p, self.links_manager)
for bmark in end_bookmarks: for bmark in end_bookmarks:
@ -208,6 +212,7 @@ class Block(object):
def __repr__(self): def __repr__(self):
return 'Block(%r)' % self.runs return 'Block(%r)' % self.runs
__str__ = __repr__
def is_empty(self): def is_empty(self):
for run in self.runs: for run in self.runs:
@ -228,6 +233,7 @@ class Blocks(object):
self.tables = [] self.tables = []
self.current_table = None self.current_table = None
self.open_html_blocks = set() self.open_html_blocks = set()
self.html_tag_start_blocks = {}
def current_or_new_block(self, html_tag, tag_style): def current_or_new_block(self, html_tag, tag_style):
return self.current_block or self.start_new_block(html_tag, tag_style) return self.current_block or self.start_new_block(html_tag, tag_style)
@ -248,6 +254,7 @@ class Blocks(object):
self.current_block = Block( self.current_block = Block(
self.namespace, self.styles_manager, self.links_manager, html_block, style, self.namespace, self.styles_manager, self.links_manager, html_block, style,
is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item) is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
self.html_tag_start_blocks[html_block] = self.current_block
self.open_html_blocks.add(html_block) self.open_html_blocks.add(html_block)
return self.current_block return self.current_block
@ -267,6 +274,9 @@ class Blocks(object):
def finish_tag(self, html_tag): def finish_tag(self, html_tag):
if self.current_block is not None and html_tag in self.open_html_blocks: if self.current_block is not None and html_tag in self.open_html_blocks:
start_block = self.html_tag_start_blocks.get(html_tag)
if start_block is not None and start_block.html_style['page-break-after'] == 'always':
self.current_block.page_break_after = True
self.end_current_block() self.end_current_block()
self.open_html_blocks.discard(html_tag) self.open_html_blocks.discard(html_tag)
@ -300,7 +310,10 @@ class Blocks(object):
if block.float_spec is not None: if block.float_spec is not None:
block.float_spec.blocks.remove(block) block.float_spec.blocks.remove(block)
try: try:
self.all_blocks[pos].bookmarks.update(block.bookmarks) next_block = self.all_blocks[pos]
next_block.bookmarks.update(block.bookmarks)
for attr in 'page_break_after page_break_before'.split():
setattr(next_block, attr, getattr(block, attr))
except (IndexError, KeyError): except (IndexError, KeyError):
pass pass
@ -323,6 +336,13 @@ class Blocks(object):
self.all_blocks[self.pos].page_break_before = True self.all_blocks[self.pos].page_break_before = True
self.block_map = {} self.block_map = {}
def apply_page_break_after(self):
    """Convert pending ``page_break_after`` flags into ``page_break_before``.

    Walks ``self.all_blocks`` as adjacent (block, following block) pairs.
    When a block has ``page_break_after`` set, the block that follows it
    gets ``page_break_before = True`` -- but only if both blocks share the
    same ``parent_items`` object and that object is ``self.items``.

    The last block has no successor, so its ``page_break_after`` flag is
    never acted upon here. Nothing is returned; blocks are updated in place.
    """
    blocks = self.all_blocks
    for current, following in zip(blocks, blocks[1:]):
        if not current.page_break_after:
            continue
        # Identity (not equality) comparison: both blocks must live in the
        # very same container, and that container must be self.items.
        same_container = following.parent_items is current.parent_items
        if same_container and current.parent_items is self.items:
            following.page_break_before = True
def __repr__(self): def __repr__(self):
return 'Block(%r)' % self.runs return 'Block(%r)' % self.runs
@ -369,6 +389,7 @@ class Convert(object):
for pos, block in reversed(remove_blocks): for pos, block in reversed(remove_blocks):
self.blocks.delete_block_at(pos) self.blocks.delete_block_at(pos)
self.blocks.all_blocks[0].is_first_block = True self.blocks.all_blocks[0].is_first_block = True
self.blocks.apply_page_break_after()
self.lists_manager.finalize(all_blocks) self.lists_manager.finalize(all_blocks)
self.styles_manager.finalize(all_blocks) self.styles_manager.finalize(all_blocks)

View File

@ -363,7 +363,7 @@ def read_css_block_borders(self, css, store_css_style=False):
class BlockStyle(DOCXStyle): class BlockStyle(DOCXStyle):
ALL_PROPS = tuple( ALL_PROPS = tuple(
'text_align page_break_before keep_lines css_text_indent text_indent line_height background_color'.split() + 'text_align css_text_indent text_indent line_height background_color'.split() +
['margin_' + edge for edge in border_edges] + ['margin_' + edge for edge in border_edges] +
['css_margin_' + edge for edge in border_edges] + ['css_margin_' + edge for edge in border_edges] +
[x%edge for edge in border_edges for x in border_props] [x%edge for edge in border_edges for x in border_props]
@ -378,15 +378,12 @@ class BlockStyle(DOCXStyle):
setattr(self, 'padding_' + edge, 0) setattr(self, 'padding_' + edge, 0)
setattr(self, 'margin_' + edge, 0) setattr(self, 'margin_' + edge, 0)
if css is None: if css is None:
self.page_break_before = self.keep_lines = False
self.text_indent = 0 self.text_indent = 0
self.css_text_indent = None self.css_text_indent = None
self.line_height = 280 self.line_height = 280
self.background_color = None self.background_color = None
self.text_align = 'left' self.text_align = 'left'
else: else:
self.page_break_before = css['page-break-before'] == 'always'
self.keep_lines = css['page-break-inside'] == 'avoid'
self.text_indent = int(css['text-indent'] * 20) self.text_indent = int(css['text-indent'] * 20)
self.css_text_indent = css._get('text-indent') self.css_text_indent = css._get('text-indent')
self.line_height = max(0, int(css.lineHeight * 20)) self.line_height = max(0, int(css.lineHeight * 20))
@ -493,11 +490,6 @@ class BlockStyle(DOCXStyle):
if self is normal_style or self.text_align != normal_style.text_align: if self is normal_style or self.text_align != normal_style.text_align:
pPr.append(makeelement(pPr, 'jc', val=self.text_align)) pPr.append(makeelement(pPr, 'jc', val=self.text_align))
if (self is normal_style and self.page_break_before) or self.page_break_before != normal_style.page_break_before:
pPr.append(makeelement(pPr, 'pageBreakBefore', val=bmap(self.page_break_before)))
if (self is normal_style and self.keep_lines) or self.keep_lines != normal_style.keep_lines:
pPr.append(makeelement(pPr, 'keepLines', val=bmap(self.keep_lines)))
if self is not normal_style and self.next_style is not None: if self is not normal_style and self.next_style is not None:
pPr.append(makeelement(pPr, 'next', val=self.next_style)) pPr.append(makeelement(pPr, 'next', val=self.next_style))