DOCX Output: Fix handling of page break at start of every HTML file

This commit is contained in:
Kovid Goyal 2015-03-31 20:10:01 +05:30
parent 2729414b53
commit de5ce8d46b
2 changed files with 18 additions and 12 deletions

View File

@@ -93,12 +93,13 @@ class TextRun(object):
class Block(object): class Block(object):
def __init__(self, styles_manager, html_block, style, is_first_block=False): def __init__(self, styles_manager, html_block, style):
self.html_block = html_block self.html_block = html_block
self.html_style = style self.html_style = style
self.style = styles_manager.create_block_style(style, html_block, is_first_block=is_first_block) self.style = styles_manager.create_block_style(style, html_block)
self.styles_manager = styles_manager self.styles_manager = styles_manager
self.keep_next = False self.keep_next = False
self.page_break_before = False
self.runs = [] self.runs = []
def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False): def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False):
@@ -142,6 +143,8 @@ class Block(object):
p.append(ppr) p.append(ppr)
if self.keep_next: if self.keep_next:
ppr.append(ppr.makeelement(w('keepNext'))) ppr.append(ppr.makeelement(w('keepNext')))
if self.page_break_before:
ppr.append(ppr.makeelement(w('pageBreakBefore')))
ppr.append(ppr.makeelement(w('pStyle'), **{w('val'):self.style.id})) ppr.append(ppr.makeelement(w('pStyle'), **{w('val'):self.style.id}))
for run in self.runs: for run in self.runs:
run.serialize(p) run.serialize(p)
@@ -159,10 +162,10 @@ class Blocks(object):
self.pos = 0 self.pos = 0
self.current_block = None self.current_block = None
def start_new_block(self, styles_manager, html_block, style, is_first_tag=False): def start_new_block(self, styles_manager, html_block, style):
if self.current_block is not None: if self.current_block is not None:
self.all_blocks.append(self.current_block) self.all_blocks.append(self.current_block)
self.current_block = Block(styles_manager, html_block, style, is_first_block=is_first_tag) self.current_block = Block(styles_manager, html_block, style)
return self.current_block return self.current_block
def serialize(self, body): def serialize(self, body):
@@ -178,8 +181,11 @@ class Blocks(object):
self.current_block = None self.current_block = None
if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty(): if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
# Delete the empty block corresponding to the <body> tag when the # Delete the empty block corresponding to the <body> tag when the
# body tag has no text content before its first sub-block # body tag has no inline content before its first sub-block
del self.all_blocks[self.pos] del self.all_blocks[self.pos]
if self.pos > 0 and self.pos < len(self.all_blocks):
# Insert a page break corresponding to the start of the html file
self.all_blocks[self.pos].page_break_before = True
class Convert(object): class Convert(object):
@@ -240,7 +246,7 @@ class Convert(object):
# Image is floating so dont start a new paragraph for it # Image is floating so dont start a new paragraph for it
self.add_inline_tag(tagname, html_tag, tag_style, stylizer) self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
else: else:
self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_first_tag=is_first_tag) self.add_block_tag(tagname, html_tag, tag_style, stylizer)
inlined = False inlined = False
for child in html_tag.iterchildren('*'): for child in html_tag.iterchildren('*'):
@@ -258,8 +264,8 @@ class Convert(object):
if block is not None: if block is not None:
block.add_text(text, tag_style, ignore_leading_whitespace=ignore_leading_whitespace, html_parent=html_parent, is_parent_style=is_parent_style) block.add_text(text, tag_style, ignore_leading_whitespace=ignore_leading_whitespace, html_parent=html_parent, is_parent_style=is_parent_style)
def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_first_tag=False): def add_block_tag(self, tagname, html_tag, tag_style, stylizer):
block = self.blocks.start_new_block(self.styles_manager, html_tag, tag_style, is_first_tag=is_first_tag) block = self.blocks.start_new_block(self.styles_manager, html_tag, tag_style)
if tagname == 'img': if tagname == 'img':
self.images_manager.add_image(html_tag, block, stylizer) self.images_manager.add_image(html_tag, block, stylizer)
else: else:

View File

@@ -235,7 +235,7 @@ class BlockStyle(DOCXStyle):
[x%edge for edge in border_edges for x in border_props] [x%edge for edge in border_edges for x in border_props]
) )
def __init__(self, css, html_block, is_first_block=False): def __init__(self, css, html_block):
if css is None: if css is None:
self.page_break_before = self.keep_lines = False self.page_break_before = self.keep_lines = False
for edge in border_edges: for edge in border_edges:
@@ -251,7 +251,7 @@ class BlockStyle(DOCXStyle):
self.background_color = None self.background_color = None
self.text_align = 'left' self.text_align = 'left'
else: else:
self.page_break_before = html_block.tag.endswith('}body') or (not is_first_block and css['page-break-before'] == 'always') self.page_break_before = css['page-break-before'] == 'always'
self.keep_lines = css['page-break-inside'] == 'avoid' self.keep_lines = css['page-break-inside'] == 'avoid'
for edge in border_edges: for edge in border_edges:
# In DOCX padding can only be a positive integer # In DOCX padding can only be a positive integer
@@ -377,8 +377,8 @@ class StylesManager(object):
ans = existing ans = existing
return ans return ans
def create_block_style(self, css_style, html_block, is_first_block=False): def create_block_style(self, css_style, html_block):
ans = BlockStyle(css_style, html_block, is_first_block=is_first_block) ans = BlockStyle(css_style, html_block)
existing = self.block_styles.get(ans, None) existing = self.block_styles.get(ans, None)
if existing is None: if existing is None:
self.block_styles[ans] = ans self.block_styles[ans] = ans