mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Output: Fix text immediately after a hidden tag not being included. Fixes #1717403 [Sections of Text Missing After Conversion (DOCX)](https://bugs.launchpad.net/calibre/+bug/1717403)
This commit is contained in:
parent
ff828ae47d
commit
f19fbaf61c
@ -477,62 +477,63 @@ class Convert(object):
|
|||||||
|
|
||||||
def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
|
def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
|
||||||
tagname = barename(html_tag.tag)
|
tagname = barename(html_tag.tag)
|
||||||
if tagname in {'script', 'style', 'title', 'meta'}:
|
|
||||||
return
|
|
||||||
tag_style = stylizer.style(html_tag)
|
tag_style = stylizer.style(html_tag)
|
||||||
if tag_style.is_hidden:
|
ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
|
||||||
return
|
|
||||||
|
|
||||||
previous_link = self.current_link
|
|
||||||
if tagname == 'a' and html_tag.get('href'):
|
|
||||||
self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
|
|
||||||
previous_lang = self.current_lang
|
|
||||||
tag_lang = lang_for_tag(html_tag)
|
|
||||||
if tag_lang:
|
|
||||||
self.current_lang = tag_lang
|
|
||||||
|
|
||||||
display = tag_style._get('display')
|
display = tag_style._get('display')
|
||||||
is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
|
is_block = False
|
||||||
if float_spec is None and is_float:
|
|
||||||
float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)
|
|
||||||
|
|
||||||
if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph
|
if not ignore_tag_contents:
|
||||||
if is_float and float_spec.is_dropcaps:
|
previous_link = self.current_link
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
|
if tagname == 'a' and html_tag.get('href'):
|
||||||
float_spec = None
|
self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
|
||||||
|
previous_lang = self.current_lang
|
||||||
|
tag_lang = lang_for_tag(html_tag)
|
||||||
|
if tag_lang:
|
||||||
|
self.current_lang = tag_lang
|
||||||
|
|
||||||
|
is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
|
||||||
|
if float_spec is None and is_float:
|
||||||
|
float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)
|
||||||
|
|
||||||
|
if display in {'inline', 'inline-block'} or tagname == 'br': # <br> has display:block but we dont want to start a new paragraph
|
||||||
|
if is_float and float_spec.is_dropcaps:
|
||||||
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
|
||||||
|
float_spec = None
|
||||||
|
else:
|
||||||
|
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
|
elif display == 'list-item':
|
||||||
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
|
||||||
|
elif display.startswith('table') or display == 'inline-table':
|
||||||
|
if display == 'table-cell':
|
||||||
|
self.blocks.start_new_cell(html_tag, tag_style)
|
||||||
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
|
||||||
|
elif display == 'table-row':
|
||||||
|
self.blocks.start_new_row(html_tag, tag_style)
|
||||||
|
elif display in {'table', 'inline-table'}:
|
||||||
|
self.blocks.end_current_block()
|
||||||
|
self.blocks.start_new_table(html_tag, tag_style)
|
||||||
else:
|
else:
|
||||||
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
if tagname == 'img' and is_float:
|
||||||
elif display == 'list-item':
|
# Image is floating so dont start a new paragraph for it
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
|
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
||||||
elif display.startswith('table') or display == 'inline-table':
|
else:
|
||||||
if display == 'table-cell':
|
if tagname == 'hr':
|
||||||
self.blocks.start_new_cell(html_tag, tag_style)
|
for edge in 'right bottom left'.split():
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
|
tag_style.set('border-%s-style' % edge, 'none')
|
||||||
elif display == 'table-row':
|
self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
|
||||||
self.blocks.start_new_row(html_tag, tag_style)
|
|
||||||
elif display in {'table', 'inline-table'}:
|
|
||||||
self.blocks.end_current_block()
|
|
||||||
self.blocks.start_new_table(html_tag, tag_style)
|
|
||||||
else:
|
|
||||||
if tagname == 'img' and is_float:
|
|
||||||
# Image is floating so dont start a new paragraph for it
|
|
||||||
self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
|
|
||||||
else:
|
|
||||||
if tagname == 'hr':
|
|
||||||
for edge in 'right bottom left'.split():
|
|
||||||
tag_style.set('border-%s-style' % edge, 'none')
|
|
||||||
self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
|
|
||||||
|
|
||||||
for child in html_tag.iterchildren('*'):
|
for child in html_tag.iterchildren('*'):
|
||||||
self.process_tag(child, stylizer, float_spec=float_spec)
|
self.process_tag(child, stylizer, float_spec=float_spec)
|
||||||
|
|
||||||
is_block = html_tag in self.blocks.open_html_blocks
|
is_block = html_tag in self.blocks.open_html_blocks
|
||||||
self.blocks.finish_tag(html_tag)
|
self.blocks.finish_tag(html_tag)
|
||||||
if is_block and tag_style['page-break-after'] == 'avoid':
|
if is_block and tag_style['page-break-after'] == 'avoid':
|
||||||
self.blocks.all_blocks[-1].keep_next = True
|
self.blocks.all_blocks[-1].keep_next = True
|
||||||
|
|
||||||
self.current_link = previous_link
|
self.current_link = previous_link
|
||||||
self.current_lang = previous_lang
|
self.current_lang = previous_lang
|
||||||
|
|
||||||
|
# Now, process the tail if any
|
||||||
|
|
||||||
if display == 'table-row':
|
if display == 'table-row':
|
||||||
return # We ignore the tail for these tags
|
return # We ignore the tail for these tags
|
||||||
|
Loading…
x
Reference in New Issue
Block a user