DOCX Output: Handle invisible HTML elements

This commit is contained in:
Kovid Goyal 2015-02-15 18:33:34 +05:30
parent 51a08167db
commit a403ec4351
2 changed files with 13 additions and 4 deletions

View File

@@ -12,7 +12,7 @@ from lxml import etree
from lxml.builder import ElementMaker
from calibre.ebooks.docx.names import namespaces
from calibre.ebooks.docx.styles import w, BlockStyle, TextStyle
from calibre.ebooks.docx.writer.styles import w, BlockStyle, TextStyle
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from calibre.ebooks.oeb.base import XPath, barename
@@ -138,8 +138,11 @@ class Convert(object):
self.process_block(body, b, stylizer, ignore_tail=True)
def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
block_style = stylizer.style(html_block)
if block_style.is_hidden:
return
if html_block.text:
docx_block.add_text(html_block.text, stylizer.style(html_block), ignore_leading_whitespace=True)
docx_block.add_text(html_block.text, block_style, ignore_leading_whitespace=True)
for child in html_block.iterchildren(etree.Element):
tag = barename(child.tag)
@@ -157,15 +160,17 @@ class Convert(object):
if ignore_tail is False and html_block.tail and html_block.tail.strip():
b = docx_block
if b is not self.blocks[-1]:
b = Block(html_block, stylizer.style(html_block))
b = Block(html_block, block_style)
self.blocks.append(b)
b.add_text(html_block.tail, stylizer.style(html_block.getparent()))
def process_inline(self, html_child, docx_block, stylizer):
tag = barename(html_child.tag)
style = stylizer.style(html_child)
if style.is_hidden:
return
if tag == 'img':
return # TODO: Handle images
style = stylizer.style(html_child)
if html_child.text:
docx_block.add_text(html_child.text, style, html_parent=html_child)
for child in html_child.iterchildren(etree.Element):

View File

@@ -737,3 +737,7 @@ class Style(object):
css = self._pseudo_classes
return {k:v for k, v in css.iteritems() if v}
@property
def is_hidden(self):
    """Report whether this style makes its element invisible.

    Returns True when the ``display`` entry of ``self._style`` equals
    ``'none'`` or its ``visibility`` entry equals ``'hidden'``;
    otherwise returns False.
    """
    declarations = self._style
    # Check ``display`` first, mirroring the short-circuit order of the test.
    if declarations.get('display') == 'none':
        return True
    return declarations.get('visibility') == 'hidden'