From a403ec43510a9a023a7b9f02cb1da72e391de90f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 15 Feb 2015 18:33:34 +0530 Subject: [PATCH] DOCX Output: Handle invisible HTML elements --- src/calibre/ebooks/docx/writer/from_html.py | 13 +++++++++---- src/calibre/ebooks/oeb/stylizer.py | 4 ++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index 3d8c1f2c2e..28e701e4f9 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -12,7 +12,7 @@ from lxml import etree from lxml.builder import ElementMaker from calibre.ebooks.docx.names import namespaces -from calibre.ebooks.docx.styles import w, BlockStyle, TextStyle +from calibre.ebooks.docx.writer.styles import w, BlockStyle, TextStyle from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St from calibre.ebooks.oeb.base import XPath, barename @@ -138,8 +138,11 @@ class Convert(object): self.process_block(body, b, stylizer, ignore_tail=True) def process_block(self, html_block, docx_block, stylizer, ignore_tail=False): + block_style = stylizer.style(html_block) + if block_style.is_hidden: + return if html_block.text: - docx_block.add_text(html_block.text, stylizer.style(html_block), ignore_leading_whitespace=True) + docx_block.add_text(html_block.text, block_style, ignore_leading_whitespace=True) for child in html_block.iterchildren(etree.Element): tag = barename(child.tag) @@ -157,15 +160,17 @@ class Convert(object): if ignore_tail is False and html_block.tail and html_block.tail.strip(): b = docx_block if b is not self.blocks[-1]: - b = Block(html_block, stylizer.style(html_block)) + b = Block(html_block, block_style) self.blocks.append(b) b.add_text(html_block.tail, stylizer.style(html_block.getparent())) def process_inline(self, html_child, docx_block, stylizer): tag = barename(html_child.tag) + style = stylizer.style(html_child) + if style.is_hidden: + return if tag == 'img': return # TODO: Handle images - style = stylizer.style(html_child) if html_child.text: docx_block.add_text(html_child.text, style, html_parent=html_child) for child in html_child.iterchildren(etree.Element): diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 3b721c7e08..2c31f7f524 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -737,3 +737,7 @@ class Style(object): css = self._pseudo_classes return {k:v for k, v in css.iteritems() if v} + @property + def is_hidden(self): + return self._style.get('display') == 'none' or self._style.get('visibility') == 'hidden' +