From 5ec61a6b299ab2114e0b7b7ae5848b733d512371 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 14 May 2013 16:45:03 +0530 Subject: [PATCH] Don't ignore the content in tables, just extract the content as linear blocks for now --- src/calibre/ebooks/docx/to_html.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index dbd6dce043..b4e5b0e5f7 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -14,7 +14,7 @@ from lxml.html.builder import ( HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR) from calibre.ebooks.docx.container import DOCX, fromstring -from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS +from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS from calibre.ebooks.docx.styles import Styles, inherit from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.fonts import Fonts @@ -64,16 +64,11 @@ class Convert(object): doc = self.docx.document relationships_by_id, relationships_by_type = self.docx.document_relationships self.read_styles(relationships_by_type) - for top_level in XPath('/w:document/w:body/*')(doc): - if is_tag(top_level, 'w:p'): - p = self.convert_p(top_level) - self.body.append(p) - elif is_tag(top_level, 'w:tbl'): - pass # TODO: tables - elif is_tag(top_level, 'w:sectPr'): - pass # TODO: Last section properties - else: - self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag)) + for wp in XPath('//w:p')(doc): + p = self.convert_p(wp) + self.body.append(p) + # TODO: tables <w:tbl> child of <w:body> (nested tables?) + # TODO: Last section properties <w:sectPr> child of <w:body> numbered = [] for html_obj, obj in self.object_map.iteritems():