Don't ignore the content in tables; just extract the content as linear blocks for now

This commit is contained in:
Kovid Goyal 2013-05-14 16:45:03 +05:30
parent aa2aa3d2ef
commit 5ec61a6b29

View File

@ -14,7 +14,7 @@ from lxml.html.builder import (
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR) HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
from calibre.ebooks.docx.container import DOCX, fromstring from calibre.ebooks.docx.container import DOCX, fromstring
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
from calibre.ebooks.docx.styles import Styles, inherit from calibre.ebooks.docx.styles import Styles, inherit
from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.numbering import Numbering
from calibre.ebooks.docx.fonts import Fonts from calibre.ebooks.docx.fonts import Fonts
@ -64,16 +64,11 @@ class Convert(object):
doc = self.docx.document doc = self.docx.document
relationships_by_id, relationships_by_type = self.docx.document_relationships relationships_by_id, relationships_by_type = self.docx.document_relationships
self.read_styles(relationships_by_type) self.read_styles(relationships_by_type)
for top_level in XPath('/w:document/w:body/*')(doc): for wp in XPath('//w:p')(doc):
if is_tag(top_level, 'w:p'): p = self.convert_p(wp)
p = self.convert_p(top_level) self.body.append(p)
self.body.append(p) # TODO: tables <w:tbl> child of <w:body> (nested tables?)
elif is_tag(top_level, 'w:tbl'): # TODO: Last section properties <w:sectPr> child of <w:body>
pass # TODO: tables
elif is_tag(top_level, 'w:sectPr'):
pass # TODO: Last section properties
else:
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
numbered = [] numbered = []
for html_obj, obj in self.object_map.iteritems(): for html_obj, obj in self.object_map.iteritems():