mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Don't ignore the content in tables, just extract the content as linear blocks for now
This commit is contained in:
parent
aa2aa3d2ef
commit
5ec61a6b29
@ -14,7 +14,7 @@ from lxml.html.builder import (
|
|||||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR)
|
||||||
|
|
||||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||||
from calibre.ebooks.docx.names import XPath, is_tag, barename, XML, STYLES, NUMBERING, FONTS
|
from calibre.ebooks.docx.names import XPath, is_tag, XML, STYLES, NUMBERING, FONTS
|
||||||
from calibre.ebooks.docx.styles import Styles, inherit
|
from calibre.ebooks.docx.styles import Styles, inherit
|
||||||
from calibre.ebooks.docx.numbering import Numbering
|
from calibre.ebooks.docx.numbering import Numbering
|
||||||
from calibre.ebooks.docx.fonts import Fonts
|
from calibre.ebooks.docx.fonts import Fonts
|
||||||
@ -64,16 +64,11 @@ class Convert(object):
|
|||||||
doc = self.docx.document
|
doc = self.docx.document
|
||||||
relationships_by_id, relationships_by_type = self.docx.document_relationships
|
relationships_by_id, relationships_by_type = self.docx.document_relationships
|
||||||
self.read_styles(relationships_by_type)
|
self.read_styles(relationships_by_type)
|
||||||
for top_level in XPath('/w:document/w:body/*')(doc):
|
for wp in XPath('//w:p')(doc):
|
||||||
if is_tag(top_level, 'w:p'):
|
p = self.convert_p(wp)
|
||||||
p = self.convert_p(top_level)
|
self.body.append(p)
|
||||||
self.body.append(p)
|
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
|
||||||
elif is_tag(top_level, 'w:tbl'):
|
# TODO: Last section properties <w:sectPr> child of <w:body>
|
||||||
pass # TODO: tables
|
|
||||||
elif is_tag(top_level, 'w:sectPr'):
|
|
||||||
pass # TODO: Last section properties
|
|
||||||
else:
|
|
||||||
self.log.debug('Unknown top-level tag: %s, ignoring' % barename(top_level.tag))
|
|
||||||
|
|
||||||
numbered = []
|
numbered = []
|
||||||
for html_obj, obj in self.object_map.iteritems():
|
for html_obj, obj in self.object_map.iteritems():
|
||||||
|
Loading…
x
Reference in New Issue
Block a user