Update odfpy for improved handling of ODT documents

This commit is contained in:
Kovid Goyal 2009-01-03 13:46:52 -08:00
parent d92e8f4d1f
commit b3ac2ca6f3
3 changed files with 77 additions and 30 deletions

View File

@ -20,9 +20,10 @@
TOOLSVERSION = u"ODFPY/0.8.1dev" TOOLSVERSION = u"ODFPY/0.8.1dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
CHARTNS = u"urn:oasis:names:tc:opendocument:xmlns:chart:1.0" CHARTNS = u"urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
CONFIGNS = u"urn:oasis:names:tc:opendocument:xmlns:config:1.0" CONFIGNS = u"urn:oasis:names:tc:opendocument:xmlns:config:1.0"
DBNS = u"http://openoffice.org/2004/database" #DBNS = u"http://openoffice.org/2004/database"
DCNS = u"http://purl.org/dc/elements/1.1/" DCNS = u"http://purl.org/dc/elements/1.1/"
DOMNS = u"http://www.w3.org/2001/xml-events" DOMNS = u"http://www.w3.org/2001/xml-events"
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
@ -39,6 +40,7 @@ OOONS = u"http://openoffice.org/2004/office"
OOOWNS = u"http://openoffice.org/2004/writer" OOOWNS = u"http://openoffice.org/2004/writer"
OOOCNS = u"http://openoffice.org/2004/calc" OOOCNS = u"http://openoffice.org/2004/calc"
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0" SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0" STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@ -47,6 +49,7 @@ TABLENS = u"urn:oasis:names:tc:opendocument:xmlns:table:1.0"
TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0" TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
XFORMSNS = u"http://www.w3.org/2002/xforms" XFORMSNS = u"http://www.w3.org/2002/xforms"
XLINKNS = u"http://www.w3.org/1999/xlink" XLINKNS = u"http://www.w3.org/1999/xlink"
# Reserved XML namespace (mapped to the 'xml' prefix in nsdict); written as a
# unicode literal so it matches every other namespace constant in this module.
XMLNS = u"http://www.w3.org/XML/1998/namespace"
nsdict = { nsdict = {
@ -70,6 +73,7 @@ nsdict = {
OOOWNS: u'ooow', OOOWNS: u'ooow',
OOOCNS: u'ooc', OOOCNS: u'ooc',
PRESENTATIONNS: u'presentation', PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa',
SCRIPTNS: u'script', SCRIPTNS: u'script',
SMILNS: u'smil', SMILNS: u'smil',
STYLENS: u'style', STYLENS: u'style',
@ -78,4 +82,5 @@ nsdict = {
TEXTNS: u'text', TEXTNS: u'text',
XFORMSNS: u'xforms', XFORMSNS: u'xforms',
XLINKNS: u'xlink', XLINKNS: u'xlink',
XMLNS: u'xml',
} }

View File

@ -22,7 +22,7 @@
#pdb.set_trace() #pdb.set_trace()
import zipfile import zipfile
import xml.sax import xml.sax
from xml.sax import handler from xml.sax import handler, expatreader
from xml.sax.xmlreader import InputSource from xml.sax.xmlreader import InputSource
from xml.sax.saxutils import escape, quoteattr from xml.sax.saxutils import escape, quoteattr
@ -206,10 +206,10 @@ class StyleToCSS:
if hpos == "center": if hpos == "center":
sdict['margin-left'] = "auto" sdict['margin-left'] = "auto"
sdict['margin-right'] = "auto" sdict['margin-right'] = "auto"
else: # else:
# force it to be *something* then delete it # # force it to be *something* then delete it
sdict['margin-left'] = sdict['margin-right'] = '' # sdict['margin-left'] = sdict['margin-right'] = ''
del sdict['margin-left'], sdict['margin-right'] # del sdict['margin-left'], sdict['margin-right']
if hpos in ("right","outside"): if hpos in ("right","outside"):
if wrap in ( "left", "parallel","dynamic"): if wrap in ( "left", "parallel","dynamic"):
@ -336,8 +336,9 @@ special_styles = {
class ODF2XHTML(handler.ContentHandler): class ODF2XHTML(handler.ContentHandler):
""" The ODF2XHTML parses an ODF file and produces XHTML""" """ The ODF2XHTML parses an ODF file and produces XHTML"""
def __init__(self): def __init__(self, generate_css=True, embedable=False):
# Tags # Tags
self.generate_css = generate_css
self.elements = { self.elements = {
(DCNS, 'title'): (self.s_processcont, self.e_dc_title), (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
@ -349,6 +350,7 @@ class ODF2XHTML(handler.ContentHandler):
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
(DRAWNS, "layer-set"):(self.s_ignorexml, None), (DRAWNS, "layer-set"):(self.s_ignorexml, None),
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
(METANS, 'generator'):(self.s_processcont, self.e_dc_metatag), (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
(METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag), (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
@ -421,6 +423,12 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
} }
if embedable:
self.elements[(OFFICENS, u"text")] = (None,None)
self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
self.elements[(OFFICENS, u"presentation")] = (None,None)
self.elements[(OFFICENS, u"document-content")] = (None,None)
def writeout(self, s): def writeout(self, s):
if s != '': if s != '':
@ -548,14 +556,18 @@ class ODF2XHTML(handler.ContentHandler):
""" A <draw:frame> is made into a <div> in HTML which is then styled """ A <draw:frame> is made into a <div> in HTML which is then styled
""" """
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char') anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "") name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
if name == 'G-': if name == 'G-':
name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "") name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
name = name.replace(".","_") name = name.replace(".","_")
if anchor_type == "paragraph": if anchor_type == "paragraph":
style = "" style = 'position:relative;'
elif anchor_type == 'char': elif anchor_type == 'char':
style = "position: relative;" style = "position:relative;"
elif anchor_type == 'as-char':
htmltag = 'div'
style = ''
else: else:
style = "position: absolute;" style = "position: absolute;"
if attrs.has_key( (SVGNS,"width") ): if attrs.has_key( (SVGNS,"width") ):
@ -566,7 +578,10 @@ class ODF2XHTML(handler.ContentHandler):
style = style + "left:" + attrs[(SVGNS,"x")] + ";" style = style + "left:" + attrs[(SVGNS,"x")] + ";"
if attrs.has_key( (SVGNS,"y") ): if attrs.has_key( (SVGNS,"y") ):
style = style + "top:" + attrs[(SVGNS,"y")] + ";" style = style + "top:" + attrs[(SVGNS,"y")] + ";"
self.opentag('div', {'class': name, 'style': style}) if self.generate_css:
self.opentag(htmltag, {'class': name, 'style': style})
else:
self.opentag(htmltag)
def e_draw_frame(self, tag, attrs): def e_draw_frame(self, tag, attrs):
""" End the <draw:frame> """ End the <draw:frame>
@ -593,8 +608,9 @@ class ODF2XHTML(handler.ContentHandler):
imghref = attrs[(XLINKNS,"href")] imghref = attrs[(XLINKNS,"href")]
imghref = self.rewritelink(imghref) imghref = self.rewritelink(imghref)
htmlattrs = {'alt':"", 'src':imghref } htmlattrs = {'alt':"", 'src':imghref }
if anchor_type != "char": if self.generate_css:
htmlattrs['style'] = "display: block;" if anchor_type != "char":
htmlattrs['style'] = "display: block;"
self.emptytag('img', htmlattrs) self.emptytag('img', htmlattrs)
def s_draw_page(self, tag, attrs): def s_draw_page(self, tag, attrs):
@ -607,7 +623,10 @@ class ODF2XHTML(handler.ContentHandler):
stylename = stylename.replace(".","_") stylename = stylename.replace(".","_")
masterpage = attrs.get( (DRAWNS,'master-page-name'),"") masterpage = attrs.get( (DRAWNS,'master-page-name'),"")
masterpage = masterpage.replace(".","_") masterpage = masterpage.replace(".","_")
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) }) if self.generate_css:
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) })
else:
self.opentag('fieldset')
self.opentag('legend') self.opentag('legend')
self.writeout(escape(name)) self.writeout(escape(name))
self.closetag('legend') self.closetag('legend')
@ -615,17 +634,30 @@ class ODF2XHTML(handler.ContentHandler):
def e_draw_page(self, tag, attrs): def e_draw_page(self, tag, attrs):
self.closetag('fieldset') self.closetag('fieldset')
def s_draw_textbox(self, tag, attrs):
# Start handler for <draw:text-box>: opens a plain <div>, which e_draw_textbox closes.
style = ''
# Build a CSS min-height declaration from the (FONS, "min-height") attribute when it is present.
if attrs.has_key( (FONS,"min-height") ):
style = style + "min-height:" + attrs[(FONS,"min-height")] + ";"
# NOTE(review): `style` is computed but never emitted -- the styled opentag call
# below is commented out, so the <div> is written without any attributes.
self.opentag('div')
# self.opentag('div', {'style': style})
def e_draw_textbox(self, tag, attrs):
""" End the <draw:text-box>: close the <div> opened by s_draw_textbox
"""
self.closetag('div')
def html_body(self, tag, attrs): def html_body(self, tag, attrs):
self.writedata() self.writedata()
self.opentag('style', {'type':"text/css"}, True) if self.generate_css:
self.writeout('/*<![CDATA[*/\n') self.opentag('style', {'type':"text/css"}, True)
self.writeout('\nimg { width: 100%; height: 100%; }\n') self.writeout('/*<![CDATA[*/\n')
self.writeout('* { padding: 0; margin: 0; }\n') self.writeout('\nimg { width: 100%; height: 100%; }\n')
self.writeout('body { margin: 0 1em; }\n') self.writeout('* { padding: 0; margin: 0; background-color:white; }\n')
self.writeout('ol, ul { padding-left: 2em; }\n') self.writeout('body { margin: 0 1em; }\n')
self.generate_stylesheet() self.writeout('ol, ul { padding-left: 2em; }\n')
self.writeout('/*]]>*/\n') self.generate_stylesheet()
self.closetag('style') self.writeout('/*]]>*/\n')
self.closetag('style')
self.purgedata() self.purgedata()
self.closetag('head') self.closetag('head')
self.opentag('body', block=True) self.opentag('body', block=True)
@ -660,7 +692,10 @@ class ODF2XHTML(handler.ContentHandler):
def generate_footnotes(self): def generate_footnotes(self):
if self.currentnote == 0: if self.currentnote == 0:
return return
self.opentag('ol', {'style':'border-top: 1px solid black'}, True) if self.generate_css:
self.opentag('ol', {'style':'border-top: 1px solid black'}, True)
else:
self.opentag('ol')
for key in range(1,self.currentnote+1): for key in range(1,self.currentnote+1):
note = self.notedict[key] note = self.notedict[key]
# for key,note in self.notedict.items(): # for key,note in self.notedict.items():
@ -874,7 +909,7 @@ class ODF2XHTML(handler.ContentHandler):
""" Start a table """ Start a table
""" """
c = attrs.get( (TABLENS,'style-name'), None) c = attrs.get( (TABLENS,'style-name'), None)
if c: if c and self.generate_css:
c = c.replace(".","_") c = c.replace(".","_")
self.opentag('table',{ 'class': "T-%s" % c }) self.opentag('table',{ 'class': "T-%s" % c })
else: else:
@ -958,7 +993,7 @@ class ODF2XHTML(handler.ContentHandler):
for x in range(level + 1,10): for x in range(level + 1,10):
self.headinglevels[x] = 0 self.headinglevels[x] = 0
special = special_styles.get("P-"+name) special = special_styles.get("P-"+name)
if special: if special or not self.generate_css:
self.opentag('h%s' % level) self.opentag('h%s' % level)
else: else:
self.opentag('h%s' % level, {'class':"P-%s" % name }) self.opentag('h%s' % level, {'class':"P-%s" % name })
@ -997,7 +1032,10 @@ class ODF2XHTML(handler.ContentHandler):
# textbox itself may be nested within another list. # textbox itself may be nested within another list.
level = self.tagstack.count_tags(tag) + 1 level = self.tagstack.count_tags(tag) + 1
name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
self.opentag('%s' % self.listtypes.get(name), {'class':"%s_%d" % (name, level) }) if self.generate_css:
self.opentag('%s' % self.listtypes.get(name), {'class':"%s_%d" % (name, level) })
else:
self.opentag('%s' % self.listtypes.get(name))
self.purgedata() self.purgedata()
def e_text_list(self, tag, attrs): def e_text_list(self, tag, attrs):
@ -1113,7 +1151,8 @@ class ODF2XHTML(handler.ContentHandler):
specialtag = special_styles.get("P-"+c) specialtag = special_styles.get("P-"+c)
if specialtag is None: if specialtag is None:
specialtag = 'p' specialtag = 'p'
htmlattrs['class'] = "P-%s" % c if self.generate_css:
htmlattrs['class'] = "P-%s" % c
self.opentag(specialtag, htmlattrs) self.opentag(specialtag, htmlattrs)
self.purgedata() self.purgedata()
@ -1149,7 +1188,7 @@ class ODF2XHTML(handler.ContentHandler):
if c: if c:
c = c.replace(".","_") c = c.replace(".","_")
special = special_styles.get("S-"+c) special = special_styles.get("S-"+c)
if special is None: if special is None and self.generate_css:
htmlattrs['class'] = "S-%s" % c htmlattrs['class'] = "S-%s" % c
self.opentag('span', htmlattrs) self.opentag('span', htmlattrs)
self.purgedata() self.purgedata()
@ -1219,7 +1258,10 @@ class ODF2XHTML(handler.ContentHandler):
# Extract the interesting files # Extract the interesting files
z = zipfile.ZipFile(self._odffile) z = zipfile.ZipFile(self._odffile)
parser = xml.sax.make_parser() # For some reason Trac has trouble when xml.sax.make_parser() is used.
# Could it be because PyXML is installed, and therefore a different parser
# might be chosen? By calling expatreader directly we avoid this issue
parser = expatreader.create_parser()
parser.setFeature(handler.feature_namespaces, 1) parser.setFeature(handler.feature_namespaces, 1)
parser.setContentHandler(self) parser.setContentHandler(self)
parser.setErrorHandler(handler.ErrorHandler()) parser.setErrorHandler(handler.ErrorHandler())

View File

@ -287,7 +287,7 @@ class OpenDocument:
else: else:
ext = mimetypes.guess_extension(mediatype) ext = mimetypes.guess_extension(mediatype)
manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext) manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
self.Pictures[manifestfn] = (IS_FILENAME, fileobj, mediatype) self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
else: else:
manifestfn = filename manifestfn = filename
self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype) self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)