mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Update odfpy for improved handling of ODT documents
This commit is contained in:
parent
d92e8f4d1f
commit
b3ac2ca6f3
@ -20,9 +20,10 @@
|
|||||||
TOOLSVERSION = u"ODFPY/0.8.1dev"
|
TOOLSVERSION = u"ODFPY/0.8.1dev"
|
||||||
|
|
||||||
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
|
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
|
||||||
|
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
|
||||||
CHARTNS = u"urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
|
CHARTNS = u"urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
|
||||||
CONFIGNS = u"urn:oasis:names:tc:opendocument:xmlns:config:1.0"
|
CONFIGNS = u"urn:oasis:names:tc:opendocument:xmlns:config:1.0"
|
||||||
DBNS = u"http://openoffice.org/2004/database"
|
#DBNS = u"http://openoffice.org/2004/database"
|
||||||
DCNS = u"http://purl.org/dc/elements/1.1/"
|
DCNS = u"http://purl.org/dc/elements/1.1/"
|
||||||
DOMNS = u"http://www.w3.org/2001/xml-events"
|
DOMNS = u"http://www.w3.org/2001/xml-events"
|
||||||
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
|
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
|
||||||
@ -39,6 +40,7 @@ OOONS = u"http://openoffice.org/2004/office"
|
|||||||
OOOWNS = u"http://openoffice.org/2004/writer"
|
OOOWNS = u"http://openoffice.org/2004/writer"
|
||||||
OOOCNS = u"http://openoffice.org/2004/calc"
|
OOOCNS = u"http://openoffice.org/2004/calc"
|
||||||
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
|
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
|
||||||
|
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
|
||||||
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
|
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
|
||||||
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
|
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
|
||||||
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
|
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
|
||||||
@ -47,6 +49,7 @@ TABLENS = u"urn:oasis:names:tc:opendocument:xmlns:table:1.0"
|
|||||||
TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
||||||
XFORMSNS = u"http://www.w3.org/2002/xforms"
|
XFORMSNS = u"http://www.w3.org/2002/xforms"
|
||||||
XLINKNS = u"http://www.w3.org/1999/xlink"
|
XLINKNS = u"http://www.w3.org/1999/xlink"
|
||||||
|
XMLNS = "http://www.w3.org/XML/1998/namespace"
|
||||||
|
|
||||||
|
|
||||||
nsdict = {
|
nsdict = {
|
||||||
@ -70,6 +73,7 @@ nsdict = {
|
|||||||
OOOWNS: u'ooow',
|
OOOWNS: u'ooow',
|
||||||
OOOCNS: u'ooc',
|
OOOCNS: u'ooc',
|
||||||
PRESENTATIONNS: u'presentation',
|
PRESENTATIONNS: u'presentation',
|
||||||
|
RDFANS: u'rdfa',
|
||||||
SCRIPTNS: u'script',
|
SCRIPTNS: u'script',
|
||||||
SMILNS: u'smil',
|
SMILNS: u'smil',
|
||||||
STYLENS: u'style',
|
STYLENS: u'style',
|
||||||
@ -78,4 +82,5 @@ nsdict = {
|
|||||||
TEXTNS: u'text',
|
TEXTNS: u'text',
|
||||||
XFORMSNS: u'xforms',
|
XFORMSNS: u'xforms',
|
||||||
XLINKNS: u'xlink',
|
XLINKNS: u'xlink',
|
||||||
|
XMLNS: u'xml',
|
||||||
}
|
}
|
||||||
|
@ -22,7 +22,7 @@
|
|||||||
#pdb.set_trace()
|
#pdb.set_trace()
|
||||||
import zipfile
|
import zipfile
|
||||||
import xml.sax
|
import xml.sax
|
||||||
from xml.sax import handler
|
from xml.sax import handler, expatreader
|
||||||
from xml.sax.xmlreader import InputSource
|
from xml.sax.xmlreader import InputSource
|
||||||
from xml.sax.saxutils import escape, quoteattr
|
from xml.sax.saxutils import escape, quoteattr
|
||||||
|
|
||||||
@ -206,10 +206,10 @@ class StyleToCSS:
|
|||||||
if hpos == "center":
|
if hpos == "center":
|
||||||
sdict['margin-left'] = "auto"
|
sdict['margin-left'] = "auto"
|
||||||
sdict['margin-right'] = "auto"
|
sdict['margin-right'] = "auto"
|
||||||
else:
|
# else:
|
||||||
# force it to be *something* then delete it
|
# # force it to be *something* then delete it
|
||||||
sdict['margin-left'] = sdict['margin-right'] = ''
|
# sdict['margin-left'] = sdict['margin-right'] = ''
|
||||||
del sdict['margin-left'], sdict['margin-right']
|
# del sdict['margin-left'], sdict['margin-right']
|
||||||
|
|
||||||
if hpos in ("right","outside"):
|
if hpos in ("right","outside"):
|
||||||
if wrap in ( "left", "parallel","dynamic"):
|
if wrap in ( "left", "parallel","dynamic"):
|
||||||
@ -336,8 +336,9 @@ special_styles = {
|
|||||||
class ODF2XHTML(handler.ContentHandler):
|
class ODF2XHTML(handler.ContentHandler):
|
||||||
""" The ODF2XHTML parses an ODF file and produces XHTML"""
|
""" The ODF2XHTML parses an ODF file and produces XHTML"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, generate_css=True, embedable=False):
|
||||||
# Tags
|
# Tags
|
||||||
|
self.generate_css = generate_css
|
||||||
self.elements = {
|
self.elements = {
|
||||||
(DCNS, 'title'): (self.s_processcont, self.e_dc_title),
|
(DCNS, 'title'): (self.s_processcont, self.e_dc_title),
|
||||||
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
|
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
|
||||||
@ -349,6 +350,7 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
|
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
|
||||||
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
|
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
|
||||||
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
|
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
|
||||||
|
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
|
||||||
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
|
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
|
||||||
(METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
|
(METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
|
||||||
(METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
|
(METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
|
||||||
@ -421,6 +423,12 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
(TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
|
(TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||||
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
|
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||||
}
|
}
|
||||||
|
if embedable:
|
||||||
|
self.elements[(OFFICENS, u"text")] = (None,None)
|
||||||
|
self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
|
||||||
|
self.elements[(OFFICENS, u"presentation")] = (None,None)
|
||||||
|
self.elements[(OFFICENS, u"document-content")] = (None,None)
|
||||||
|
|
||||||
|
|
||||||
def writeout(self, s):
|
def writeout(self, s):
|
||||||
if s != '':
|
if s != '':
|
||||||
@ -548,14 +556,18 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
""" A <draw:frame> is made into a <div> in HTML which is then styled
|
""" A <draw:frame> is made into a <div> in HTML which is then styled
|
||||||
"""
|
"""
|
||||||
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
|
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
|
||||||
|
htmltag = 'div'
|
||||||
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
|
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
|
||||||
if name == 'G-':
|
if name == 'G-':
|
||||||
name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
|
name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
|
||||||
name = name.replace(".","_")
|
name = name.replace(".","_")
|
||||||
if anchor_type == "paragraph":
|
if anchor_type == "paragraph":
|
||||||
style = ""
|
style = 'position:relative;'
|
||||||
elif anchor_type == 'char':
|
elif anchor_type == 'char':
|
||||||
style = "position: relative;"
|
style = "position:relative;"
|
||||||
|
elif anchor_type == 'as-char':
|
||||||
|
htmltag = 'div'
|
||||||
|
style = ''
|
||||||
else:
|
else:
|
||||||
style = "position: absolute;"
|
style = "position: absolute;"
|
||||||
if attrs.has_key( (SVGNS,"width") ):
|
if attrs.has_key( (SVGNS,"width") ):
|
||||||
@ -566,7 +578,10 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
style = style + "left:" + attrs[(SVGNS,"x")] + ";"
|
style = style + "left:" + attrs[(SVGNS,"x")] + ";"
|
||||||
if attrs.has_key( (SVGNS,"y") ):
|
if attrs.has_key( (SVGNS,"y") ):
|
||||||
style = style + "top:" + attrs[(SVGNS,"y")] + ";"
|
style = style + "top:" + attrs[(SVGNS,"y")] + ";"
|
||||||
self.opentag('div', {'class': name, 'style': style})
|
if self.generate_css:
|
||||||
|
self.opentag(htmltag, {'class': name, 'style': style})
|
||||||
|
else:
|
||||||
|
self.opentag(htmltag)
|
||||||
|
|
||||||
def e_draw_frame(self, tag, attrs):
|
def e_draw_frame(self, tag, attrs):
|
||||||
""" End the <draw:frame>
|
""" End the <draw:frame>
|
||||||
@ -593,8 +608,9 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
imghref = attrs[(XLINKNS,"href")]
|
imghref = attrs[(XLINKNS,"href")]
|
||||||
imghref = self.rewritelink(imghref)
|
imghref = self.rewritelink(imghref)
|
||||||
htmlattrs = {'alt':"", 'src':imghref }
|
htmlattrs = {'alt':"", 'src':imghref }
|
||||||
if anchor_type != "char":
|
if self.generate_css:
|
||||||
htmlattrs['style'] = "display: block;"
|
if anchor_type != "char":
|
||||||
|
htmlattrs['style'] = "display: block;"
|
||||||
self.emptytag('img', htmlattrs)
|
self.emptytag('img', htmlattrs)
|
||||||
|
|
||||||
def s_draw_page(self, tag, attrs):
|
def s_draw_page(self, tag, attrs):
|
||||||
@ -607,7 +623,10 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
stylename = stylename.replace(".","_")
|
stylename = stylename.replace(".","_")
|
||||||
masterpage = attrs.get( (DRAWNS,'master-page-name'),"")
|
masterpage = attrs.get( (DRAWNS,'master-page-name'),"")
|
||||||
masterpage = masterpage.replace(".","_")
|
masterpage = masterpage.replace(".","_")
|
||||||
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) })
|
if self.generate_css:
|
||||||
|
self.opentag('fieldset', {'class':"DP-%s MP-%s" % (stylename, masterpage) })
|
||||||
|
else:
|
||||||
|
self.opentag('fieldset')
|
||||||
self.opentag('legend')
|
self.opentag('legend')
|
||||||
self.writeout(escape(name))
|
self.writeout(escape(name))
|
||||||
self.closetag('legend')
|
self.closetag('legend')
|
||||||
@ -615,17 +634,30 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
def e_draw_page(self, tag, attrs):
|
def e_draw_page(self, tag, attrs):
|
||||||
self.closetag('fieldset')
|
self.closetag('fieldset')
|
||||||
|
|
||||||
|
def s_draw_textbox(self, tag, attrs):
|
||||||
|
style = ''
|
||||||
|
if attrs.has_key( (FONS,"min-height") ):
|
||||||
|
style = style + "min-height:" + attrs[(FONS,"min-height")] + ";"
|
||||||
|
self.opentag('div')
|
||||||
|
# self.opentag('div', {'style': style})
|
||||||
|
|
||||||
|
def e_draw_textbox(self, tag, attrs):
|
||||||
|
""" End the <draw:text-box>
|
||||||
|
"""
|
||||||
|
self.closetag('div')
|
||||||
|
|
||||||
def html_body(self, tag, attrs):
|
def html_body(self, tag, attrs):
|
||||||
self.writedata()
|
self.writedata()
|
||||||
self.opentag('style', {'type':"text/css"}, True)
|
if self.generate_css:
|
||||||
self.writeout('/*<![CDATA[*/\n')
|
self.opentag('style', {'type':"text/css"}, True)
|
||||||
self.writeout('\nimg { width: 100%; height: 100%; }\n')
|
self.writeout('/*<![CDATA[*/\n')
|
||||||
self.writeout('* { padding: 0; margin: 0; }\n')
|
self.writeout('\nimg { width: 100%; height: 100%; }\n')
|
||||||
self.writeout('body { margin: 0 1em; }\n')
|
self.writeout('* { padding: 0; margin: 0; background-color:white; }\n')
|
||||||
self.writeout('ol, ul { padding-left: 2em; }\n')
|
self.writeout('body { margin: 0 1em; }\n')
|
||||||
self.generate_stylesheet()
|
self.writeout('ol, ul { padding-left: 2em; }\n')
|
||||||
self.writeout('/*]]>*/\n')
|
self.generate_stylesheet()
|
||||||
self.closetag('style')
|
self.writeout('/*]]>*/\n')
|
||||||
|
self.closetag('style')
|
||||||
self.purgedata()
|
self.purgedata()
|
||||||
self.closetag('head')
|
self.closetag('head')
|
||||||
self.opentag('body', block=True)
|
self.opentag('body', block=True)
|
||||||
@ -660,7 +692,10 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
def generate_footnotes(self):
|
def generate_footnotes(self):
|
||||||
if self.currentnote == 0:
|
if self.currentnote == 0:
|
||||||
return
|
return
|
||||||
self.opentag('ol', {'style':'border-top: 1px solid black'}, True)
|
if self.generate_css:
|
||||||
|
self.opentag('ol', {'style':'border-top: 1px solid black'}, True)
|
||||||
|
else:
|
||||||
|
self.opentag('ol')
|
||||||
for key in range(1,self.currentnote+1):
|
for key in range(1,self.currentnote+1):
|
||||||
note = self.notedict[key]
|
note = self.notedict[key]
|
||||||
# for key,note in self.notedict.items():
|
# for key,note in self.notedict.items():
|
||||||
@ -874,7 +909,7 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
""" Start a table
|
""" Start a table
|
||||||
"""
|
"""
|
||||||
c = attrs.get( (TABLENS,'style-name'), None)
|
c = attrs.get( (TABLENS,'style-name'), None)
|
||||||
if c:
|
if c and self.generate_css:
|
||||||
c = c.replace(".","_")
|
c = c.replace(".","_")
|
||||||
self.opentag('table',{ 'class': "T-%s" % c })
|
self.opentag('table',{ 'class': "T-%s" % c })
|
||||||
else:
|
else:
|
||||||
@ -958,7 +993,7 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
for x in range(level + 1,10):
|
for x in range(level + 1,10):
|
||||||
self.headinglevels[x] = 0
|
self.headinglevels[x] = 0
|
||||||
special = special_styles.get("P-"+name)
|
special = special_styles.get("P-"+name)
|
||||||
if special:
|
if special or not self.generate_css:
|
||||||
self.opentag('h%s' % level)
|
self.opentag('h%s' % level)
|
||||||
else:
|
else:
|
||||||
self.opentag('h%s' % level, {'class':"P-%s" % name })
|
self.opentag('h%s' % level, {'class':"P-%s" % name })
|
||||||
@ -997,7 +1032,10 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
# textbox itself may be nested within another list.
|
# textbox itself may be nested within another list.
|
||||||
level = self.tagstack.count_tags(tag) + 1
|
level = self.tagstack.count_tags(tag) + 1
|
||||||
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
|
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
|
||||||
self.opentag('%s' % self.listtypes.get(name), {'class':"%s_%d" % (name, level) })
|
if self.generate_css:
|
||||||
|
self.opentag('%s' % self.listtypes.get(name), {'class':"%s_%d" % (name, level) })
|
||||||
|
else:
|
||||||
|
self.opentag('%s' % self.listtypes.get(name))
|
||||||
self.purgedata()
|
self.purgedata()
|
||||||
|
|
||||||
def e_text_list(self, tag, attrs):
|
def e_text_list(self, tag, attrs):
|
||||||
@ -1113,7 +1151,8 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
specialtag = special_styles.get("P-"+c)
|
specialtag = special_styles.get("P-"+c)
|
||||||
if specialtag is None:
|
if specialtag is None:
|
||||||
specialtag = 'p'
|
specialtag = 'p'
|
||||||
htmlattrs['class'] = "P-%s" % c
|
if self.generate_css:
|
||||||
|
htmlattrs['class'] = "P-%s" % c
|
||||||
self.opentag(specialtag, htmlattrs)
|
self.opentag(specialtag, htmlattrs)
|
||||||
self.purgedata()
|
self.purgedata()
|
||||||
|
|
||||||
@ -1149,7 +1188,7 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
if c:
|
if c:
|
||||||
c = c.replace(".","_")
|
c = c.replace(".","_")
|
||||||
special = special_styles.get("S-"+c)
|
special = special_styles.get("S-"+c)
|
||||||
if special is None:
|
if special is None and self.generate_css:
|
||||||
htmlattrs['class'] = "S-%s" % c
|
htmlattrs['class'] = "S-%s" % c
|
||||||
self.opentag('span', htmlattrs)
|
self.opentag('span', htmlattrs)
|
||||||
self.purgedata()
|
self.purgedata()
|
||||||
@ -1219,7 +1258,10 @@ class ODF2XHTML(handler.ContentHandler):
|
|||||||
# Extract the interesting files
|
# Extract the interesting files
|
||||||
z = zipfile.ZipFile(self._odffile)
|
z = zipfile.ZipFile(self._odffile)
|
||||||
|
|
||||||
parser = xml.sax.make_parser()
|
# For some reason Trac has trouble when xml.sax.make_parser() is used.
|
||||||
|
# Could it be because PyXML is installed, and therefore a different parser
|
||||||
|
# might be chosen? By calling expatreader directly we avoid this issue
|
||||||
|
parser = expatreader.create_parser()
|
||||||
parser.setFeature(handler.feature_namespaces, 1)
|
parser.setFeature(handler.feature_namespaces, 1)
|
||||||
parser.setContentHandler(self)
|
parser.setContentHandler(self)
|
||||||
parser.setErrorHandler(handler.ErrorHandler())
|
parser.setErrorHandler(handler.ErrorHandler())
|
||||||
|
@ -287,7 +287,7 @@ class OpenDocument:
|
|||||||
else:
|
else:
|
||||||
ext = mimetypes.guess_extension(mediatype)
|
ext = mimetypes.guess_extension(mediatype)
|
||||||
manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
|
manifestfn = "Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
|
||||||
self.Pictures[manifestfn] = (IS_FILENAME, fileobj, mediatype)
|
self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
|
||||||
else:
|
else:
|
||||||
manifestfn = filename
|
manifestfn = filename
|
||||||
self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
|
self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user