diff --git a/src/odf/attrconverters.py b/src/odf/attrconverters.py
index 0117324bba..b75f80a2dd 100644
--- a/src/odf/attrconverters.py
+++ b/src/odf/attrconverters.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):
# Potentially accept color values
def cnv_color(attribute, arg, element):
+ """ A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
+ rr, gg and bb are 8-bit hexadecimal digits.
+ """
return str(arg)
def cnv_configtype(attribute, arg, element):
@@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):
# Understand different date formats
def cnv_date(attribute, arg, element):
+ """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+ value.
+ """
return str(arg)
def cnv_dateTime(attribute, arg, element):
+ """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
+ value.
+ """
return str(arg)
def cnv_double(attribute, arg, element):
@@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
return str(arg)
def cnv_family(attribute, arg, element):
+ """ A style family """
if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
"graphic", "presentation", "drawing-page", "chart"):
raise ValueError, "'%s' not allowed" % str(arg)
return str(arg)
+def __save_prefix(attribute, arg, element):
+ prefix = arg.split(':',1)[0]
+ if prefix == arg:
+ return unicode(arg)
+ namespace = element.get_knownns(prefix)
+ if namespace is None:
+ #raise ValueError, "'%s' is an unknown prefix" % str(prefix)
+ return unicode(arg)
+ p = element.get_nsprefix(namespace)
+ return unicode(arg)
+
+def cnv_formula(attribute, arg, element):
+    """ A string containing a formula. Formulas do not have a predefined syntax, but the string should
+        begin with a namespace prefix, followed by a “:” (COLON, U+003A) separator, followed by the text
+        of the formula. The namespace bound to the prefix determines the syntax and semantics of the
+        formula. The prefix is not validated here; the value is handed to __save_prefix.
+    """
+    return __save_prefix(attribute, arg, element)
+
def cnv_ID(attribute, arg, element):
return str(arg)
@@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
def cnv_length(attribute, arg, element):
+ """ A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
+ Units of Measure defined in §5.9.13 of [XSL].
+ """
global pattern_length
if not pattern_length.match(arg):
raise ValueError, "'%s' is not a valid length" % arg
@@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):
if not pattern_namespacedToken.match(arg):
raise ValueError, "'%s' is not a valid namespaced token" % arg
- return arg
+ return __save_prefix(attribute, arg, element)
-# Must accept string as argument
-# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
-# Essentially an XML name minus ':'
def cnv_NCName(attribute, arg, element):
+ """ NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
+ Essentially an XML name minus ':'
+ """
if type(arg) in types.StringTypes:
return make_NCName(arg)
else:
@@ -226,6 +258,7 @@ attrconverters = {
((ANIMNS,u'name'), None): cnv_string,
((ANIMNS,u'sub-item'), None): cnv_string,
((ANIMNS,u'value'), None): cnv_string,
+# ((DBNS,u'type'), None): cnv_namespacedToken,
((CHARTNS,u'attached-axis'), None): cnv_string,
((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
((CHARTNS,u'class'), None): cnv_namespacedToken,
@@ -288,7 +321,7 @@ attrconverters = {
((CHARTNS,u'values-cell-range-address'), None): cnv_string,
((CHARTNS,u'vertical'), None): cnv_boolean,
((CHARTNS,u'visible'), None): cnv_boolean,
- ((CONFIGNS,u'name'), None): cnv_string,
+ ((CONFIGNS,u'name'), None): cnv_formula,
((CONFIGNS,u'type'), None): cnv_configtype,
((DR3DNS,u'ambient-color'), None): cnv_string,
((DR3DNS,u'back-scale'), None): cnv_string,
@@ -369,11 +402,11 @@ attrconverters = {
((DRAWNS,u'decimal-places'), None): cnv_string,
((DRAWNS,u'display'), None): cnv_string,
((DRAWNS,u'display-name'), None): cnv_string,
- ((DRAWNS,u'distance'), None): cnv_string,
+ ((DRAWNS,u'distance'), None): cnv_lengthorpercent,
((DRAWNS,u'dots1'), None): cnv_integer,
- ((DRAWNS,u'dots1-length'), None): cnv_length,
+ ((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
((DRAWNS,u'dots2'), None): cnv_integer,
- ((DRAWNS,u'dots2-length'), None): cnv_length,
+ ((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
((DRAWNS,u'end-angle'), None): cnv_double,
((DRAWNS,u'end'), None): cnv_string,
((DRAWNS,u'end-color'), None): cnv_string,
@@ -383,7 +416,7 @@ attrconverters = {
((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
((DRAWNS,u'end-shape'), None): cnv_IDREF,
- ((DRAWNS,u'engine'), None): cnv_string,
+ ((DRAWNS,u'engine'), None): cnv_namespacedToken,
((DRAWNS,u'enhanced-path'), None): cnv_string,
((DRAWNS,u'escape-direction'), None): cnv_string,
((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
@@ -604,7 +637,7 @@ attrconverters = {
((FORMNS,u'button-type'), None): cnv_string,
((FORMNS,u'command'), None): cnv_string,
((FORMNS,u'command-type'), None): cnv_string,
- ((FORMNS,u'control-implementation'), None): cnv_string,
+ ((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
((FORMNS,u'current-selected'), None): cnv_boolean,
((FORMNS,u'current-state'), None): cnv_string,
@@ -800,8 +833,8 @@ attrconverters = {
((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
((PRESENTATIONNS,u'visibility'), None): cnv_string,
- ((SCRIPTNS,u'event-name'), None): cnv_string,
- ((SCRIPTNS,u'language'), None): cnv_string,
+ ((SCRIPTNS,u'event-name'), None): cnv_formula,
+ ((SCRIPTNS,u'language'), None): cnv_formula,
((SCRIPTNS,u'macro-name'), None): cnv_string,
((SMILNS,u'accelerate'), None): cnv_double,
((SMILNS,u'accumulate'), None): cnv_string,
@@ -1087,7 +1120,7 @@ attrconverters = {
((SVGNS,u'y2'), None): cnv_lengthorpercent,
((TABLENS,u'acceptance-state'), None): cnv_string,
((TABLENS,u'add-empty-lines'), None): cnv_boolean,
- ((TABLENS,u'algorithm'), None): cnv_string,
+ ((TABLENS,u'algorithm'), None): cnv_formula,
((TABLENS,u'align'), None): cnv_string,
((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
((TABLENS,u'application-data'), None): cnv_string,
@@ -1106,7 +1139,7 @@ attrconverters = {
((TABLENS,u'cell-range'), None): cnv_string,
((TABLENS,u'column'), None): cnv_integer,
((TABLENS,u'comment'), None): cnv_string,
- ((TABLENS,u'condition'), None): cnv_string,
+ ((TABLENS,u'condition'), None): cnv_formula,
((TABLENS,u'condition-source'), None): cnv_string,
((TABLENS,u'condition-source-range-address'), None): cnv_string,
((TABLENS,u'contains-error'), None): cnv_boolean,
@@ -1144,13 +1177,13 @@ attrconverters = {
((TABLENS,u'end-x'), None): cnv_length,
((TABLENS,u'end-y'), None): cnv_length,
((TABLENS,u'execute'), None): cnv_boolean,
- ((TABLENS,u'expression'), None): cnv_string,
+ ((TABLENS,u'expression'), None): cnv_formula,
((TABLENS,u'field-name'), None): cnv_string,
((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
((TABLENS,u'field-number'), None): cnv_string,
((TABLENS,u'filter-name'), None): cnv_string,
((TABLENS,u'filter-options'), None): cnv_string,
- ((TABLENS,u'formula'), None): cnv_string,
+ ((TABLENS,u'formula'), None): cnv_formula,
((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'function'), None): cnv_string,
((TABLENS,u'grand-total'), None): cnv_string,
@@ -1290,7 +1323,7 @@ attrconverters = {
((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
((TEXTNS,u'comma-separated'), None): cnv_boolean,
((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
- ((TEXTNS,u'condition'), None): cnv_string,
+ ((TEXTNS,u'condition'), None): cnv_formula,
((TEXTNS,u'connection-name'), None): cnv_string,
((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
((TEXTNS,u'continue-numbering'), None): cnv_boolean,
@@ -1321,7 +1354,7 @@ attrconverters = {
((TEXTNS,u'first-row-start-column'), None): cnv_string,
((TEXTNS,u'fixed'), None): cnv_boolean,
((TEXTNS,u'footnotes-position'), None): cnv_string,
- ((TEXTNS,u'formula'), None): cnv_string,
+ ((TEXTNS,u'formula'), None): cnv_formula,
((TEXTNS,u'global'), None): cnv_boolean,
((TEXTNS,u'howpublished'), None): cnv_string,
((TEXTNS,u'id'), None): cnv_ID,
@@ -1437,7 +1470,10 @@ attrconverters = {
class AttrConverters:
def convert(self, attribute, value, element):
- conversion = attrconverters.get((attribute,element), None)
+ """ Based on the element, figures out how to check/convert the attribute value
+ All values are converted to string
+ """
+ conversion = attrconverters.get((attribute, element.qname), None)
if conversion is not None:
return conversion(attribute, value, element)
else:
diff --git a/src/odf/element.py b/src/odf/element.py
index f0938ba53e..aad698045e 100644
--- a/src/odf/element.py
+++ b/src/odf/element.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
+# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -112,6 +112,9 @@ class Node(xml.dom.Node):
return self.childNodes[-1]
def insertBefore(self, newChild, refChild):
+ """ Inserts the node newChild before the existing child node refChild.
+ If refChild is null, insert newChild at the end of the list of children.
+ """
if newChild.nodeType not in self._child_node_types:
raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
if newChild.parentNode is not None:
@@ -135,21 +138,26 @@ class Node(xml.dom.Node):
newChild.parentNode = self
return newChild
- def appendChild(self, node):
- if node.nodeType == self.DOCUMENT_FRAGMENT_NODE:
- for c in tuple(node.childNodes):
+ def appendChild(self, newChild):
+ """ Adds the node newChild to the end of the list of children of this node.
+ If the newChild is already in the tree, it is first removed.
+ """
+ if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
+ for c in tuple(newChild.childNodes):
self.appendChild(c)
### The DOM does not clearly specify what to return in this case
- return node
- if node.nodeType not in self._child_node_types:
- raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName)
- if node.parentNode is not None:
- node.parentNode.removeChild(node)
- _append_child(self, node)
- node.nextSibling = None
- return node
+ return newChild
+ if newChild.nodeType not in self._child_node_types:
+ raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName)
+ if newChild.parentNode is not None:
+ newChild.parentNode.removeChild(newChild)
+ _append_child(self, newChild)
+ newChild.nextSibling = None
+ return newChild
def removeChild(self, oldChild):
+ """ Removes the child node indicated by oldChild from the list of children, and returns it.
+ """
#FIXME: update ownerDocument.element_dict or find other solution
try:
self.childNodes.remove(oldChild)
@@ -191,8 +199,8 @@ def _append_child(self, node):
node.__dict__["parentNode"] = self
class Childless:
- """Mixin that makes childless-ness easy to implement and avoids
- the complexity of the Node methods that deal with children.
+ """ Mixin that makes childless-ness easy to implement and avoids
+ the complexity of the Node methods that deal with children.
"""
attributes = None
@@ -207,6 +215,7 @@ class Childless:
return None
def appendChild(self, node):
+ """ Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes cannot have children")
@@ -214,14 +223,17 @@ class Childless:
return False
def insertBefore(self, newChild, refChild):
+ """ Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children")
def removeChild(self, oldChild):
+ """ Raises an error """
raise xml.dom.NotFoundErr(
self.tagName + " nodes do not have children")
def replaceChild(self, newChild, oldChild):
+ """ Raises an error """
raise xml.dom.HierarchyRequestErr(
self.tagName + " nodes do not have children")
@@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
nodeType = Node.CDATA_SECTION_NODE
def toXml(self,level,f):
+ """ Generate XML output of the node. If the text contains "]]>", then
+ escape it by going out of CDATA mode (]]>), then write the string
+ and then go into CDATA mode again. (' % self.data)
+ f.write('' % self.data.replace(']]>',']]>]]>" % (r[1].lower().replace('-',''), self.tagName)
+ def get_knownns(self, prefix):
+ """ Odfpy maintains a list of known namespaces. In some cases a prefix is used, and
+ we need to know which namespace it resolves to.
+ """
+ global nsdict
+ for ns,p in nsdict.items():
+ if p == prefix: return ns
+ return None
+
def get_nsprefix(self, namespace):
+ """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
+            and need to look up or assign the prefix for it.
+ """
if namespace is None: namespace = ""
prefix = _nsassign(namespace)
if not self.namespaces.has_key(namespace):
@@ -339,6 +367,9 @@ class Element(Node):
self.ownerDocument.rebuild_caches(element)
def addText(self, text, check_grammar=True):
+ """ Adds text to an element
+ Setting check_grammar=False turns off grammar checking
+ """
if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName
else:
@@ -346,6 +377,9 @@ class Element(Node):
self.appendChild(Text(text))
def addCDATA(self, cdata, check_grammar=True):
+ """ Adds CDATA to an element
+ Setting check_grammar=False turns off grammar checking
+ """
if check_grammar and self.qname not in grammar.allows_text:
raise IllegalText, "The <%s> element does not allow text" % self.tagName
else:
@@ -403,17 +437,18 @@ class Element(Node):
# if allowed_attrs and (namespace, localpart) not in allowed_attrs:
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
c = AttrConverters()
- self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
+ self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
def getAttrNS(self, namespace, localpart):
prefix = self.get_nsprefix(namespace)
- return self.attributes.get(prefix + ":" + localpart)
+ return self.attributes.get((namespace, localpart))
def removeAttrNS(self, namespace, localpart):
- prefix = self.get_nsprefix(namespace)
- del self.attributes[prefix + ":" + localpart]
+ del self.attributes[(namespace, localpart)]
def getAttribute(self, attr):
+ """ Get an attribute value. The method knows which namespace the attribute is in
+ """
allowed_attrs = self.allowed_attributes()
if allowed_attrs is None:
if type(attr) == type(()):
@@ -432,8 +467,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
- for attkey in self.attributes.keys():
- f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+ for qname in self.attributes.keys():
+ prefix = self.get_nsprefix(qname[0])
+ f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
f.write('>')
def write_close_tag(self, level, f):
@@ -445,8 +481,9 @@ class Element(Node):
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
- for attkey in self.attributes.keys():
- f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
+ for qname in self.attributes.keys():
+ prefix = self.get_nsprefix(qname[0])
+ f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
if self.childNodes:
f.write('>')
for element in self.childNodes:
@@ -464,6 +501,7 @@ class Element(Node):
return accumulator
def getElementsByType(self, element):
+ """ Gets elements based on the type, which is function from text.py, draw.py etc. """
obj = element(check_grammar=False)
return self._getElementsByObj(obj,[])
diff --git a/src/odf/grammar.py b/src/odf/grammar.py
index 09ec02cbaa..d5d8d5970e 100644
--- a/src/odf/grammar.py
+++ b/src/odf/grammar.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
diff --git a/src/odf/load.py b/src/odf/load.py
index 1f0e45ea23..e48fcaa412 100644
--- a/src/odf/load.py
+++ b/src/odf/load.py
@@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler):
self.level = self.level + 1
# Add any accumulated text content
- content = ''.join(self.data).strip()
- if len(content) > 0:
+ content = ''.join(self.data)
+ if len(content.strip()) > 0:
self.parent.addText(content, check_grammar=False)
self.data = []
# Create the element
diff --git a/src/odf/namespaces.py b/src/odf/namespaces.py
index 3109210bb5..96ea958e79 100644
--- a/src/odf/namespaces.py
+++ b/src/odf/namespaces.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -17,7 +17,7 @@
#
# Contributor(s):
#
-TOOLSVERSION = u"ODFPY/0.9.2dev"
+TOOLSVERSION = u"ODFPY/0.9.4dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
@@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/"
DOMNS = u"http://www.w3.org/2001/xml-events"
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
+FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
+GRDDLNS = u"http://www.w3.org/2003/g/data-view#"
KOFFICENS = u"http://www.koffice.org/2005/"
MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
MATHNS = u"http://www.w3.org/1998/Math/MathML"
METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
+OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
OOONS = u"http://openoffice.org/2004/office"
OOOWNS = u"http://openoffice.org/2004/writer"
OOOCNS = u"http://openoffice.org/2004/calc"
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
+RPTNS = u"http://openoffice.org/2005/report"
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
@@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
XFORMSNS = u"http://www.w3.org/2002/xforms"
XLINKNS = u"http://www.w3.org/1999/xlink"
XMLNS = u"http://www.w3.org/XML/1998/namespace"
-
+XSDNS = u"http://www.w3.org/2001/XMLSchema"
+XSINS = u"http://www.w3.org/2001/XMLSchema-instance"
nsdict = {
ANIMNS: u'anim',
@@ -61,19 +66,23 @@ nsdict = {
DOMNS: u'dom',
DR3DNS: u'dr3d',
DRAWNS: u'draw',
+ FIELDNS: u'field',
FONS: u'fo',
FORMNS: u'form',
+ GRDDLNS: u'grddl',
KOFFICENS: u'koffice',
MANIFESTNS: u'manifest',
MATHNS: u'math',
METANS: u'meta',
NUMBERNS: u'number',
OFFICENS: u'office',
+ OFNS: u'of',
OOONS: u'ooo',
OOOWNS: u'ooow',
OOOCNS: u'oooc',
PRESENTATIONNS: u'presentation',
RDFANS: u'rdfa',
+ RPTNS: u'rpt',
SCRIPTNS: u'script',
SMILNS: u'smil',
STYLENS: u'style',
@@ -83,4 +92,6 @@ nsdict = {
XFORMSNS: u'xforms',
XLINKNS: u'xlink',
XMLNS: u'xml',
+ XSDNS: u'xsd',
+ XSINS: u'xsi',
}
diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py
index 53a3e87dc2..390d407d16 100644
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
+# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -20,15 +20,18 @@
#
#import pdb
#pdb.set_trace()
-import zipfile
-from xml.sax import handler, expatreader
-from xml.sax.xmlreader import InputSource
+from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
-from cStringIO import StringIO
+from xml.dom import Node
-from namespaces import DCNS, DRAWNS, FONS, \
- METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
- STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+from opendocument import load
+
+from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
+ FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
+ SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
+
+if False: # Added by Kovid
+ DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
# Handling of styles
#
@@ -72,8 +75,8 @@ class StyleToCSS:
(FONS,u"border-left"): self.c_fo,
(FONS,u"border-right"): self.c_fo,
(FONS,u"border-top"): self.c_fo,
- (FONS,u"break-after"): self.c_break,
- (FONS,u"break-before"): self.c_break,
+ (FONS,u"break-after"): self.c_break, # Added by Kovid
+ (FONS,u"break-before"): self.c_break,# Added by Kovid
(FONS,u"color"): self.c_fo,
(FONS,u"font-family"): self.c_fo,
(FONS,u"font-size"): self.c_fo,
@@ -136,7 +139,7 @@ class StyleToCSS:
selector = rule[1]
sdict[selector] = val
- def c_break(self, ruleset, sdict, rule, val):
+ def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
property = 'page-' + rule[1]
values = {'auto': 'auto', 'column': 'always', 'page': 'always',
'even-page': 'left', 'odd-page': 'right',
@@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler):
self.elements = {
(DCNS, 'title'): (self.s_processcont, self.e_dc_title),
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
- (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
+ (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
(DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
(DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
+ (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
(DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
(DRAWNS, 'image'): (self.s_draw_image, None),
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
+ (DRAWNS, 'object'): (self.s_draw_object, None),
+ (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
@@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler):
(NUMBERNS, "date-style"):(self.s_ignorexml, None),
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
+ (OFFICENS, "annotation"):(self.s_ignorexml, None),
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
+ (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
(OFFICENS, "forms"):(self.s_ignorexml, None),
(OFFICENS, "master-styles"):(self.s_office_master_styles, None),
@@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler):
(OFFICENS, "styles"):(self.s_office_styles, None),
(OFFICENS, "text"):(self.s_office_text, self.e_office_text),
(OFFICENS, "scripts"):(self.s_ignorexml, None),
+ (OFFICENS, "settings"):(self.s_ignorexml, None),
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
(STYLENS, "default-page-layout"):(self.s_ignorexml, None),
@@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler):
# (STYLENS, "header-style"):(self.s_style_header_style, None),
(STYLENS, "master-page"):(self.s_style_master_page, None),
(STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
-# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
- (STYLENS, "page-layout"):(self.s_ignorexml, None),
+ (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
+# (STYLENS, "page-layout"):(self.s_ignorexml, None),
(STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
(STYLENS, "style"):(self.s_style_style, self.e_style_style),
(STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
@@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
(TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
+ (TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
+ (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
+ (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
+ (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
(TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
(TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
(TEXTNS, 'line-break'):(self.s_text_line_break, None),
@@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler):
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
}
if embedable:
- self.elements[(OFFICENS, u"text")] = (None,None)
- self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
- self.elements[(OFFICENS, u"presentation")] = (None,None)
- self.elements[(OFFICENS, u"document-content")] = (None,None)
+            self.set_embedable()  # was make_embedable(), but the method defined below is set_embedable()
+ self._resetobject()
+
+    def set_plain(self):
+        """ Tell the parser to not generate CSS (clears the generate_css flag) """
+        self.generate_css = False
+
+    def set_embedable(self):
+        """ Tells the converter to only output the parts inside the <body>, by replacing the handlers of
+            the elements below with (None,None). NOTE(review): "<body>" is assumed - confirm. """
+        self.elements[(OFFICENS, u"text")] = (None,None)
+        self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
+        self.elements[(OFFICENS, u"presentation")] = (None,None)
+        self.elements[(OFFICENS, u"document-content")] = (None,None)
+
+
+ def add_style_file(self, stylefilename, media=None):
+ """ Add a link to an external style file.
+ Also turns of the embedding of styles in the HTML
+ """
+ self.use_internal_css = False
+ self.stylefilename = stylefilename
+ if media:
+ self.metatags.append('\n' % (stylefilename,media))
+ else:
+ self.metatags.append('\n' % (stylefilename))
+
+    def _resetfootnotes(self):
+        # Footnotes and endnotes: empty the note table, set the counter to 0, clear the body
+        self.notedict = {}
+        self.currentnote = 0
+        self.notebody = ''
+
+    def _resetobject(self):
+        """ Set every piece of conversion state listed below back to its initial value.
+        """
+        self.lines = []
+        self._wfunc = self._wlines
+        self.xmlfile = ''
+        self.title = ''
+        self.language = ''
+        self.creator = ''
+        self.data = []
+        self.tagstack = TagStack()
+        self.htmlstack = []
+        self.pstack = []
+        self.processelem = True
+        self.processcont = True
+        self.listtypes = {}
+        self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
+        # add_style_file() sets this flag to False
+        self.use_internal_css = True
+        self.cs = StyleToCSS()
+        self.anchors = {}
+
+        # Style declarations
+        self.stylestack = []
+        self.styledict = {}
+        self.currentstyle = None
+
+        self._resetfootnotes()
+
+        # Tags from meta.xml
+        # (add_style_file() also appends to this list)
+        self.metatags = []
def writeout(self, s):
@@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler):
def opentag(self, tag, attrs={}, block=False):
""" Create an open HTML tag """
+ self.htmlstack.append((tag,attrs,block))
a = []
for key,val in attrs.items():
a.append('''%s=%s''' % (key, quoteattr(val)))
@@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler):
self.writeout("\n")
def closetag(self, tag, block=True):
+ """ Close an open HTML tag """
+ self.htmlstack.pop()
self.writeout("%s>" % tag)
if block == True:
self.writeout("\n")
@@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler):
a.append('''%s=%s''' % (key, quoteattr(val)))
self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
+#--------------------------------------------------
+# Interface to parser
#--------------------------------------------------
def characters(self, data):
if self.processelem and self.processcont:
self.data.append(data)
- def handle_starttag(self, tag, method, attrs):
- method(tag,attrs)
-
- def handle_endtag(self, tag, attrs, method):
- method(tag, attrs)
-
def startElementNS(self, tag, qname, attrs):
self.pstack.append( (self.processelem, self.processcont) )
if self.processelem:
@@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler):
self.unknown_endtag(tag, attrs)
self.processelem, self.processcont = self.pstack.pop()
+#--------------------------------------------------
+    def handle_starttag(self, tag, method, attrs):
+        """ Call the start handler as method(tag, attrs) """
+        method(tag,attrs)
+
+    def handle_endtag(self, tag, attrs, method):
+        """ Call the end handler as method(tag, attrs). The parameter order differs from handle_starttag """
+        method(tag, attrs)
+
def unknown_starttag(self, tag, attrs):
pass
@@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler):
self.processelem = False
def s_ignorecont(self, tag, attrs):
+ """ Stop processing the text nodes """
self.processcont = False
def s_processcont(self, tag, attrs):
+ """ Start processing the text nodes """
self.processcont = True
def classname(self, attrs):
""" Generate a class name from a style name """
- c = attrs[(TEXTNS,'style-name')]
+ c = attrs.get((TEXTNS,'style-name'),'')
c = c.replace(".","_")
return c
def get_anchor(self, name):
+ """ Create a unique anchor id for a href name """
if not self.anchors.has_key(name):
# Changed by Kovid
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
@@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_title(self, tag, attrs):
""" Get the title from the meta data and create a HTML
"""
- self.metatags.append('%s\n' % escape(''.join(self.data)))
self.title = ''.join(self.data)
+ #self.metatags.append('%s\n' % escape(self.title))
self.data = []
def e_dc_metatag(self, tag, attrs):
@@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler):
def e_dc_contentlanguage(self, tag, attrs):
""" Set the content language. Identifies the targeted audience
"""
- self.metatags.append('\n' % ''.join(self.data))
+ self.language = ''.join(self.data)
+ self.metatags.append('\n' % escape(self.language))
self.data = []
+ def e_dc_creator(self, tag, attrs):
+ """ Set the content creator. Identifies the targeted audience
+ """
+ self.creator = ''.join(self.data)
+ self.metatags.append('\n' % escape(self.creator))
+ self.data = []
+
+ def s_custom_shape(self, tag, attrs):
+ """ A is made into a
in HTML which is then styled
+ """
+ anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
+ htmltag = 'div'
+ name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
+ if name == 'G-':
+ name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "")
+ name = name.replace(".","_")
+ if anchor_type == "paragraph":
+ style = 'position:absolute;'
+ elif anchor_type == 'char':
+ style = "position:absolute;"
+ elif anchor_type == 'as-char':
+ htmltag = 'div'
+ style = ''
+ else:
+ style = "position: absolute;"
+ if attrs.has_key( (SVGNS,"width") ):
+ style = style + "width:" + attrs[(SVGNS,"width")] + ";"
+ if attrs.has_key( (SVGNS,"height") ):
+ style = style + "height:" + attrs[(SVGNS,"height")] + ";"
+ if attrs.has_key( (SVGNS,"x") ):
+ style = style + "left:" + attrs[(SVGNS,"x")] + ";"
+ if attrs.has_key( (SVGNS,"y") ):
+ style = style + "top:" + attrs[(SVGNS,"y")] + ";"
+ if self.generate_css:
+ self.opentag(htmltag, {'class': name, 'style': style})
+ else:
+ self.opentag(htmltag)
+
+    def e_custom_shape(self, tag, attrs):
+        """ End the <draw:custom-shape> by closing the <div>
+        """
+        self.closetag('div')
+
def s_draw_frame(self, tag, attrs):
""" A is made into a
in HTML which is then styled
"""
- anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
+ anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
htmltag = 'div'
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
if name == 'G-':
@@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler):
htmltag = 'div'
style = ''
else:
- style = "position: absolute;"
+ style = "position:absolute;"
if attrs.has_key( (SVGNS,"width") ):
style = style + "width:" + attrs[(SVGNS,"width")] + ";"
if attrs.has_key( (SVGNS,"height") ):
@@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler):
htmlattrs['style'] = "display: block;"
self.emptytag('img', htmlattrs)
+    def s_draw_object(self, tag, attrs):
+        """ A <draw:object> is an embedded object in the document (e.g. spreadsheet in presentation).
+            Currently a no-op: the method returns before any of the code below runs.
+        """
+        return # Added by Kovid
+        # Everything from here on is unreachable because of the return above.
+        objhref = attrs[(XLINKNS,"href")]
+        # Remove leading "./": from "./Object 1" to "Object 1"
+#        objhref = objhref [2:]
+
+        # Not using os.path.join since it fails to find the file on Windows.
+#        objcontentpath = '/'.join([objhref, 'content.xml'])
+
+        for c in self.document.childnodes:
+            if c.folder == objhref:
+                self._walknode(c.topnode)
+
+ def s_draw_object_ole(self, tag, attrs):
+ """ A is embedded OLE object in the document (e.g. MS Graph).
+ """
+ class_id = attrs[(DRAWNS,"class-id")]
+ if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart
+ tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' }
+ self.opentag('a', tagattrs)
+ self.closetag('a', tagattrs)
+
def s_draw_page(self, tag, attrs):
""" A is a slide in a presentation. We use a