diff --git a/src/odf/attrconverters.py b/src/odf/attrconverters.py index 0117324bba..b75f80a2dd 100644 --- a/src/odf/attrconverters.py +++ b/src/odf/attrconverters.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2008 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element): # Potentially accept color values def cnv_color(attribute, arg, element): + """ A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where + rr, gg and bb are 8-bit hexadecimal digits. + """ return str(arg) def cnv_configtype(attribute, arg, element): @@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element): # Understand different date formats def cnv_date(attribute, arg, element): + """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime + value. + """ return str(arg) def cnv_dateTime(attribute, arg, element): + """ A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime + value. + """ return str(arg) def cnv_double(attribute, arg, element): @@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element): return str(arg) def cnv_family(attribute, arg, element): + """ A style family """ if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell", "graphic", "presentation", "drawing-page", "chart"): raise ValueError, "'%s' not allowed" % str(arg) return str(arg) +def __save_prefix(attribute, arg, element): + prefix = arg.split(':',1)[0] + if prefix == arg: + return unicode(arg) + namespace = element.get_knownns(prefix) + if namespace is None: + #raise ValueError, "'%s' is an unknown prefix" % str(prefix) + return unicode(arg) + p = element.get_nsprefix(namespace) + return unicode(arg) + +def cnv_formula(attribute, arg, element): + """ A string containing a formula. Formulas do not have a predefined syntax, but the string should + begin with a namespace prefix, followed by a “:” (COLON, U+003A) separator, followed by the text + of the formula. The namespace bound to the prefix determines the syntax and semantics of the + formula. + """ + return __save_prefix(attribute, arg, element) + def cnv_ID(attribute, arg, element): return str(arg) @@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element): pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))') def cnv_length(attribute, arg, element): + """ A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the + Units of Measure defined in §5.9.13 of [XSL]. + """ global pattern_length if not pattern_length.match(arg): raise ValueError, "'%s' is not a valid length" % arg @@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element): if not pattern_namespacedToken.match(arg): raise ValueError, "'%s' is not a valid namespaced token" % arg - return arg + return __save_prefix(attribute, arg, element) -# Must accept string as argument -# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName -# Essentially an XML name minus ':' def cnv_NCName(attribute, arg, element): + """ NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName + Essentially an XML name minus ':' + """ if type(arg) in types.StringTypes: return make_NCName(arg) else: @@ -226,6 +258,7 @@ attrconverters = { ((ANIMNS,u'name'), None): cnv_string, ((ANIMNS,u'sub-item'), None): cnv_string, ((ANIMNS,u'value'), None): cnv_string, +# ((DBNS,u'type'), None): cnv_namespacedToken, ((CHARTNS,u'attached-axis'), None): cnv_string, ((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor, ((CHARTNS,u'class'), None): cnv_namespacedToken, @@ -288,7 +321,7 @@ attrconverters = { ((CHARTNS,u'values-cell-range-address'), None): cnv_string, ((CHARTNS,u'vertical'), None): cnv_boolean, ((CHARTNS,u'visible'), None): cnv_boolean, - ((CONFIGNS,u'name'), None): cnv_string, + ((CONFIGNS,u'name'), None): cnv_formula, ((CONFIGNS,u'type'), None): cnv_configtype, ((DR3DNS,u'ambient-color'), None): cnv_string, ((DR3DNS,u'back-scale'), None): cnv_string, @@ -369,11 +402,11 @@ attrconverters = { ((DRAWNS,u'decimal-places'), None): cnv_string, ((DRAWNS,u'display'), None): cnv_string, ((DRAWNS,u'display-name'), None): cnv_string, - ((DRAWNS,u'distance'), None): cnv_string, + ((DRAWNS,u'distance'), None): cnv_lengthorpercent, ((DRAWNS,u'dots1'), None): cnv_integer, - ((DRAWNS,u'dots1-length'), None): cnv_length, + ((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent, ((DRAWNS,u'dots2'), None): cnv_integer, - ((DRAWNS,u'dots2-length'), None): cnv_length, + ((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent, ((DRAWNS,u'end-angle'), None): cnv_double, ((DRAWNS,u'end'), None): cnv_string, ((DRAWNS,u'end-color'), None): cnv_string, @@ -383,7 +416,7 @@ attrconverters = { ((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string, ((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string, ((DRAWNS,u'end-shape'), None): cnv_IDREF, - ((DRAWNS,u'engine'), None): cnv_string, + ((DRAWNS,u'engine'), None): cnv_namespacedToken, ((DRAWNS,u'enhanced-path'), None): cnv_string, ((DRAWNS,u'escape-direction'), None): cnv_string, ((DRAWNS,u'extrusion-allowed'), None): cnv_boolean, @@ -604,7 +637,7 @@ attrconverters = { ((FORMNS,u'button-type'), None): cnv_string, ((FORMNS,u'command'), None): cnv_string, ((FORMNS,u'command-type'), None): cnv_string, - ((FORMNS,u'control-implementation'), None): cnv_string, + ((FORMNS,u'control-implementation'), None): cnv_namespacedToken, ((FORMNS,u'convert-empty-to-null'), None): cnv_boolean, ((FORMNS,u'current-selected'), None): cnv_boolean, ((FORMNS,u'current-state'), None): cnv_string, @@ -800,8 +833,8 @@ attrconverters = { ((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean, ((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger, ((PRESENTATIONNS,u'visibility'), None): cnv_string, - ((SCRIPTNS,u'event-name'), None): cnv_string, - ((SCRIPTNS,u'language'), None): cnv_string, + ((SCRIPTNS,u'event-name'), None): cnv_formula, + ((SCRIPTNS,u'language'), None): cnv_formula, ((SCRIPTNS,u'macro-name'), None): cnv_string, ((SMILNS,u'accelerate'), None): cnv_double, ((SMILNS,u'accumulate'), None): cnv_string, @@ -1087,7 +1120,7 @@ attrconverters = { ((SVGNS,u'y2'), None): cnv_lengthorpercent, ((TABLENS,u'acceptance-state'), None): cnv_string, ((TABLENS,u'add-empty-lines'), None): cnv_boolean, - ((TABLENS,u'algorithm'), None): cnv_string, + ((TABLENS,u'algorithm'), None): cnv_formula, ((TABLENS,u'align'), None): cnv_string, ((TABLENS,u'allow-empty-cell'), None): cnv_boolean, ((TABLENS,u'application-data'), None): cnv_string, @@ -1106,7 +1139,7 @@ attrconverters = { ((TABLENS,u'cell-range'), None): cnv_string, ((TABLENS,u'column'), None): cnv_integer, ((TABLENS,u'comment'), None): cnv_string, - ((TABLENS,u'condition'), None): cnv_string, + ((TABLENS,u'condition'), None): cnv_formula, ((TABLENS,u'condition-source'), None): cnv_string, ((TABLENS,u'condition-source-range-address'), None): cnv_string, ((TABLENS,u'contains-error'), None): cnv_boolean, @@ -1144,13 +1177,13 @@ attrconverters = { ((TABLENS,u'end-x'), None): cnv_length, ((TABLENS,u'end-y'), None): cnv_length, ((TABLENS,u'execute'), None): cnv_boolean, - ((TABLENS,u'expression'), None): cnv_string, + ((TABLENS,u'expression'), None): cnv_formula, ((TABLENS,u'field-name'), None): cnv_string, ((TABLENS,u'field-number'), None): cnv_nonNegativeInteger, ((TABLENS,u'field-number'), None): cnv_string, ((TABLENS,u'filter-name'), None): cnv_string, ((TABLENS,u'filter-options'), None): cnv_string, - ((TABLENS,u'formula'), None): cnv_string, + ((TABLENS,u'formula'), None): cnv_formula, ((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'function'), None): cnv_string, ((TABLENS,u'grand-total'), None): cnv_string, @@ -1290,7 +1323,7 @@ attrconverters = { ((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean, ((TEXTNS,u'comma-separated'), None): cnv_boolean, ((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef, - ((TEXTNS,u'condition'), None): cnv_string, + ((TEXTNS,u'condition'), None): cnv_formula, ((TEXTNS,u'connection-name'), None): cnv_string, ((TEXTNS,u'consecutive-numbering'), None): cnv_boolean, ((TEXTNS,u'continue-numbering'), None): cnv_boolean, @@ -1321,7 +1354,7 @@ attrconverters = { ((TEXTNS,u'first-row-start-column'), None): cnv_string, ((TEXTNS,u'fixed'), None): cnv_boolean, ((TEXTNS,u'footnotes-position'), None): cnv_string, - ((TEXTNS,u'formula'), None): cnv_string, + ((TEXTNS,u'formula'), None): cnv_formula, ((TEXTNS,u'global'), None): cnv_boolean, ((TEXTNS,u'howpublished'), None): cnv_string, ((TEXTNS,u'id'), None): cnv_ID, @@ -1437,7 +1470,10 @@ attrconverters = { class AttrConverters: def convert(self, attribute, value, element): - conversion = attrconverters.get((attribute,element), None) + """ Based on the element, figures out how to check/convert the attribute value + All values are converted to string + """ + conversion = attrconverters.get((attribute, element.qname), None) if conversion is not None: return conversion(attribute, value, element) else: diff --git a/src/odf/element.py b/src/odf/element.py index f0938ba53e..aad698045e 100644 --- a/src/odf/element.py +++ b/src/odf/element.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (C) 2007-2008 Søren Roug, European Environment Agency +# Copyright (C) 2007-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -112,6 +112,9 @@ class Node(xml.dom.Node): return self.childNodes[-1] def insertBefore(self, newChild, refChild): + """ Inserts the node newChild before the existing child node refChild. + If refChild is null, insert newChild at the end of the list of children. + """ if newChild.nodeType not in self._child_node_types: raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName) if newChild.parentNode is not None: @@ -135,21 +138,26 @@ class Node(xml.dom.Node): newChild.parentNode = self return newChild - def appendChild(self, node): - if node.nodeType == self.DOCUMENT_FRAGMENT_NODE: - for c in tuple(node.childNodes): + def appendChild(self, newChild): + """ Adds the node newChild to the end of the list of children of this node. + If the newChild is already in the tree, it is first removed. + """ + if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE: + for c in tuple(newChild.childNodes): self.appendChild(c) ### The DOM does not clearly specify what to return in this case - return node - if node.nodeType not in self._child_node_types: - raise IllegalChild, "<%s> is not allowed in %s" % ( node.tagName, self.tagName) - if node.parentNode is not None: - node.parentNode.removeChild(node) - _append_child(self, node) - node.nextSibling = None - return node + return newChild + if newChild.nodeType not in self._child_node_types: + raise IllegalChild, "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName) + if newChild.parentNode is not None: + newChild.parentNode.removeChild(newChild) + _append_child(self, newChild) + newChild.nextSibling = None + return newChild def removeChild(self, oldChild): + """ Removes the child node indicated by oldChild from the list of children, and returns it. + """ #FIXME: update ownerDocument.element_dict or find other solution try: self.childNodes.remove(oldChild) @@ -191,8 +199,8 @@ def _append_child(self, node): node.__dict__["parentNode"] = self class Childless: - """Mixin that makes childless-ness easy to implement and avoids - the complexity of the Node methods that deal with children. + """ Mixin that makes childless-ness easy to implement and avoids + the complexity of the Node methods that deal with children. """ attributes = None @@ -207,6 +215,7 @@ class Childless: return None def appendChild(self, node): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes cannot have children") @@ -214,14 +223,17 @@ class Childless: return False def insertBefore(self, newChild, refChild): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes do not have children") def removeChild(self, oldChild): + """ Raises an error """ raise xml.dom.NotFoundErr( self.tagName + " nodes do not have children") def replaceChild(self, newChild, oldChild): + """ Raises an error """ raise xml.dom.HierarchyRequestErr( self.tagName + " nodes do not have children") @@ -247,8 +259,12 @@ class CDATASection(Childless, Text): nodeType = Node.CDATA_SECTION_NODE def toXml(self,level,f): + """ Generate XML output of the node. If the text contains "]]>", then + escape it by going out of CDATA mode (]]>), then write the string + and then go into CDATA mode again. (' % self.data) + f.write('' % self.data.replace(']]>',']]>]]>" % (r[1].lower().replace('-',''), self.tagName) + def get_knownns(self, prefix): + """ Odfpy maintains a list of known namespaces. In some cases a prefix is used, and + we need to know which namespace it resolves to. + """ + global nsdict + for ns,p in nsdict.items(): + if p == prefix: return ns + return None + def get_nsprefix(self, namespace): + """ Odfpy maintains a list of known namespaces. In some cases we have a namespace URL, + and needs to look up or assign the prefix for it. + """ if namespace is None: namespace = "" prefix = _nsassign(namespace) if not self.namespaces.has_key(namespace): @@ -339,6 +367,9 @@ class Element(Node): self.ownerDocument.rebuild_caches(element) def addText(self, text, check_grammar=True): + """ Adds text to an element + Setting check_grammar=False turns off grammar checking + """ if check_grammar and self.qname not in grammar.allows_text: raise IllegalText, "The <%s> element does not allow text" % self.tagName else: @@ -346,6 +377,9 @@ class Element(Node): self.appendChild(Text(text)) def addCDATA(self, cdata, check_grammar=True): + """ Adds CDATA to an element + Setting check_grammar=False turns off grammar checking + """ if check_grammar and self.qname not in grammar.allows_text: raise IllegalText, "The <%s> element does not allow text" % self.tagName else: @@ -403,17 +437,18 @@ class Element(Node): # if allowed_attrs and (namespace, localpart) not in allowed_attrs: # raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName) c = AttrConverters() - self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname) + self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self) def getAttrNS(self, namespace, localpart): prefix = self.get_nsprefix(namespace) - return self.attributes.get(prefix + ":" + localpart) + return self.attributes.get((namespace, localpart)) def removeAttrNS(self, namespace, localpart): - prefix = self.get_nsprefix(namespace) - del self.attributes[prefix + ":" + localpart] + del self.attributes[(namespace, localpart)] def getAttribute(self, attr): + """ Get an attribute value. The method knows which namespace the attribute is in + """ allowed_attrs = self.allowed_attributes() if allowed_attrs is None: if type(attr) == type(()): @@ -432,8 +467,9 @@ class Element(Node): if level == 0: for namespace, prefix in self.namespaces.items(): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') - for attkey in self.attributes.keys(): - f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) + for qname in self.attributes.keys(): + prefix = self.get_nsprefix(qname[0]) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) f.write('>') def write_close_tag(self, level, f): @@ -445,8 +481,9 @@ class Element(Node): if level == 0: for namespace, prefix in self.namespaces.items(): f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"') - for attkey in self.attributes.keys(): - f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8'))) + for qname in self.attributes.keys(): + prefix = self.get_nsprefix(qname[0]) + f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8'))) if self.childNodes: f.write('>') for element in self.childNodes: @@ -464,6 +501,7 @@ class Element(Node): return accumulator def getElementsByType(self, element): + """ Gets elements based on the type, which is function from text.py, draw.py etc. """ obj = element(check_grammar=False) return self._getElementsByObj(obj,[]) diff --git a/src/odf/grammar.py b/src/odf/grammar.py index 09ec02cbaa..d5d8d5970e 100644 --- a/src/odf/grammar.py +++ b/src/odf/grammar.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/src/odf/load.py b/src/odf/load.py index 1f0e45ea23..e48fcaa412 100644 --- a/src/odf/load.py +++ b/src/odf/load.py @@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler): self.level = self.level + 1 # Add any accumulated text content - content = ''.join(self.data).strip() - if len(content) > 0: + content = ''.join(self.data) + if len(content.strip()) > 0: self.parent.addText(content, check_grammar=False) self.data = [] # Create the element diff --git a/src/odf/namespaces.py b/src/odf/namespaces.py index 3109210bb5..96ea958e79 100644 --- a/src/odf/namespaces.py +++ b/src/odf/namespaces.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -17,7 +17,7 @@ # # Contributor(s): # -TOOLSVERSION = u"ODFPY/0.9.2dev" +TOOLSVERSION = u"ODFPY/0.9.4dev" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" @@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/" DOMNS = u"http://www.w3.org/2001/xml-events" DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" +FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0" +GRDDLNS = u"http://www.w3.org/2003/g/data-view#" KOFFICENS = u"http://www.koffice.org/2005/" MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0" MATHNS = u"http://www.w3.org/1998/Math/MathML" METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0" NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0" +OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2" OOONS = u"http://openoffice.org/2004/office" OOOWNS = u"http://openoffice.org/2004/writer" OOOCNS = u"http://openoffice.org/2004/calc" PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#" +RPTNS = u"http://openoffice.org/2005/report" SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0" SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0" STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0" @@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0" XFORMSNS = u"http://www.w3.org/2002/xforms" XLINKNS = u"http://www.w3.org/1999/xlink" XMLNS = u"http://www.w3.org/XML/1998/namespace" - +XSDNS = u"http://www.w3.org/2001/XMLSchema" +XSINS = u"http://www.w3.org/2001/XMLSchema-instance" nsdict = { ANIMNS: u'anim', @@ -61,19 +66,23 @@ nsdict = { DOMNS: u'dom', DR3DNS: u'dr3d', DRAWNS: u'draw', + FIELDNS: u'field', FONS: u'fo', FORMNS: u'form', + GRDDLNS: u'grddl', KOFFICENS: u'koffice', MANIFESTNS: u'manifest', MATHNS: u'math', METANS: u'meta', NUMBERNS: u'number', OFFICENS: u'office', + OFNS: u'of', OOONS: u'ooo', OOOWNS: u'ooow', OOOCNS: u'oooc', PRESENTATIONNS: u'presentation', RDFANS: u'rdfa', + RPTNS: u'rpt', SCRIPTNS: u'script', SMILNS: u'smil', STYLENS: u'style', @@ -83,4 +92,6 @@ nsdict = { XFORMSNS: u'xforms', XLINKNS: u'xlink', XMLNS: u'xml', + XSDNS: u'xsd', + XSINS: u'xsi', } diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 53a3e87dc2..390d407d16 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (C) 2006-2007 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -20,15 +20,18 @@ # #import pdb #pdb.set_trace() -import zipfile -from xml.sax import handler, expatreader -from xml.sax.xmlreader import InputSource +from xml.sax import handler from xml.sax.saxutils import escape, quoteattr -from cStringIO import StringIO +from xml.dom import Node -from namespaces import DCNS, DRAWNS, FONS, \ - METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \ - STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS +from opendocument import load + +from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \ + FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \ + SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS + +if False: # Added by Kovid + DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS # Handling of styles # @@ -72,8 +75,8 @@ class StyleToCSS: (FONS,u"border-left"): self.c_fo, (FONS,u"border-right"): self.c_fo, (FONS,u"border-top"): self.c_fo, - (FONS,u"break-after"): self.c_break, - (FONS,u"break-before"): self.c_break, + (FONS,u"break-after"): self.c_break, # Added by Kovid + (FONS,u"break-before"): self.c_break,# Added by Kovid (FONS,u"color"): self.c_fo, (FONS,u"font-family"): self.c_fo, (FONS,u"font-size"): self.c_fo, @@ -136,7 +139,7 @@ class StyleToCSS: selector = rule[1] sdict[selector] = val - def c_break(self, ruleset, sdict, rule, val): + def c_break(self, ruleset, sdict, rule, val): # Added by Kovid property = 'page-' + rule[1] values = {'auto': 'auto', 'column': 'always', 'page': 'always', 'even-page': 'left', 'odd-page': 'right', @@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler): self.elements = { (DCNS, 'title'): (self.s_processcont, self.e_dc_title), (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), - (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), + (DCNS, 'creator'): (self.s_processcont, self.e_dc_creator), (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), + (DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape), (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), (DRAWNS, 'image'): (self.s_draw_image, None), (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), (DRAWNS, "layer-set"):(self.s_ignorexml, None), + (DRAWNS, 'object'): (self.s_draw_object, None), + (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None), (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), @@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler): (NUMBERNS, "date-style"):(self.s_ignorexml, None), (NUMBERNS, "number-style"):(self.s_ignorexml, None), (NUMBERNS, "text-style"):(self.s_ignorexml, None), + (OFFICENS, "annotation"):(self.s_ignorexml, None), (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None), + (OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content), (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), (OFFICENS, "forms"):(self.s_ignorexml, None), (OFFICENS, "master-styles"):(self.s_office_master_styles, None), @@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler): (OFFICENS, "styles"):(self.s_office_styles, None), (OFFICENS, "text"):(self.s_office_text, self.e_office_text), (OFFICENS, "scripts"):(self.s_ignorexml, None), + (OFFICENS, "settings"):(self.s_ignorexml, None), (PRESENTATIONNS, "notes"):(self.s_ignorexml, None), # (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), (STYLENS, "default-page-layout"):(self.s_ignorexml, None), @@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler): # (STYLENS, "header-style"):(self.s_style_header_style, None), (STYLENS, "master-page"):(self.s_style_master_page, None), (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), -# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), - (STYLENS, "page-layout"):(self.s_ignorexml, None), + (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), +# (STYLENS, "page-layout"):(self.s_ignorexml, None), (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), (STYLENS, "style"):(self.s_style_style, self.e_style_style), (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), @@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler): (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'bookmark'): (self.s_text_bookmark, None), + (TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None), + (TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a), + (TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None), (TEXTNS, 'h'): (self.s_text_h, self.e_text_h), (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), (TEXTNS, 'line-break'):(self.s_text_line_break, None), @@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler): (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), } if embedable: - self.elements[(OFFICENS, u"text")] = (None,None) - self.elements[(OFFICENS, u"spreadsheet")] = (None,None) - self.elements[(OFFICENS, u"presentation")] = (None,None) - self.elements[(OFFICENS, u"document-content")] = (None,None) + self.make_embedable() + self._resetobject() + + def set_plain(self): + """ Tell the parser to not generate CSS """ + self.generate_css = False + + def set_embedable(self): + """ Tells the converter to only output the parts inside the """ + self.elements[(OFFICENS, u"text")] = (None,None) + self.elements[(OFFICENS, u"spreadsheet")] = (None,None) + self.elements[(OFFICENS, u"presentation")] = (None,None) + self.elements[(OFFICENS, u"document-content")] = (None,None) + + + def add_style_file(self, stylefilename, media=None): + """ Add a link to an external style file. + Also turns of the embedding of styles in the HTML + """ + self.use_internal_css = False + self.stylefilename = stylefilename + if media: + self.metatags.append('\n' % (stylefilename,media)) + else: + self.metatags.append('\n' % (stylefilename)) + + def _resetfootnotes(self): + # Footnotes and endnotes + self.notedict = {} + self.currentnote = 0 + self.notebody = '' + + def _resetobject(self): + self.lines = [] + self._wfunc = self._wlines + self.xmlfile = '' + self.title = '' + self.language = '' + self.creator = '' + self.data = [] + self.tagstack = TagStack() + self.htmlstack = [] + self.pstack = [] + self.processelem = True + self.processcont = True + self.listtypes = {} + self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10 + self.use_internal_css = True + self.cs = StyleToCSS() + self.anchors = {} + + # Style declarations + self.stylestack = [] + self.styledict = {} + self.currentstyle = None + + self._resetfootnotes() + + # Tags from meta.xml + self.metatags = [] def writeout(self, s): @@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler): def opentag(self, tag, attrs={}, block=False): """ Create an open HTML tag """ + self.htmlstack.append((tag,attrs,block)) a = [] for key,val in attrs.items(): a.append('''%s=%s''' % (key, quoteattr(val))) @@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler): self.writeout("\n") def closetag(self, tag, block=True): + """ Close an open HTML tag """ + self.htmlstack.pop() self.writeout("" % tag) if block == True: self.writeout("\n") @@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler): a.append('''%s=%s''' % (key, quoteattr(val))) self.writeout("<%s %s/>\n" % (tag, " ".join(a))) +#-------------------------------------------------- +# Interface to parser #-------------------------------------------------- def characters(self, data): if self.processelem and self.processcont: self.data.append(data) - def handle_starttag(self, tag, method, attrs): - method(tag,attrs) - - def handle_endtag(self, tag, attrs, method): - method(tag, attrs) - def startElementNS(self, tag, qname, attrs): self.pstack.append( (self.processelem, self.processcont) ) if self.processelem: @@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler): self.unknown_endtag(tag, attrs) self.processelem, self.processcont = self.pstack.pop() +#-------------------------------------------------- + def handle_starttag(self, tag, method, attrs): + method(tag,attrs) + + def handle_endtag(self, tag, attrs, method): + method(tag, attrs) + def unknown_starttag(self, tag, attrs): pass @@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler): self.processelem = False def s_ignorecont(self, tag, attrs): + """ Stop processing the text nodes """ self.processcont = False def s_processcont(self, tag, attrs): + """ Start processing the text nodes """ self.processcont = True def classname(self, attrs): """ Generate a class name from a style name """ - c = attrs[(TEXTNS,'style-name')] + c = attrs.get((TEXTNS,'style-name'),'') c = c.replace(".","_") return c def get_anchor(self, name): + """ Create a unique anchor id for a href name """ if not self.anchors.has_key(name): # Changed by Kovid self.anchors[name] = "anchor%d" % (len(self.anchors) + 1) @@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler): def e_dc_title(self, tag, attrs): """ Get the title from the meta data and create a HTML """ - self.metatags.append('<title>%s\n' % escape(''.join(self.data))) self.title = ''.join(self.data) + #self.metatags.append('%s\n' % escape(self.title)) self.data = [] def e_dc_metatag(self, tag, attrs): @@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler): def e_dc_contentlanguage(self, tag, attrs): """ Set the content language. Identifies the targeted audience """ - self.metatags.append('\n' % ''.join(self.data)) + self.language = ''.join(self.data) + self.metatags.append('\n' % escape(self.language)) self.data = [] + def e_dc_creator(self, tag, attrs): + """ Set the content creator. Identifies the targeted audience + """ + self.creator = ''.join(self.data) + self.metatags.append('\n' % escape(self.creator)) + self.data = [] + + def s_custom_shape(self, tag, attrs): + """ A is made into a
in HTML which is then styled + """ + anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound') + htmltag = 'div' + name = "G-" + attrs.get( (DRAWNS,'style-name'), "") + if name == 'G-': + name = "PR-" + attrs.get( (PRESENTATIONNS,'style-name'), "") + name = name.replace(".","_") + if anchor_type == "paragraph": + style = 'position:absolute;' + elif anchor_type == 'char': + style = "position:absolute;" + elif anchor_type == 'as-char': + htmltag = 'div' + style = '' + else: + style = "position: absolute;" + if attrs.has_key( (SVGNS,"width") ): + style = style + "width:" + attrs[(SVGNS,"width")] + ";" + if attrs.has_key( (SVGNS,"height") ): + style = style + "height:" + attrs[(SVGNS,"height")] + ";" + if attrs.has_key( (SVGNS,"x") ): + style = style + "left:" + attrs[(SVGNS,"x")] + ";" + if attrs.has_key( (SVGNS,"y") ): + style = style + "top:" + attrs[(SVGNS,"y")] + ";" + if self.generate_css: + self.opentag(htmltag, {'class': name, 'style': style}) + else: + self.opentag(htmltag) + + def e_custom_shape(self, tag, attrs): + """ End the + """ + self.closetag('div') + def s_draw_frame(self, tag, attrs): """ A is made into a
in HTML which is then styled """ - anchor_type = attrs.get((TEXTNS,'anchor-type'),'char') + anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound') htmltag = 'div' name = "G-" + attrs.get( (DRAWNS,'style-name'), "") if name == 'G-': @@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler): htmltag = 'div' style = '' else: - style = "position: absolute;" + style = "position:absolute;" if attrs.has_key( (SVGNS,"width") ): style = style + "width:" + attrs[(SVGNS,"width")] + ";" if attrs.has_key( (SVGNS,"height") ): @@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler): htmlattrs['style'] = "display: block;" self.emptytag('img', htmlattrs) + def s_draw_object(self, tag, attrs): + """ A is embedded object in the document (e.g. spreadsheet in presentation). + """ + return # Added by Kovid + objhref = attrs[(XLINKNS,"href")] + # Remove leading "./": from "./Object 1" to "Object 1" +# objhref = objhref [2:] + + # Not using os.path.join since it fails to find the file on Windows. +# objcontentpath = '/'.join([objhref, 'content.xml']) + + for c in self.document.childnodes: + if c.folder == objhref: + self._walknode(c.topnode) + + def s_draw_object_ole(self, tag, attrs): + """ A is embedded OLE object in the document (e.g. MS Graph). + """ + class_id = attrs[(DRAWNS,"class-id")] + if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046": ## Microsoft Graph 97 Chart + tagattrs = { 'name':'object_ole_graph', 'class':'ole-graph' } + self.opentag('a', tagattrs) + self.closetag('a', tagattrs) + def s_draw_page(self, tag, attrs): """ A is a slide in a presentation. We use a
element in HTML. Therefore if you convert a ODP file, you get a series of
s. @@ -655,14 +801,9 @@ class ODF2XHTML(handler.ContentHandler): def html_body(self, tag, attrs): self.writedata() - if self.generate_css: + if self.generate_css and self.use_internal_css: self.opentag('style', {'type':"text/css"}, True) self.writeout('/**/\n') self.closetag('style') @@ -670,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler): self.closetag('head') self.opentag('body', block=True) + # background-color: white removed by Kovid for #9118 + # Specifying an explicit bg color prevents ebook readers + # from successfully inverting colors + default_styles = """ +img { width: 100%; height: 100%; } +* { padding: 0; margin: 0; } +body { margin: 0 1em; } +ol, ul { padding-left: 2em; } +""" + def generate_stylesheet(self): for name in self.stylestack: styles = self.styledict.get(name) @@ -689,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler): styles = parentstyle self.styledict[name] = styles # Write the styles to HTML + self.writeout(self.default_styles) for name in self.stylestack: styles = self.styledict.get(name) css2 = self.cs.convert_styles(styles) @@ -730,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler): self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"}) for metaline in self.metatags: self.writeout(metaline) + self.writeout('%s\n' % escape(self.title)) def e_office_document_content(self, tag, attrs): """ Last tag """ @@ -774,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler): """ Copy all attributes to a struct. We will later convert them to CSS2 """ - if self.currentstyle is None: + if self.currentstyle is None: # Added by Kovid return for key,attr in attrs.items(): self.styledict[self.currentstyle][key] = attr @@ -800,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler): def s_style_font_face(self, tag, attrs): """ It is possible that the HTML browser doesn't know how to show a particular font. Luckily ODF provides generic fallbacks - Unluckily they are not the same as CSS2. + Unfortunately they are not the same as CSS2. CSS2: serif, sans-serif, cursive, fantasy, monospace ODF: roman, swiss, modern, decorative, script, system """ @@ -851,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler): """ name = attrs[(STYLENS,'name')] name = name.replace(".","_") - self.currentstyle = "@page " + name + self.currentstyle = ".PL-" + name self.stylestack.append(self.currentstyle) self.styledict[self.currentstyle] = {} @@ -882,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler): self.s_ignorexml(tag, attrs) # Short prefixes for class selectors - familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', + _familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR', 'text':'S', 'section':'D', 'table':'T', 'table-cell':'TD', 'table-column':'TC', 'table-row':'TR', 'graphic':'G' } @@ -898,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler): name = name.replace(".","_") family = attrs[(STYLENS,'family')] htmlfamily = self.familymap.get(family,'unknown') - sfamily = self.familyshort.get(family,'X') + sfamily = self._familyshort.get(family,'X') name = "%s%s-%s" % (self.autoprefix, sfamily, name) parent = attrs.get( (STYLENS,'parent-style-name') ) self.currentstyle = special_styles.get(name,"."+name) @@ -943,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def s_table_table_cell(self, tag, attrs): + """ Start a table cell """ #FIXME: number-columns-repeated § 8.1.3 #repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) htmlattrs = {} @@ -960,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_table_table_cell(self, tag, attrs): + """ End a table cell """ self.writedata() self.closetag('td') self.purgedata() def s_table_table_column(self, tag, attrs): + """ Start a table column """ c = attrs.get( (TABLENS,'style-name'), None) repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1)) htmlattrs = {} @@ -975,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def s_table_table_row(self, tag, attrs): + """ Start a table row """ #FIXME: table:number-rows-repeated c = attrs.get( (TABLENS,'style-name'), None) htmlattrs = {} @@ -984,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_table_table_row(self, tag, attrs): + """ End a table row """ self.writedata() self.closetag('tr') self.purgedata() @@ -998,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_text_a(self, tag, attrs): + """ End an anchor or bookmark reference """ self.writedata() self.closetag('a', False) self.purgedata() + def s_text_bookmark(self, tag, attrs): + """ Bookmark definition """ + name = attrs[(TEXTNS,'name')] + html_id = self.get_anchor(name) + self.writedata() + self.opentag('span', {'id':html_id}) + self.closetag('span', False) + self.purgedata() + + def s_text_bookmark_ref(self, tag, attrs): + """ Bookmark reference """ + name = attrs[(TEXTNS,'ref-name')] + html_id = "#" + self.get_anchor(name) + self.writedata() + self.opentag('a', {'href':html_id}) + self.purgedata() + def s_text_h(self, tag, attrs): """ Headings start """ level = int(attrs[(TEXTNS,'outline-level')]) @@ -1019,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def e_text_h(self, tag, attrs): - """ Headings end """ + """ Headings end + Side-effect: If there is no title in the metadata, then it is taken + from the first heading of any level. + """ self.writedata() level = int(attrs[(TEXTNS,'outline-level')]) if level > 6: level = 6 # Heading levels go only to 6 in XHTML if level < 1: level = 1 lev = self.headinglevels[1:level+1] outline = '.'.join(map(str,lev) ) + heading = ''.join(self.data) + if self.title == '': self.title = heading + # Changed by Kovid tail = ''.join(self.data) anchor = self.get_anchor("%s.%s" % ( outline, tail)) anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506 @@ -1037,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler): self.purgedata() def s_text_line_break(self, tag, attrs): + """ Force a line break (
) """ self.writedata() self.emptytag('br') self.purgedata() def s_text_list(self, tag, attrs): - """ To know which level we're at, we have to count the number + """ Start a list (
    or
      ) + To know which level we're at, we have to count the number of elements on the tagstack. """ name = attrs.get( (TEXTNS,'style-name') ) @@ -1056,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler): name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) list_class = "%s_%d" % (name, level) if self.generate_css: - self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class }) + self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class }) else: - self.opentag('%s' % self.listtypes.get(list_class,'UL')) + self.opentag('%s' % self.listtypes.get(list_class,'ul')) self.purgedata() def e_text_list(self, tag, attrs): + """ End a list """ self.writedata() name = attrs.get( (TEXTNS,'style-name') ) level = self.tagstack.count_tags(tag) + 1 @@ -1073,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler): # textbox itself may be nested within another list. name = self.tagstack.rfindattr( (TEXTNS,'style-name') ) list_class = "%s_%d" % (name, level) - self.closetag(self.listtypes.get(list_class,'UL')) + self.closetag(self.listtypes.get(list_class,'ul')) self.purgedata() def s_text_list_item(self, tag, attrs): + """ Start list item """ self.opentag('li') self.purgedata() def e_text_list_item(self, tag, attrs): + """ End list item """ self.writedata() self.closetag('li') self.purgedata() @@ -1192,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler): if specialtag is None: specialtag = 'p' self.writedata() - if not self.data: + if not self.data: # Added by Kovid # Give substance to empty paragraphs, as rendered by OOo self.writeout(' ') self.closetag(specialtag) @@ -1255,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler): #----------------------------------------------------------------------------- def load(self, odffile): - self._odffile = odffile + """ Loads a document into the parser and parses it. + The argument can either be a filename or a document in memory. + """ + self.lines = [] + self._wfunc = self._wlines + if isinstance(odffile, basestring) \ + or hasattr(odffile, 'read'): # Added by Kovid + self.document = load(odffile) + else: + self.document = odffile + self._walknode(self.document.topnode) - def parseodf(self): - self.xmlfile = '' - self.title = '' - self.data = [] - self.tagstack = TagStack() - self.pstack = [] - self.processelem = True - self.processcont = True - self.listtypes = {} - self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10 - self.cs = StyleToCSS() - self.anchors = {} + def _walknode(self, node): + if node.nodeType == Node.ELEMENT_NODE: + self.startElementNS(node.qname, node.tagName, node.attributes) + for c in node.childNodes: + self._walknode(c) + self.endElementNS(node.qname, node.tagName) + if node.nodeType == Node.TEXT_NODE or node.nodeType == Node.CDATA_SECTION_NODE: + self.characters(unicode(node)) - # Style declarations - self.stylestack = [] - self.styledict = {} - self.currentstyle = None - - # Footnotes and endnotes - self.notedict = {} - self.currentnote = 0 - self.notebody = '' - - # Tags from meta.xml - self.metatags = [] - - # Extract the interesting files - z = zipfile.ZipFile(self._odffile) - - # For some reason Trac has trouble when xml.sax.make_parser() is used. - # Could it be because PyXML is installed, and therefore a different parser - # might be chosen? By calling expatreader directly we avoid this issue - parser = expatreader.create_parser() - parser.setFeature(handler.feature_namespaces, 1) - parser.setContentHandler(self) - parser.setErrorHandler(handler.ErrorHandler()) - inpsrc = InputSource() - - for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'): - self.xmlfile = xmlfile - content = z.read(xmlfile) - inpsrc.setByteStream(StringIO(content)) - parser.parse(inpsrc) - z.close() def odf2xhtml(self, odffile): - """ Load a file and return XHTML + """ Load a file and return the XHTML """ self.load(odffile) return self.xhtml() @@ -1312,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler): if s != '': self.lines.append(s) def xhtml(self): - self.lines = [] - self._wfunc = self._wlines - self.parseodf() + """ Returns the xhtml + """ return ''.join(self.lines) def _writecss(self, s): @@ -1324,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler): pass def css(self): - self._wfunc = self._writenothing - self.parseodf() + """ Returns the CSS content """ self._csslines = [] self._wfunc = self._writecss self.generate_stylesheet() res = ''.join(self._csslines) + self._wfunc = self._wlines del self._csslines return res + + def save(self, outputfile, addsuffix=False): + """ Save the HTML under the filename. + If the filename is '-' then save to stdout + We have the last style filename in self.stylefilename + """ + if outputfile == '-': + import sys # Added by Kovid + outputfp = sys.stdout + else: + if addsuffix: + outputfile = outputfile + ".html" + outputfp = file(outputfile, "w") + outputfp.write(self.xhtml().encode('us-ascii','xmlcharrefreplace')) + outputfp.close() + + +class ODF2XHTMLembedded(ODF2XHTML): + """ The ODF2XHTML parses an ODF file and produces XHTML""" + + def __init__(self, lines, generate_css=True, embedable=False): + self._resetobject() + self.lines = lines + + # Tags + self.generate_css = generate_css + self.elements = { +# (DCNS, 'title'): (self.s_processcont, self.e_dc_title), +# (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage), +# (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag), +# (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag), +# (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag), + (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame), + (DRAWNS, 'image'): (self.s_draw_image, None), + (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None), + (DRAWNS, "layer-set"):(self.s_ignorexml, None), + (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page), + (DRAWNS, 'object'): (self.s_draw_object, None), + (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None), + (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox), +# (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag), +# (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag), +# (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag), +# (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag), + (NUMBERNS, "boolean-style"):(self.s_ignorexml, None), + (NUMBERNS, "currency-style"):(self.s_ignorexml, None), + (NUMBERNS, "date-style"):(self.s_ignorexml, None), + (NUMBERNS, "number-style"):(self.s_ignorexml, None), + (NUMBERNS, "text-style"):(self.s_ignorexml, None), +# (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None), +# (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content), + (OFFICENS, "forms"):(self.s_ignorexml, None), +# (OFFICENS, "master-styles"):(self.s_office_master_styles, None), + (OFFICENS, "meta"):(self.s_ignorecont, None), +# (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation), +# (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet), +# (OFFICENS, "styles"):(self.s_office_styles, None), +# (OFFICENS, "text"):(self.s_office_text, self.e_office_text), + (OFFICENS, "scripts"):(self.s_ignorexml, None), + (PRESENTATIONNS, "notes"):(self.s_ignorexml, None), +## (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout), +# (STYLENS, "default-page-layout"):(self.s_ignorexml, None), +# (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style), +# (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "font-face"):(self.s_style_font_face, None), +## (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer), +## (STYLENS, "footer-style"):(self.s_style_footer_style, None), +# (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "handout-master"):(self.s_ignorexml, None), +## (STYLENS, "header"):(self.s_style_header, self.e_style_header), +## (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None), +## (STYLENS, "header-style"):(self.s_style_header_style, None), +# (STYLENS, "master-page"):(self.s_style_master_page, None), +# (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None), +## (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout), +# (STYLENS, "page-layout"):(self.s_ignorexml, None), +# (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "style"):(self.s_style_style, self.e_style_style), +# (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "table-properties"):(self.s_style_handle_properties, None), +# (STYLENS, "text-properties"):(self.s_style_handle_properties, None), + (SVGNS, 'desc'): (self.s_ignorexml, None), + (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None), + (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell), + (TABLENS, 'table-column'): (self.s_table_table_column, None), + (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row), + (TABLENS, 'table'): (self.s_table_table, self.e_table_table), + (TEXTNS, 'a'): (self.s_text_a, self.e_text_a), + (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None), + (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'h'): (self.s_text_h, self.e_text_h), + (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'line-break'):(self.s_text_line_break, None), + (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None), + (TEXTNS, "list"):(self.s_text_list, self.e_text_list), + (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item), + (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet), + (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number), + (TEXTNS, "list-style"):(None, None), + (TEXTNS, "note"):(self.s_text_note, None), + (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body), + (TEXTNS, "note-citation"):(None, self.e_text_note_citation), + (TEXTNS, "notes-configuration"):(self.s_ignorexml, None), + (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, 'p'): (self.s_text_p, self.e_text_p), + (TEXTNS, 's'): (self.s_text_s, None), + (TEXTNS, 'span'): (self.s_text_span, self.e_text_span), + (TEXTNS, 'tab'): (self.s_text_tab, None), + (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source), + (TEXTNS, "page-number"):(None, None), + } + diff --git a/src/odf/opendocument.py b/src/odf/opendocument.py index 9fd16229f6..63196382d5 100644 --- a/src/odf/opendocument.py +++ b/src/odf/opendocument.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2006-2009 Søren Roug, European Environment Agency +# Copyright (C) 2006-2010 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -41,7 +41,7 @@ IS_IMAGE = 1 # We need at least Python 2.2 assert sys.version_info[0]>=2 and sys.version_info[1] >= 2 -sys.setrecursionlimit=50 +#sys.setrecursionlimit(100) #The recursion limit is set conservative so mistakes like # s=content() s.addElement(s) won't eat up too much processor time. @@ -128,12 +128,12 @@ class OpenDocument: self.element_dict[element.qname] = [] self.element_dict[element.qname].append(element) if element.qname == (STYLENS, u'style'): - self._register_stylename(element) # Add to style dictionary + self.__register_stylename(element) # Add to style dictionary styleref = element.getAttrNS(TEXTNS,u'style-name') if styleref is not None and self._styles_ooo_fix.has_key(styleref): element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref]) - def _register_stylename(self, element): + def __register_stylename(self, element): ''' Register a style. But there are three style dictionaries: office:styles, office:automatic-styles and office:master-styles Chapter 14 @@ -165,7 +165,7 @@ class OpenDocument: """ Generates the full document as an XML file Always written as a bytestream in UTF-8 encoding """ - self._replaceGenerator() + self.__replaceGenerator() xml=StringIO() xml.write(_XMLPROLOGUE) self.topnode.toXml(0, xml) @@ -197,8 +197,10 @@ class OpenDocument: x.write_close_tag(0, xml) return xml.getvalue() - def manifestxml(self): - """ Generates the manifest.xml file """ + def __manifestxml(self): + """ Generates the manifest.xml file + The self.manifest isn't avaible unless the document is being saved + """ xml=StringIO() xml.write(_XMLPROLOGUE) self.manifest.toXml(0,xml) @@ -206,7 +208,7 @@ class OpenDocument: def metaxml(self): """ Generates the meta.xml file """ - self._replaceGenerator() + self.__replaceGenerator() x = DocumentMeta() x.addElement(self.meta) xml=StringIO() @@ -344,7 +346,7 @@ class OpenDocument: self.thumbnail = filecontent def addObject(self, document, objectname=None): - """ Add an object. The object must be an OpenDocument class + """ Adds an object (subdocument). The object must be an OpenDocument class The return value will be the folder in the zipfile the object is stored in """ self.childobjects.append(document) @@ -367,15 +369,16 @@ class OpenDocument: zi.compress_type = zipfile.ZIP_STORED zi.external_attr = UNIXPERMS self._z.writestr(zi, fileobj) - if hasPictures: - self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype="")) + # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry +# if hasPictures: +# self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype="")) # Look in subobjects subobjectnum = 1 for subobject in object.childobjects: self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum)) subobjectnum += 1 - def _replaceGenerator(self): + def __replaceGenerator(self): """ Section 3.1.1: The application MUST NOT export the original identifier belonging to the application that created the document. """ @@ -385,22 +388,29 @@ class OpenDocument: self.meta.addElement(meta.Generator(text=TOOLSVERSION)) def save(self, outputfile, addsuffix=False): - """ Save the document under the filename """ + """ Save the document under the filename. + If the filename is '-' then save to stdout + """ if outputfile == '-': outputfp = zipfile.ZipFile(sys.stdout,"w") else: if addsuffix: outputfile = outputfile + odmimetypes.get(self.mimetype,'.xxx') outputfp = zipfile.ZipFile(outputfile, "w") - self._zipwrite(outputfp) + self.__zipwrite(outputfp) outputfp.close() def write(self, outputfp): + """ User API to write the ODF file to an open file descriptor + Writes the ZIP format + """ zipoutputfp = zipfile.ZipFile(outputfp,"w") - self._zipwrite(zipoutputfp) + self.__zipwrite(zipoutputfp) - def _zipwrite(self, outputfp): - """ Write the document to an open file pointer """ + def __zipwrite(self, outputfp): + """ Write the document to an open file pointer + This is where the real work is done + """ self._z = outputfp self._now = time.localtime()[:6] self.manifest = manifest.Manifest() @@ -438,7 +448,7 @@ class OpenDocument: zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now) zi.compress_type = zipfile.ZIP_DEFLATED zi.external_attr = UNIXPERMS - self._z.writestr(zi, self.manifestxml() ) + self._z.writestr(zi, self.__manifestxml() ) del self._z del self._now del self.manifest @@ -464,8 +474,8 @@ class OpenDocument: self._z.writestr(zi, object.contentxml() ) # Write settings - if self == object and self.settings.hasChildNodes(): - self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml")) + if object.settings.hasChildNodes(): + self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml")) zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now) zi.compress_type = zipfile.ZIP_DEFLATED zi.external_attr = UNIXPERMS @@ -473,7 +483,7 @@ class OpenDocument: # Write meta if self == object: - self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml")) + self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml")) zi = zipfile.ZipInfo("meta.xml", self._now) zi.compress_type = zipfile.ZIP_DEFLATED zi.external_attr = UNIXPERMS @@ -497,6 +507,7 @@ class OpenDocument: return element.Text(data) def createCDATASection(self, data): + """ Method to create a CDATA section """ return element.CDATASection(cdata) def getMediaType(self): @@ -504,12 +515,14 @@ class OpenDocument: return self.mimetype def getStyleByName(self, name): + """ Finds a style object based on the name """ ncname = make_NCName(name) if self._styles_dict == {}: self.rebuild_caches() return self._styles_dict.get(ncname, None) def getElementsByType(self, element): + """ Gets elements based on the type, which is function from text.py, draw.py etc. """ obj = element(check_grammar=False) if self.element_dict == {}: self.rebuild_caches() @@ -517,53 +530,59 @@ class OpenDocument: # Convenience functions def OpenDocumentChart(): + """ Creates a chart document """ doc = OpenDocument('application/vnd.oasis.opendocument.chart') doc.chart = Chart() doc.body.addElement(doc.chart) return doc def OpenDocumentDrawing(): + """ Creates a drawing document """ doc = OpenDocument('application/vnd.oasis.opendocument.graphics') doc.drawing = Drawing() doc.body.addElement(doc.drawing) return doc def OpenDocumentImage(): + """ Creates an image document """ doc = OpenDocument('application/vnd.oasis.opendocument.image') doc.image = Image() doc.body.addElement(doc.image) return doc def OpenDocumentPresentation(): + """ Creates a presentation document """ doc = OpenDocument('application/vnd.oasis.opendocument.presentation') doc.presentation = Presentation() doc.body.addElement(doc.presentation) return doc def OpenDocumentSpreadsheet(): + """ Creates a spreadsheet document """ doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet') doc.spreadsheet = Spreadsheet() doc.body.addElement(doc.spreadsheet) return doc def OpenDocumentText(): + """ Creates a text document """ doc = OpenDocument('application/vnd.oasis.opendocument.text') doc.text = Text() doc.body.addElement(doc.text) return doc +def OpenDocumentTextMaster(): + """ Creates a text master document """ + doc = OpenDocument('application/vnd.oasis.opendocument.text-master') + doc.text = Text() + doc.body.addElement(doc.text) + return doc -def load(odffile): +def __loadxmlparts(z, manifest, doc, objectpath): from load import LoadParser from xml.sax import make_parser, handler - z = zipfile.ZipFile(odffile) - mimetype = z.read('mimetype') - doc = OpenDocument(mimetype, add_generator=False) - # Look in the manifest file to see if which of the four files there are - manifestpart = z.read('META-INF/manifest.xml') - manifest = manifestlist(manifestpart) - for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'): + for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'): if not manifest.has_key(xmlfile): continue try: @@ -580,7 +599,19 @@ def load(odffile): parser.parse(inpsrc) del doc._parsing except KeyError, v: pass - # FIXME: Add subobjects correctly here + +def load(odffile): + """ Load an ODF file into memory + Returns a reference to the structure + """ + z = zipfile.ZipFile(odffile) + mimetype = z.read('mimetype') + doc = OpenDocument(mimetype, add_generator=False) + + # Look in the manifest file to see if which of the four files there are + manifestpart = z.read('META-INF/manifest.xml') + manifest = manifestlist(manifestpart) + __loadxmlparts(z, manifest, doc, '') for mentry,mvalue in manifest.items(): if mentry[:9] == "Pictures/" and len(mentry) > 9: doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry)) @@ -588,6 +619,13 @@ def load(odffile): doc.addThumbnail(z.read(mentry)) elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'): pass + # Load subobjects into structure + elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/": + subdoc = OpenDocument(mvalue['media-type'], add_generator=False) + doc.addObject(subdoc, "/" + mentry[:-1]) + __loadxmlparts(z, manifest, subdoc, mentry) + elif mentry[:7] == "Object ": + pass # Don't load subobjects as opaque objects else: if mvalue['full-path'][-1] == '/': doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None)) @@ -612,4 +650,5 @@ def load(odffile): elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula': doc.formula = b[0].firstChild return doc + # vim: set expandtab sw=4 :