From c67818e6045c268acf7adf43269b3a8e5b3ad43f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 15 Nov 2009 20:54:41 -0700 Subject: [PATCH] Update bundled odfpy library for conversion of ODT files to version 0.9.2 --- src/odf/element.py | 2 + src/odf/namespaces.py | 2 +- src/odf/opendocument.py | 6 +- src/odf/svg.py | 2 + src/odf/text.py | 3 + src/odf/userfield.py | 258 ++++++++-------------------------------- 6 files changed, 60 insertions(+), 213 deletions(-) diff --git a/src/odf/element.py b/src/odf/element.py index d376f94ebf..9754c251b0 100644 --- a/src/odf/element.py +++ b/src/odf/element.py @@ -56,6 +56,8 @@ def _quoteattr(data, entities={}): the optional entities parameter. The keys and values must all be strings; each key will be replaced with its corresponding value. """ + entities['\n']=' ' + entities['\r']=' ' data = _escape(data, entities) if '"' in data: if "'" in data: diff --git a/src/odf/namespaces.py b/src/odf/namespaces.py index a7d1f0d7c5..3109210bb5 100644 --- a/src/odf/namespaces.py +++ b/src/odf/namespaces.py @@ -17,7 +17,7 @@ # # Contributor(s): # -TOOLSVERSION = u"ODFPY/0.9.1dev" +TOOLSVERSION = u"ODFPY/0.9.2dev" ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0" DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0" diff --git a/src/odf/opendocument.py b/src/odf/opendocument.py index ab375cdf1f..9fd16229f6 100644 --- a/src/odf/opendocument.py +++ b/src/odf/opendocument.py @@ -185,7 +185,7 @@ class OpenDocument: if self.fontfacedecls.hasChildNodes(): self.fontfacedecls.toXml(1, xml) a = AutomaticStyles() - stylelist = self._used_auto_styles([self.styles, self.body]) + stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body]) if len(stylelist) > 0: a.write_open_tag(1, xml) for s in stylelist: @@ -233,9 +233,11 @@ class OpenDocument: for styleref in ( (DRAWNS,u'style-name'), (DRAWNS,u'text-style-name'), (PRESENTATIONNS,u'style-name'), - (STYLENS,u'style-name'), + (STYLENS,u'data-style-name'), (STYLENS,u'list-style-name'), (STYLENS,u'page-layout-name'), + (STYLENS,u'style-name'), + (TABLENS,u'default-cell-style-name'), (TABLENS,u'style-name'), (TEXTNS,u'style-name') ): if e.getAttrNS(styleref[0],styleref[1]): diff --git a/src/odf/svg.py b/src/odf/svg.py index e53d54001e..346c4904dd 100644 --- a/src/odf/svg.py +++ b/src/odf/svg.py @@ -50,3 +50,5 @@ def Radialgradient(**args): def Stop(**args): return Element(qname = (SVGNS,'stop'), **args) +def Title(**args): + return Element(qname = (SVGNS,'title'), **args) diff --git a/src/odf/text.py b/src/odf/text.py index 13a2c9ca6d..b55a3a4a91 100644 --- a/src/odf/text.py +++ b/src/odf/text.py @@ -446,6 +446,9 @@ def SequenceRef(**args): def SheetName(**args): return Element(qname = (TEXTNS,'sheet-name'), **args) +def SoftPageBreak(**args): + return Element(qname = (TEXTNS,'soft-page-break'), **args) + def SortKey(**args): return Element(qname = (TEXTNS,'sort-key'), **args) diff --git a/src/odf/userfield.py b/src/odf/userfield.py index 0e639769d4..196bae17b3 100644 --- a/src/odf/userfield.py +++ b/src/odf/userfield.py @@ -1,6 +1,6 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -# Copyright (C) 2006-2007 Søren Roug, European Environment Agency +# Copyright (C) 2006-2009 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version @@ -22,16 +22,11 @@ """Class to show and manipulate user fields in odf documents.""" import sys -import time import zipfile -import xml.sax -import xml.sax.handler -import xml.sax.saxutils - -from odf.namespaces import OFFICENS, TEXTNS - -from cStringIO import StringIO +from odf.text import UserFieldDecl +from odf.namespaces import OFFICENS +from odf.opendocument import load OUTENCODING = "utf-8" @@ -60,16 +55,36 @@ class UserFields(object): src ... source document name, file like object or None for stdin dest ... destination document name, file like object or None for stdout - + """ self.src_file = src self.dest_file = dest + self.document = None + + def loaddoc(self): + if isinstance(self.src_file, basestring): + # src_file is a filename, check if it is a zip-file + if not zipfile.is_zipfile(self.src_file): + raise TypeError("%s is no odt file." % self.src_file) + elif self.src_file is None: + # use stdin if no file given + self.src_file = sys.stdin + + self.document = load(self.src_file) + + def savedoc(self): + # write output + if self.dest_file is None: + # use stdout if no filename given + self.document.save('-') + else: + self.document.save(self.dest_file) def list_fields(self): """List (extract) all known user-fields. - + Returns list of user-field names. - + """ return [x[0] for x in self.list_fields_and_values()] @@ -81,15 +96,21 @@ class UserFields(object): Returns list of tuples (, , ). """ + self.loaddoc() found_fields = [] - def _callback(field_name, value_type, value, attrs): + all_fields = self.document.getElementsByType(UserFieldDecl) + for f in all_fields: + value_type = f.getAttribute('valuetype') + if value_type == 'string': + value = f.getAttribute('stringvalue') + else: + value = f.getAttribute('value') + field_name = f.getAttribute('name') + if field_names is None or field_name in field_names: found_fields.append((field_name.encode(OUTENCODING), value_type.encode(OUTENCODING), value.encode(OUTENCODING))) - return attrs - - self._content_handler(_callback) return found_fields def list_values(self, field_names): @@ -133,199 +154,16 @@ class UserFields(object): Returns None """ - def _callback(field_name, value_type, value, attrs): - if field_name in data: - valattr = VALUE_TYPES.get(value_type) - attrs = dict(attrs.items()) - # Take advantage that startElementNS can take a normal - # dict as attrs - attrs[valattr] = data[field_name] - return attrs - self._content_handler(_callback, write_file=True) - - def _content_handler(self, callback_func, write_file=False): - """Handle the content using the callback function and write result if - necessary. - - callback_func ... function called for each field found in odf document - signature: field_name ... name of current field - value_type ... type of current field - value ... value of current field - attrs ... tuple of attrs of current field - returns: tuple or dict of attrs - write_file ... boolean telling wether write result to file - - """ - class DevNull(object): - """IO-object which behaves like /dev/null.""" - def write(self, str): - pass - - # get input - if isinstance(self.src_file, basestring): - # src_file is a filename, check if it is a zip-file - if not zipfile.is_zipfile(self.src_file): - raise TypeError("%s is no odt file." % self.src_file) - elif self.src_file is None: - # use stdin if no file given - self.src_file = sys.stdin - - zin = zipfile.ZipFile(self.src_file, 'r') - content_xml = zin.read('content.xml') - - # prepare output - if write_file: - output_io = StringIO() - if self.dest_file is None: - # use stdout if no filename given - self.dest_file = sys.stdout - zout = zipfile.ZipFile(self.dest_file, 'w') - else: - output_io = DevNull() - - - # parse input - odfs = ODFContentParser(callback_func, output_io) - parser = xml.sax.make_parser() - parser.setFeature(xml.sax.handler.feature_namespaces, 1) - parser.setContentHandler(odfs) - parser.parse(StringIO(content_xml)) - - # write output - if write_file: - # Loop through the input zipfile and copy the content to - # the output until we get to the content.xml. Then - # substitute. - for zinfo in zin.infolist(): - if zinfo.filename == "content.xml": - # Write meta - zi = zipfile.ZipInfo("content.xml", time.localtime()[:6]) - zi.compress_type = zipfile.ZIP_DEFLATED - zout.writestr(zi, odfs.content()) + self.loaddoc() + all_fields = self.document.getElementsByType(UserFieldDecl) + for f in all_fields: + field_name = f.getAttribute('name') + if data.has_key(field_name): + value_type = f.getAttribute('valuetype') + value = data.get(field_name) + if value_type == 'string': + f.setAttribute('stringvalue', value) else: - payload = zin.read(zinfo.filename) - zout.writestr(zinfo, payload) - zout.close() - zin.close() + f.setAttribute('value', value) + self.savedoc() - -class ODFContentParser(xml.sax.saxutils.XMLGenerator): - - def __init__(self, callback_func, out=None, encoding=OUTENCODING): - """Constructor. - - callback_func ... function called for each field found in odf document - signature: field_name ... name of current field - value_type ... type of current field - value ... value of current field - attrs ... tuple of attrs of current field - returns: tuple or dict of attrs - out ... file like object for output - encoding ... encoding for output - - """ - self._callback_func = callback_func - xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding) - - def _qname(self, name): - """Builds a qualified name from a (ns_url, localname) pair""" - if name[0]: - if name[0] == u'http://www.w3.org/XML/1998/namespace': - return u'xml' + ":" + name[1] - # The name is in a non-empty namespace - prefix = self._current_context[name[0]] - if prefix: - # If it is not the default namespace, prepend the prefix - return prefix + ":" + name[1] - # Return the unqualified name - return name[1] - - def startElementNS(self, name, qname, attrs): - if name == (TEXTNS, u'user-field-decl'): - field_name = attrs.get((TEXTNS, u'name')) - value_type = attrs.get((OFFICENS, u'value-type')) - if value_type == 'string': - value = attrs.get((OFFICENS, u'string-value')) - else: - value = attrs.get((OFFICENS, u'value')) - - attrs = self._callback_func(field_name, value_type, value, attrs) - - self._startElementNS(name, qname, attrs) - - def _startElementNS(self, name, qname, attrs): - # copy of xml.sax.saxutils.XMLGenerator.startElementNS - # necessary because we have to provide our own writeattr - # function which is called by this method - if name[0] is None: - name = name[1] - elif self._current_context[name[0]] is None: - # default namespace - name = name[1] - else: - name = self._current_context[name[0]] + ":" + name[1] - self._out.write('<' + name) - - for k,v in self._undeclared_ns_maps: - if k is None: - self._out.write(' xmlns="%s"' % (v or '')) - else: - self._out.write(' xmlns:%s="%s"' % (k,v)) - self._undeclared_ns_maps = [] - - for (name, value) in attrs.items(): - if name[0] is None: - name = name[1] - elif self._current_context[name[0]] is None: - # default namespace - #If an attribute has a nsuri but not a prefix, we must - #create a prefix and add a nsdecl - prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr - self._generated_prefix_ctr = self._generated_prefix_ctr + 1 - name = prefix + ':' + name[1] - self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0]))) - self._current_context[name[0]] = prefix - else: - name = self._current_context[name[0]] + ":" + name[1] - self._out.write(' %s=' % name) - writeattr(self._out, value) - self._out.write('>') - - def content(self): - return self._out.getvalue() - - -ATTR_ENTITIES = { - '\n': ' ' # convert newlines into entities inside attributes - } - - -def writetext(stream, text, entities={}): - text = xml.sax.saxutils.escape(text, entities) - try: - stream.write(text) - except UnicodeError: - for c in text: - try: - stream.write(c) - except UnicodeError: - stream.write(u"&#%d;" % ord(c)) - -def writeattr(stream, text): - # copied from xml.sax.saxutils.writeattr added support for an - # additional entity mapping - countdouble = text.count('"') - entities = ATTR_ENTITIES.copy() - if countdouble: - countsingle = text.count("'") - if countdouble <= countsingle: - entities['"'] = """ - quote = '"' - else: - entities["'"] = "'" - quote = "'" - else: - quote = '"' - stream.write(quote) - writetext(stream, text, entities) - stream.write(quote)