#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2006-2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): Michael Howitz, gocept gmbh & co. kg # # $Id: userfield.py 447 2008-07-10 20:01:30Z roug $ """Class to show and manipulate user fields in odf documents.""" import sys import time import zipfile import xml.sax import xml.sax.handler import xml.sax.saxutils from odf.namespaces import OFFICENS, TEXTNS try: from cStringIO import StringIO except ImportError: from StringIO import StringIO OUTENCODING = "utf-8" # OpenDocument v.1.0 section 6.7.1 VALUE_TYPES = { 'float': (OFFICENS, u'value'), 'percentage': (OFFICENS, u'value'), 'currency': (OFFICENS, u'value'), 'date': (OFFICENS, u'date-value'), 'time': (OFFICENS, u'time-value'), 'boolean': (OFFICENS, u'boolean-value'), 'string': (OFFICENS, u'string-value'), } class UserFields(object): """List, view and manipulate user fields.""" # these attributes can be a filename or a file like object src_file = None dest_file = None def __init__(self, src=None, dest=None): """Constructor src ... source document name, file like object or None for stdin dest ... destination document name, file like object or None for stdout """ self.src_file = src self.dest_file = dest def list_fields(self): """List (extract) all known user-fields. Returns list of user-field names. """ return [x[0] for x in self.list_fields_and_values()] def list_fields_and_values(self, field_names=None): """List (extract) user-fields with type and value. field_names ... list of field names to show or None for all. Returns list of tuples (, , ). """ found_fields = [] def _callback(field_name, value_type, value, attrs): if field_names is None or field_name in field_names: found_fields.append((field_name.encode(OUTENCODING), value_type.encode(OUTENCODING), value.encode(OUTENCODING))) return attrs self._content_handler(_callback) return found_fields def list_values(self, field_names): """Extract the contents of given field names from the file. field_names ... list of field names Returns list of field values. """ return [x[2] for x in self.list_fields_and_values(field_names)] def get(self, field_name): """Extract the contents of this field from the file. Returns field value or None if field does not exist. """ values = self.list_values([field_name]) if not values: return None return values[0] def get_type_and_value(self, field_name): """Extract the type and contents of this field from the file. Returns tuple (, ) or None if field does not exist. """ fields = self.list_fields_and_values([field_name]) if not fields: return None field_name, value_type, value = fields[0] return value_type, value def update(self, data): """Set the value of user fields. The field types will be the same. data ... dict, with field name as key, field value as value Returns None """ def _callback(field_name, value_type, value, attrs): if field_name in data: valattr = VALUE_TYPES.get(value_type) attrs = dict(attrs.items()) # Take advantage that startElementNS can take a normal # dict as attrs attrs[valattr] = data[field_name] return attrs self._content_handler(_callback, write_file=True) def _content_handler(self, callback_func, write_file=False): """Handle the content using the callback function and write result if necessary. callback_func ... function called for each field found in odf document signature: field_name ... name of current field value_type ... type of current field value ... value of current field attrs ... tuple of attrs of current field returns: tuple or dict of attrs write_file ... boolean telling wether write result to file """ class DevNull(object): """IO-object which behaves like /dev/null.""" def write(self, str): pass # get input if isinstance(self.src_file, basestring): # src_file is a filename, check if it is a zip-file if not zipfile.is_zipfile(self.src_file): raise TypeError("%s is no odt file." % self.src_file) elif self.src_file is None: # use stdin if no file given self.src_file = sys.stdin zin = zipfile.ZipFile(self.src_file, 'r') content_xml = zin.read('content.xml') # prepare output if write_file: output_io = StringIO() if self.dest_file is None: # use stdout if no filename given self.dest_file = sys.stdout zout = zipfile.ZipFile(self.dest_file, 'w') else: output_io = DevNull() # parse input odfs = ODFContentParser(callback_func, output_io) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_namespaces, 1) parser.setContentHandler(odfs) parser.parse(StringIO(content_xml)) # write output if write_file: # Loop through the input zipfile and copy the content to # the output until we get to the content.xml. Then # substitute. for zinfo in zin.infolist(): if zinfo.filename == "content.xml": # Write meta zi = zipfile.ZipInfo("content.xml", time.localtime()[:6]) zi.compress_type = zipfile.ZIP_DEFLATED zout.writestr(zi, odfs.content()) else: payload = zin.read(zinfo.filename) zout.writestr(zinfo, payload) zout.close() zin.close() class ODFContentParser(xml.sax.saxutils.XMLGenerator): def __init__(self, callback_func, out=None, encoding=OUTENCODING): """Constructor. callback_func ... function called for each field found in odf document signature: field_name ... name of current field value_type ... type of current field value ... value of current field attrs ... tuple of attrs of current field returns: tuple or dict of attrs out ... file like object for output encoding ... encoding for output """ self._callback_func = callback_func xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding) def startElementNS(self, name, qname, attrs): if name == (TEXTNS, u'user-field-decl'): field_name = attrs.get((TEXTNS, u'name')) value_type = attrs.get((OFFICENS, u'value-type')) if value_type == 'string': value = attrs.get((OFFICENS, u'string-value')) else: value = attrs.get((OFFICENS, u'value')) attrs = self._callback_func(field_name, value_type, value, attrs) self._startElementNS(name, qname, attrs) def _startElementNS(self, name, qname, attrs): # copy of xml.sax.saxutils.XMLGenerator.startElementNS # necessary because we have to provide our own writeattr # function which is called by this method if name[0] is None: name = name[1] elif self._current_context[name[0]] is None: # default namespace name = name[1] else: name = self._current_context[name[0]] + ":" + name[1] self._out.write('<' + name) for k,v in self._undeclared_ns_maps: if k is None: self._out.write(' xmlns="%s"' % (v or '')) else: self._out.write(' xmlns:%s="%s"' % (k,v)) self._undeclared_ns_maps = [] for (name, value) in attrs.items(): if name[0] is None: name = name[1] elif self._current_context[name[0]] is None: # default namespace #If an attribute has a nsuri but not a prefix, we must #create a prefix and add a nsdecl prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr self._generated_prefix_ctr = self._generated_prefix_ctr + 1 name = prefix + ':' + name[1] self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0]))) self._current_context[name[0]] = prefix else: name = self._current_context[name[0]] + ":" + name[1] self._out.write(' %s=' % name) writeattr(self._out, value) self._out.write('>') def content(self): return self._out.getvalue() ATTR_ENTITIES = { '\n': ' ' # convert newlines into entities inside attributes } def writetext(stream, text, entities={}): text = xml.sax.saxutils.escape(text, entities) try: stream.write(text) except UnicodeError: for c in text: try: stream.write(c) except UnicodeError: stream.write(u"&#%d;" % ord(c)) def writeattr(stream, text): # copied from xml.sax.saxutils.writeattr added support for an # additional entity mapping countdouble = text.count('"') entities = ATTR_ENTITIES.copy() if countdouble: countsingle = text.count("'") if countdouble <= countsingle: entities['"'] = """ quote = '"' else: entities["'"] = "'" quote = "'" else: quote = '"' stream.write(quote) writetext(stream, text, entities) stream.write(quote)