Update bundled odfpy library for conversion of ODT files to version 0.9.2

This commit is contained in:
Kovid Goyal 2009-11-15 20:54:41 -07:00
parent 10566e98ad
commit c67818e604
6 changed files with 60 additions and 213 deletions

View File

@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
the optional entities parameter. The keys and values must all be
strings; each key will be replaced with its corresponding value.
"""
entities['\n']='
'
entities['\r']=''
data = _escape(data, entities)
if '"' in data:
if "'" in data:

View File

@ -17,7 +17,7 @@
#
# Contributor(s):
#
TOOLSVERSION = u"ODFPY/0.9.1dev"
TOOLSVERSION = u"ODFPY/0.9.2dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"

View File

@ -185,7 +185,7 @@ class OpenDocument:
if self.fontfacedecls.hasChildNodes():
self.fontfacedecls.toXml(1, xml)
a = AutomaticStyles()
stylelist = self._used_auto_styles([self.styles, self.body])
stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
if len(stylelist) > 0:
a.write_open_tag(1, xml)
for s in stylelist:
@ -233,9 +233,11 @@ class OpenDocument:
for styleref in ( (DRAWNS,u'style-name'),
(DRAWNS,u'text-style-name'),
(PRESENTATIONNS,u'style-name'),
(STYLENS,u'style-name'),
(STYLENS,u'data-style-name'),
(STYLENS,u'list-style-name'),
(STYLENS,u'page-layout-name'),
(STYLENS,u'style-name'),
(TABLENS,u'default-cell-style-name'),
(TABLENS,u'style-name'),
(TEXTNS,u'style-name') ):
if e.getAttrNS(styleref[0],styleref[1]):

View File

@ -50,3 +50,5 @@ def Radialgradient(**args):
def Stop(**args):
return Element(qname = (SVGNS,'stop'), **args)
def Title(**args):
return Element(qname = (SVGNS,'title'), **args)

View File

@ -446,6 +446,9 @@ def SequenceRef(**args):
def SheetName(**args):
return Element(qname = (TEXTNS,'sheet-name'), **args)
def SoftPageBreak(**args):
return Element(qname = (TEXTNS,'soft-page-break'), **args)
def SortKey(**args):
return Element(qname = (TEXTNS,'sort-key'), **args)

View File

@ -1,6 +1,6 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
#
# This is free software. You may redistribute it under the terms
# of the Apache license and the GNU General Public License Version
@ -22,16 +22,11 @@
"""Class to show and manipulate user fields in odf documents."""
import sys
import time
import zipfile
import xml.sax
import xml.sax.handler
import xml.sax.saxutils
from odf.namespaces import OFFICENS, TEXTNS
from cStringIO import StringIO
from odf.text import UserFieldDecl
from odf.namespaces import OFFICENS
from odf.opendocument import load
OUTENCODING = "utf-8"
@ -60,16 +55,36 @@ class UserFields(object):
src ... source document name, file like object or None for stdin
dest ... destination document name, file like object or None for stdout
"""
self.src_file = src
self.dest_file = dest
self.document = None
def loaddoc(self):
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
self.document = load(self.src_file)
def savedoc(self):
# write output
if self.dest_file is None:
# use stdout if no filename given
self.document.save('-')
else:
self.document.save(self.dest_file)
def list_fields(self):
"""List (extract) all known user-fields.
Returns list of user-field names.
"""
return [x[0] for x in self.list_fields_and_values()]
@ -81,15 +96,21 @@ class UserFields(object):
Returns list of tuples (<field name>, <field type>, <value>).
"""
self.loaddoc()
found_fields = []
def _callback(field_name, value_type, value, attrs):
all_fields = self.document.getElementsByType(UserFieldDecl)
for f in all_fields:
value_type = f.getAttribute('valuetype')
if value_type == 'string':
value = f.getAttribute('stringvalue')
else:
value = f.getAttribute('value')
field_name = f.getAttribute('name')
if field_names is None or field_name in field_names:
found_fields.append((field_name.encode(OUTENCODING),
value_type.encode(OUTENCODING),
value.encode(OUTENCODING)))
return attrs
self._content_handler(_callback)
return found_fields
def list_values(self, field_names):
@ -133,199 +154,16 @@ class UserFields(object):
Returns None
"""
def _callback(field_name, value_type, value, attrs):
if field_name in data:
valattr = VALUE_TYPES.get(value_type)
attrs = dict(attrs.items())
# Take advantage that startElementNS can take a normal
# dict as attrs
attrs[valattr] = data[field_name]
return attrs
self._content_handler(_callback, write_file=True)
def _content_handler(self, callback_func, write_file=False):
"""Handle the content using the callback function and write result if
necessary.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
write_file ... boolean telling wether write result to file
"""
class DevNull(object):
"""IO-object which behaves like /dev/null."""
def write(self, str):
pass
# get input
if isinstance(self.src_file, basestring):
# src_file is a filename, check if it is a zip-file
if not zipfile.is_zipfile(self.src_file):
raise TypeError("%s is no odt file." % self.src_file)
elif self.src_file is None:
# use stdin if no file given
self.src_file = sys.stdin
zin = zipfile.ZipFile(self.src_file, 'r')
content_xml = zin.read('content.xml')
# prepare output
if write_file:
output_io = StringIO()
if self.dest_file is None:
# use stdout if no filename given
self.dest_file = sys.stdout
zout = zipfile.ZipFile(self.dest_file, 'w')
else:
output_io = DevNull()
# parse input
odfs = ODFContentParser(callback_func, output_io)
parser = xml.sax.make_parser()
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
parser.setContentHandler(odfs)
parser.parse(StringIO(content_xml))
# write output
if write_file:
# Loop through the input zipfile and copy the content to
# the output until we get to the content.xml. Then
# substitute.
for zinfo in zin.infolist():
if zinfo.filename == "content.xml":
# Write meta
zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
zi.compress_type = zipfile.ZIP_DEFLATED
zout.writestr(zi, odfs.content())
self.loaddoc()
all_fields = self.document.getElementsByType(UserFieldDecl)
for f in all_fields:
field_name = f.getAttribute('name')
if data.has_key(field_name):
value_type = f.getAttribute('valuetype')
value = data.get(field_name)
if value_type == 'string':
f.setAttribute('stringvalue', value)
else:
payload = zin.read(zinfo.filename)
zout.writestr(zinfo, payload)
zout.close()
zin.close()
f.setAttribute('value', value)
self.savedoc()
class ODFContentParser(xml.sax.saxutils.XMLGenerator):
def __init__(self, callback_func, out=None, encoding=OUTENCODING):
"""Constructor.
callback_func ... function called for each field found in odf document
signature: field_name ... name of current field
value_type ... type of current field
value ... value of current field
attrs ... tuple of attrs of current field
returns: tuple or dict of attrs
out ... file like object for output
encoding ... encoding for output
"""
self._callback_func = callback_func
xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
def _qname(self, name):
"""Builds a qualified name from a (ns_url, localname) pair"""
if name[0]:
if name[0] == u'http://www.w3.org/XML/1998/namespace':
return u'xml' + ":" + name[1]
# The name is in a non-empty namespace
prefix = self._current_context[name[0]]
if prefix:
# If it is not the default namespace, prepend the prefix
return prefix + ":" + name[1]
# Return the unqualified name
return name[1]
def startElementNS(self, name, qname, attrs):
if name == (TEXTNS, u'user-field-decl'):
field_name = attrs.get((TEXTNS, u'name'))
value_type = attrs.get((OFFICENS, u'value-type'))
if value_type == 'string':
value = attrs.get((OFFICENS, u'string-value'))
else:
value = attrs.get((OFFICENS, u'value'))
attrs = self._callback_func(field_name, value_type, value, attrs)
self._startElementNS(name, qname, attrs)
def _startElementNS(self, name, qname, attrs):
# copy of xml.sax.saxutils.XMLGenerator.startElementNS
# necessary because we have to provide our own writeattr
# function which is called by this method
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
name = name[1]
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write('<' + name)
for k,v in self._undeclared_ns_maps:
if k is None:
self._out.write(' xmlns="%s"' % (v or ''))
else:
self._out.write(' xmlns:%s="%s"' % (k,v))
self._undeclared_ns_maps = []
for (name, value) in attrs.items():
if name[0] is None:
name = name[1]
elif self._current_context[name[0]] is None:
# default namespace
#If an attribute has a nsuri but not a prefix, we must
#create a prefix and add a nsdecl
prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
self._generated_prefix_ctr = self._generated_prefix_ctr + 1
name = prefix + ':' + name[1]
self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
self._current_context[name[0]] = prefix
else:
name = self._current_context[name[0]] + ":" + name[1]
self._out.write(' %s=' % name)
writeattr(self._out, value)
self._out.write('>')
def content(self):
return self._out.getvalue()
ATTR_ENTITIES = {
'\n': '&#x0a;' # convert newlines into entities inside attributes
}
def writetext(stream, text, entities={}):
text = xml.sax.saxutils.escape(text, entities)
try:
stream.write(text)
except UnicodeError:
for c in text:
try:
stream.write(c)
except UnicodeError:
stream.write(u"&#%d;" % ord(c))
def writeattr(stream, text):
# copied from xml.sax.saxutils.writeattr added support for an
# additional entity mapping
countdouble = text.count('"')
entities = ATTR_ENTITIES.copy()
if countdouble:
countsingle = text.count("'")
if countdouble <= countsingle:
entities['"'] = "&quot;"
quote = '"'
else:
entities["'"] = "&apos;"
quote = "'"
else:
quote = '"'
stream.write(quote)
writetext(stream, text, entities)
stream.write(quote)