mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Update bundled odfpy library for conversion of ODT files to version 0.9.2
This commit is contained in:
parent
10566e98ad
commit
c67818e604
@ -56,6 +56,8 @@ def _quoteattr(data, entities={}):
|
|||||||
the optional entities parameter. The keys and values must all be
|
the optional entities parameter. The keys and values must all be
|
||||||
strings; each key will be replaced with its corresponding value.
|
strings; each key will be replaced with its corresponding value.
|
||||||
"""
|
"""
|
||||||
|
entities['\n']='&#10;'
|
||||||
|
entities['\r']='&#12;'
|
||||||
data = _escape(data, entities)
|
data = _escape(data, entities)
|
||||||
if '"' in data:
|
if '"' in data:
|
||||||
if "'" in data:
|
if "'" in data:
|
||||||
|
@ -17,7 +17,7 @@
|
|||||||
#
|
#
|
||||||
# Contributor(s):
|
# Contributor(s):
|
||||||
#
|
#
|
||||||
TOOLSVERSION = u"ODFPY/0.9.1dev"
|
TOOLSVERSION = u"ODFPY/0.9.2dev"
|
||||||
|
|
||||||
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
|
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
|
||||||
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
|
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
|
||||||
|
@ -185,7 +185,7 @@ class OpenDocument:
|
|||||||
if self.fontfacedecls.hasChildNodes():
|
if self.fontfacedecls.hasChildNodes():
|
||||||
self.fontfacedecls.toXml(1, xml)
|
self.fontfacedecls.toXml(1, xml)
|
||||||
a = AutomaticStyles()
|
a = AutomaticStyles()
|
||||||
stylelist = self._used_auto_styles([self.styles, self.body])
|
stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
|
||||||
if len(stylelist) > 0:
|
if len(stylelist) > 0:
|
||||||
a.write_open_tag(1, xml)
|
a.write_open_tag(1, xml)
|
||||||
for s in stylelist:
|
for s in stylelist:
|
||||||
@ -233,9 +233,11 @@ class OpenDocument:
|
|||||||
for styleref in ( (DRAWNS,u'style-name'),
|
for styleref in ( (DRAWNS,u'style-name'),
|
||||||
(DRAWNS,u'text-style-name'),
|
(DRAWNS,u'text-style-name'),
|
||||||
(PRESENTATIONNS,u'style-name'),
|
(PRESENTATIONNS,u'style-name'),
|
||||||
(STYLENS,u'style-name'),
|
(STYLENS,u'data-style-name'),
|
||||||
(STYLENS,u'list-style-name'),
|
(STYLENS,u'list-style-name'),
|
||||||
(STYLENS,u'page-layout-name'),
|
(STYLENS,u'page-layout-name'),
|
||||||
|
(STYLENS,u'style-name'),
|
||||||
|
(TABLENS,u'default-cell-style-name'),
|
||||||
(TABLENS,u'style-name'),
|
(TABLENS,u'style-name'),
|
||||||
(TEXTNS,u'style-name') ):
|
(TEXTNS,u'style-name') ):
|
||||||
if e.getAttrNS(styleref[0],styleref[1]):
|
if e.getAttrNS(styleref[0],styleref[1]):
|
||||||
|
@ -50,3 +50,5 @@ def Radialgradient(**args):
|
|||||||
def Stop(**args):
|
def Stop(**args):
|
||||||
return Element(qname = (SVGNS,'stop'), **args)
|
return Element(qname = (SVGNS,'stop'), **args)
|
||||||
|
|
||||||
|
def Title(**args):
|
||||||
|
return Element(qname = (SVGNS,'title'), **args)
|
||||||
|
@ -446,6 +446,9 @@ def SequenceRef(**args):
|
|||||||
def SheetName(**args):
|
def SheetName(**args):
|
||||||
return Element(qname = (TEXTNS,'sheet-name'), **args)
|
return Element(qname = (TEXTNS,'sheet-name'), **args)
|
||||||
|
|
||||||
|
def SoftPageBreak(**args):
|
||||||
|
return Element(qname = (TEXTNS,'soft-page-break'), **args)
|
||||||
|
|
||||||
def SortKey(**args):
|
def SortKey(**args):
|
||||||
return Element(qname = (TEXTNS,'sort-key'), **args)
|
return Element(qname = (TEXTNS,'sort-key'), **args)
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
|
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
|
||||||
#
|
#
|
||||||
# This is free software. You may redistribute it under the terms
|
# This is free software. You may redistribute it under the terms
|
||||||
# of the Apache license and the GNU General Public License Version
|
# of the Apache license and the GNU General Public License Version
|
||||||
@ -22,16 +22,11 @@
|
|||||||
"""Class to show and manipulate user fields in odf documents."""
|
"""Class to show and manipulate user fields in odf documents."""
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import time
|
|
||||||
import zipfile
|
import zipfile
|
||||||
|
|
||||||
import xml.sax
|
from odf.text import UserFieldDecl
|
||||||
import xml.sax.handler
|
from odf.namespaces import OFFICENS
|
||||||
import xml.sax.saxutils
|
from odf.opendocument import load
|
||||||
|
|
||||||
from odf.namespaces import OFFICENS, TEXTNS
|
|
||||||
|
|
||||||
from cStringIO import StringIO
|
|
||||||
|
|
||||||
OUTENCODING = "utf-8"
|
OUTENCODING = "utf-8"
|
||||||
|
|
||||||
@ -64,6 +59,26 @@ class UserFields(object):
|
|||||||
"""
|
"""
|
||||||
self.src_file = src
|
self.src_file = src
|
||||||
self.dest_file = dest
|
self.dest_file = dest
|
||||||
|
self.document = None
|
||||||
|
|
||||||
|
def loaddoc(self):
|
||||||
|
if isinstance(self.src_file, basestring):
|
||||||
|
# src_file is a filename, check if it is a zip-file
|
||||||
|
if not zipfile.is_zipfile(self.src_file):
|
||||||
|
raise TypeError("%s is no odt file." % self.src_file)
|
||||||
|
elif self.src_file is None:
|
||||||
|
# use stdin if no file given
|
||||||
|
self.src_file = sys.stdin
|
||||||
|
|
||||||
|
self.document = load(self.src_file)
|
||||||
|
|
||||||
|
def savedoc(self):
|
||||||
|
# write output
|
||||||
|
if self.dest_file is None:
|
||||||
|
# use stdout if no filename given
|
||||||
|
self.document.save('-')
|
||||||
|
else:
|
||||||
|
self.document.save(self.dest_file)
|
||||||
|
|
||||||
def list_fields(self):
|
def list_fields(self):
|
||||||
"""List (extract) all known user-fields.
|
"""List (extract) all known user-fields.
|
||||||
@ -81,15 +96,21 @@ class UserFields(object):
|
|||||||
Returns list of tuples (<field name>, <field type>, <value>).
|
Returns list of tuples (<field name>, <field type>, <value>).
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
self.loaddoc()
|
||||||
found_fields = []
|
found_fields = []
|
||||||
def _callback(field_name, value_type, value, attrs):
|
all_fields = self.document.getElementsByType(UserFieldDecl)
|
||||||
|
for f in all_fields:
|
||||||
|
value_type = f.getAttribute('valuetype')
|
||||||
|
if value_type == 'string':
|
||||||
|
value = f.getAttribute('stringvalue')
|
||||||
|
else:
|
||||||
|
value = f.getAttribute('value')
|
||||||
|
field_name = f.getAttribute('name')
|
||||||
|
|
||||||
if field_names is None or field_name in field_names:
|
if field_names is None or field_name in field_names:
|
||||||
found_fields.append((field_name.encode(OUTENCODING),
|
found_fields.append((field_name.encode(OUTENCODING),
|
||||||
value_type.encode(OUTENCODING),
|
value_type.encode(OUTENCODING),
|
||||||
value.encode(OUTENCODING)))
|
value.encode(OUTENCODING)))
|
||||||
return attrs
|
|
||||||
|
|
||||||
self._content_handler(_callback)
|
|
||||||
return found_fields
|
return found_fields
|
||||||
|
|
||||||
def list_values(self, field_names):
|
def list_values(self, field_names):
|
||||||
@ -133,199 +154,16 @@ class UserFields(object):
|
|||||||
Returns None
|
Returns None
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def _callback(field_name, value_type, value, attrs):
|
self.loaddoc()
|
||||||
if field_name in data:
|
all_fields = self.document.getElementsByType(UserFieldDecl)
|
||||||
valattr = VALUE_TYPES.get(value_type)
|
for f in all_fields:
|
||||||
attrs = dict(attrs.items())
|
field_name = f.getAttribute('name')
|
||||||
# Take advantage that startElementNS can take a normal
|
if data.has_key(field_name):
|
||||||
# dict as attrs
|
value_type = f.getAttribute('valuetype')
|
||||||
attrs[valattr] = data[field_name]
|
value = data.get(field_name)
|
||||||
return attrs
|
|
||||||
self._content_handler(_callback, write_file=True)
|
|
||||||
|
|
||||||
def _content_handler(self, callback_func, write_file=False):
|
|
||||||
"""Handle the content using the callback function and write result if
|
|
||||||
necessary.
|
|
||||||
|
|
||||||
callback_func ... function called for each field found in odf document
|
|
||||||
signature: field_name ... name of current field
|
|
||||||
value_type ... type of current field
|
|
||||||
value ... value of current field
|
|
||||||
attrs ... tuple of attrs of current field
|
|
||||||
returns: tuple or dict of attrs
|
|
||||||
write_file ... boolean telling wether write result to file
|
|
||||||
|
|
||||||
"""
|
|
||||||
class DevNull(object):
|
|
||||||
"""IO-object which behaves like /dev/null."""
|
|
||||||
def write(self, str):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# get input
|
|
||||||
if isinstance(self.src_file, basestring):
|
|
||||||
# src_file is a filename, check if it is a zip-file
|
|
||||||
if not zipfile.is_zipfile(self.src_file):
|
|
||||||
raise TypeError("%s is no odt file." % self.src_file)
|
|
||||||
elif self.src_file is None:
|
|
||||||
# use stdin if no file given
|
|
||||||
self.src_file = sys.stdin
|
|
||||||
|
|
||||||
zin = zipfile.ZipFile(self.src_file, 'r')
|
|
||||||
content_xml = zin.read('content.xml')
|
|
||||||
|
|
||||||
# prepare output
|
|
||||||
if write_file:
|
|
||||||
output_io = StringIO()
|
|
||||||
if self.dest_file is None:
|
|
||||||
# use stdout if no filename given
|
|
||||||
self.dest_file = sys.stdout
|
|
||||||
zout = zipfile.ZipFile(self.dest_file, 'w')
|
|
||||||
else:
|
|
||||||
output_io = DevNull()
|
|
||||||
|
|
||||||
|
|
||||||
# parse input
|
|
||||||
odfs = ODFContentParser(callback_func, output_io)
|
|
||||||
parser = xml.sax.make_parser()
|
|
||||||
parser.setFeature(xml.sax.handler.feature_namespaces, 1)
|
|
||||||
parser.setContentHandler(odfs)
|
|
||||||
parser.parse(StringIO(content_xml))
|
|
||||||
|
|
||||||
# write output
|
|
||||||
if write_file:
|
|
||||||
# Loop through the input zipfile and copy the content to
|
|
||||||
# the output until we get to the content.xml. Then
|
|
||||||
# substitute.
|
|
||||||
for zinfo in zin.infolist():
|
|
||||||
if zinfo.filename == "content.xml":
|
|
||||||
# Write meta
|
|
||||||
zi = zipfile.ZipInfo("content.xml", time.localtime()[:6])
|
|
||||||
zi.compress_type = zipfile.ZIP_DEFLATED
|
|
||||||
zout.writestr(zi, odfs.content())
|
|
||||||
else:
|
|
||||||
payload = zin.read(zinfo.filename)
|
|
||||||
zout.writestr(zinfo, payload)
|
|
||||||
zout.close()
|
|
||||||
zin.close()
|
|
||||||
|
|
||||||
|
|
||||||
class ODFContentParser(xml.sax.saxutils.XMLGenerator):
|
|
||||||
|
|
||||||
def __init__(self, callback_func, out=None, encoding=OUTENCODING):
|
|
||||||
"""Constructor.
|
|
||||||
|
|
||||||
callback_func ... function called for each field found in odf document
|
|
||||||
signature: field_name ... name of current field
|
|
||||||
value_type ... type of current field
|
|
||||||
value ... value of current field
|
|
||||||
attrs ... tuple of attrs of current field
|
|
||||||
returns: tuple or dict of attrs
|
|
||||||
out ... file like object for output
|
|
||||||
encoding ... encoding for output
|
|
||||||
|
|
||||||
"""
|
|
||||||
self._callback_func = callback_func
|
|
||||||
xml.sax.saxutils.XMLGenerator.__init__(self, out, encoding)
|
|
||||||
|
|
||||||
def _qname(self, name):
|
|
||||||
"""Builds a qualified name from a (ns_url, localname) pair"""
|
|
||||||
if name[0]:
|
|
||||||
if name[0] == u'http://www.w3.org/XML/1998/namespace':
|
|
||||||
return u'xml' + ":" + name[1]
|
|
||||||
# The name is in a non-empty namespace
|
|
||||||
prefix = self._current_context[name[0]]
|
|
||||||
if prefix:
|
|
||||||
# If it is not the default namespace, prepend the prefix
|
|
||||||
return prefix + ":" + name[1]
|
|
||||||
# Return the unqualified name
|
|
||||||
return name[1]
|
|
||||||
|
|
||||||
def startElementNS(self, name, qname, attrs):
|
|
||||||
if name == (TEXTNS, u'user-field-decl'):
|
|
||||||
field_name = attrs.get((TEXTNS, u'name'))
|
|
||||||
value_type = attrs.get((OFFICENS, u'value-type'))
|
|
||||||
if value_type == 'string':
|
if value_type == 'string':
|
||||||
value = attrs.get((OFFICENS, u'string-value'))
|
f.setAttribute('stringvalue', value)
|
||||||
else:
|
else:
|
||||||
value = attrs.get((OFFICENS, u'value'))
|
f.setAttribute('value', value)
|
||||||
|
self.savedoc()
|
||||||
|
|
||||||
attrs = self._callback_func(field_name, value_type, value, attrs)
|
|
||||||
|
|
||||||
self._startElementNS(name, qname, attrs)
|
|
||||||
|
|
||||||
def _startElementNS(self, name, qname, attrs):
|
|
||||||
# copy of xml.sax.saxutils.XMLGenerator.startElementNS
|
|
||||||
# necessary because we have to provide our own writeattr
|
|
||||||
# function which is called by this method
|
|
||||||
if name[0] is None:
|
|
||||||
name = name[1]
|
|
||||||
elif self._current_context[name[0]] is None:
|
|
||||||
# default namespace
|
|
||||||
name = name[1]
|
|
||||||
else:
|
|
||||||
name = self._current_context[name[0]] + ":" + name[1]
|
|
||||||
self._out.write('<' + name)
|
|
||||||
|
|
||||||
for k,v in self._undeclared_ns_maps:
|
|
||||||
if k is None:
|
|
||||||
self._out.write(' xmlns="%s"' % (v or ''))
|
|
||||||
else:
|
|
||||||
self._out.write(' xmlns:%s="%s"' % (k,v))
|
|
||||||
self._undeclared_ns_maps = []
|
|
||||||
|
|
||||||
for (name, value) in attrs.items():
|
|
||||||
if name[0] is None:
|
|
||||||
name = name[1]
|
|
||||||
elif self._current_context[name[0]] is None:
|
|
||||||
# default namespace
|
|
||||||
#If an attribute has a nsuri but not a prefix, we must
|
|
||||||
#create a prefix and add a nsdecl
|
|
||||||
prefix = self.GENERATED_PREFIX % self._generated_prefix_ctr
|
|
||||||
self._generated_prefix_ctr = self._generated_prefix_ctr + 1
|
|
||||||
name = prefix + ':' + name[1]
|
|
||||||
self._out.write(' xmlns:%s=%s' % (prefix, quoteattr(name[0])))
|
|
||||||
self._current_context[name[0]] = prefix
|
|
||||||
else:
|
|
||||||
name = self._current_context[name[0]] + ":" + name[1]
|
|
||||||
self._out.write(' %s=' % name)
|
|
||||||
writeattr(self._out, value)
|
|
||||||
self._out.write('>')
|
|
||||||
|
|
||||||
def content(self):
|
|
||||||
return self._out.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
ATTR_ENTITIES = {
|
|
||||||
'\n': '&#x0a;' # convert newlines into entities inside attributes
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def writetext(stream, text, entities={}):
|
|
||||||
text = xml.sax.saxutils.escape(text, entities)
|
|
||||||
try:
|
|
||||||
stream.write(text)
|
|
||||||
except UnicodeError:
|
|
||||||
for c in text:
|
|
||||||
try:
|
|
||||||
stream.write(c)
|
|
||||||
except UnicodeError:
|
|
||||||
stream.write(u"&#%d;" % ord(c))
|
|
||||||
|
|
||||||
def writeattr(stream, text):
|
|
||||||
# copied from xml.sax.saxutils.writeattr added support for an
|
|
||||||
# additional entity mapping
|
|
||||||
countdouble = text.count('"')
|
|
||||||
entities = ATTR_ENTITIES.copy()
|
|
||||||
if countdouble:
|
|
||||||
countsingle = text.count("'")
|
|
||||||
if countdouble <= countsingle:
|
|
||||||
entities['"'] = "&quot;"
|
|
||||||
quote = '"'
|
|
||||||
else:
|
|
||||||
entities["'"] = "&apos;"
|
|
||||||
quote = "'"
|
|
||||||
else:
|
|
||||||
quote = '"'
|
|
||||||
stream.write(quote)
|
|
||||||
writetext(stream, text, entities)
|
|
||||||
stream.write(quote)
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user