mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ODT input: Update odfpy library to latest version, adds support for bookmarks
This commit is contained in:
parent
07d1ca7ec3
commit
02562da2a9
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -40,6 +40,9 @@ def cnv_boolean(attribute, arg, element):
|
||||
|
||||
# Potentially accept color values
|
||||
def cnv_color(attribute, arg, element):
    """ Return a colour attribute value as a plain string.

    The value is meant to be an RGB colour in "#rrggbb" notation (section
    5.9.11 of [XSL]), where rr, gg and bb are 8-bit hexadecimal digits.
    The notation is not checked here: the argument is only passed through
    str().  ``attribute`` and ``element`` are part of the common converter
    signature and are not used.
    """
    converted = str(arg)
    return converted
|
||||
|
||||
def cnv_configtype(attribute, arg, element):
|
||||
@ -55,9 +58,15 @@ def cnv_data_source_has_labels(attribute, arg, element):
|
||||
|
||||
# Understand different date formats
|
||||
def cnv_date(attribute, arg, element):
    """ Return a date attribute value as a plain string.

    The value is meant to be a dateOrDateTime, i.e. either an [xmlschema-2]
    date value or an [xmlschema-2] dateTime value.  No parsing or
    validation is done: the argument is only passed through str().
    ``attribute`` and ``element`` are not used.
    """
    converted = str(arg)
    return converted
|
||||
|
||||
def cnv_dateTime(attribute, arg, element):
    """ Return a dateTime attribute value as a plain string.

    The value is described as a dateOrDateTime: either an [xmlschema-2]
    date value or an [xmlschema-2] dateTime value.  No parsing or
    validation is done: the argument is only passed through str().
    ``attribute`` and ``element`` are not used.
    """
    converted = str(arg)
    return converted
|
||||
|
||||
def cnv_double(attribute, arg, element):
|
||||
@ -67,11 +76,31 @@ def cnv_duration(attribute, arg, element):
|
||||
return str(arg)
|
||||
|
||||
def cnv_family(attribute, arg, element):
    """ Validate a style family name and return it as a string.

    ``arg`` is converted with str() and must be one of the style families
    listed below.  ``attribute`` and ``element`` are part of the common
    converter signature and are not used.

    Raises ValueError when the value is not an allowed style family.
    """
    family = str(arg)
    if family not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
                      "graphic", "presentation", "drawing-page", "chart"):
        # Call-style raise is accepted by both Python 2 and Python 3; the
        # statement form ``raise ValueError, msg`` is Python 2 only.
        raise ValueError("'%s' not allowed" % family)
    return family
|
||||
|
||||
def __save_prefix(attribute, arg, element):
    """ Note the namespace prefix used inside an attribute value.

    ``arg`` is a string that may start with "prefix:".  When it does, and
    the prefix resolves to a namespace through element.get_knownns(), the
    namespace is passed to element.get_nsprefix().  An unknown prefix is
    tolerated silently (raising ValueError for it was considered and left
    disabled).  ``attribute`` is not used.

    Returns the value unchanged, converted with unicode().
    """
    prefix = arg.split(':', 1)[0]
    if prefix != arg:
        namespace = element.get_knownns(prefix)
        if namespace is not None:
            # Called for its effect on the element, not for its result --
            # presumably it registers the namespace so that it is declared
            # on output; confirm against Element.get_nsprefix.
            element.get_nsprefix(namespace)
    return unicode(arg)
|
||||
|
||||
def cnv_formula(attribute, arg, element):
    """ Convert an attribute value that holds a formula.

    Formulas have no predefined syntax, but the string should begin with a
    namespace prefix, followed by a ":" (COLON, U+003A) separator, followed
    by the text of the formula.  The namespace bound to the prefix
    determines the syntax and semantics of the formula.

    The work is delegated to __save_prefix(), whose result is returned.
    """
    converted = __save_prefix(attribute, arg, element)
    return converted
|
||||
|
||||
def cnv_ID(attribute, arg, element):
    """ Return an ID attribute value as a plain string.

    The argument is only passed through str(); it is not validated.
    ``attribute`` and ``element`` are not used.
    """
    converted = str(arg)
    return converted
|
||||
|
||||
@ -89,6 +118,9 @@ def cnv_legend_position(attribute, arg, element):
|
||||
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
|
||||
|
||||
def cnv_length(attribute, arg, element):
|
||||
""" A (positive or negative) physical length, consisting of magnitude and unit, in conformance with the
|
||||
Units of Measure defined in §5.9.13 of [XSL].
|
||||
"""
|
||||
global pattern_length
|
||||
if not pattern_length.match(arg):
|
||||
raise ValueError, "'%s' is not a valid length" % arg
|
||||
@ -120,12 +152,12 @@ def cnv_namespacedToken(attribute, arg, element):
|
||||
|
||||
if not pattern_namespacedToken.match(arg):
|
||||
raise ValueError, "'%s' is not a valid namespaced token" % arg
|
||||
return arg
|
||||
return __save_prefix(attribute, arg, element)
|
||||
|
||||
# Must accept string as argument
|
||||
# NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
|
||||
# Essentially an XML name minus ':'
|
||||
def cnv_NCName(attribute, arg, element):
|
||||
""" NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
|
||||
Essentially an XML name minus ':'
|
||||
"""
|
||||
if type(arg) in types.StringTypes:
|
||||
return make_NCName(arg)
|
||||
else:
|
||||
@ -226,6 +258,7 @@ attrconverters = {
|
||||
((ANIMNS,u'name'), None): cnv_string,
|
||||
((ANIMNS,u'sub-item'), None): cnv_string,
|
||||
((ANIMNS,u'value'), None): cnv_string,
|
||||
# ((DBNS,u'type'), None): cnv_namespacedToken,
|
||||
((CHARTNS,u'attached-axis'), None): cnv_string,
|
||||
((CHARTNS,u'class'), (CHARTNS,u'grid')): cnv_major_minor,
|
||||
((CHARTNS,u'class'), None): cnv_namespacedToken,
|
||||
@ -288,7 +321,7 @@ attrconverters = {
|
||||
((CHARTNS,u'values-cell-range-address'), None): cnv_string,
|
||||
((CHARTNS,u'vertical'), None): cnv_boolean,
|
||||
((CHARTNS,u'visible'), None): cnv_boolean,
|
||||
((CONFIGNS,u'name'), None): cnv_string,
|
||||
((CONFIGNS,u'name'), None): cnv_formula,
|
||||
((CONFIGNS,u'type'), None): cnv_configtype,
|
||||
((DR3DNS,u'ambient-color'), None): cnv_string,
|
||||
((DR3DNS,u'back-scale'), None): cnv_string,
|
||||
@ -369,11 +402,11 @@ attrconverters = {
|
||||
((DRAWNS,u'decimal-places'), None): cnv_string,
|
||||
((DRAWNS,u'display'), None): cnv_string,
|
||||
((DRAWNS,u'display-name'), None): cnv_string,
|
||||
((DRAWNS,u'distance'), None): cnv_string,
|
||||
((DRAWNS,u'distance'), None): cnv_lengthorpercent,
|
||||
((DRAWNS,u'dots1'), None): cnv_integer,
|
||||
((DRAWNS,u'dots1-length'), None): cnv_length,
|
||||
((DRAWNS,u'dots1-length'), None): cnv_lengthorpercent,
|
||||
((DRAWNS,u'dots2'), None): cnv_integer,
|
||||
((DRAWNS,u'dots2-length'), None): cnv_length,
|
||||
((DRAWNS,u'dots2-length'), None): cnv_lengthorpercent,
|
||||
((DRAWNS,u'end-angle'), None): cnv_double,
|
||||
((DRAWNS,u'end'), None): cnv_string,
|
||||
((DRAWNS,u'end-color'), None): cnv_string,
|
||||
@ -383,7 +416,7 @@ attrconverters = {
|
||||
((DRAWNS,u'end-line-spacing-horizontal'), None): cnv_string,
|
||||
((DRAWNS,u'end-line-spacing-vertical'), None): cnv_string,
|
||||
((DRAWNS,u'end-shape'), None): cnv_IDREF,
|
||||
((DRAWNS,u'engine'), None): cnv_string,
|
||||
((DRAWNS,u'engine'), None): cnv_namespacedToken,
|
||||
((DRAWNS,u'enhanced-path'), None): cnv_string,
|
||||
((DRAWNS,u'escape-direction'), None): cnv_string,
|
||||
((DRAWNS,u'extrusion-allowed'), None): cnv_boolean,
|
||||
@ -604,7 +637,7 @@ attrconverters = {
|
||||
((FORMNS,u'button-type'), None): cnv_string,
|
||||
((FORMNS,u'command'), None): cnv_string,
|
||||
((FORMNS,u'command-type'), None): cnv_string,
|
||||
((FORMNS,u'control-implementation'), None): cnv_string,
|
||||
((FORMNS,u'control-implementation'), None): cnv_namespacedToken,
|
||||
((FORMNS,u'convert-empty-to-null'), None): cnv_boolean,
|
||||
((FORMNS,u'current-selected'), None): cnv_boolean,
|
||||
((FORMNS,u'current-state'), None): cnv_string,
|
||||
@ -800,8 +833,8 @@ attrconverters = {
|
||||
((PRESENTATIONNS,u'user-transformed'), None): cnv_boolean,
|
||||
((PRESENTATIONNS,u'verb'), None): cnv_nonNegativeInteger,
|
||||
((PRESENTATIONNS,u'visibility'), None): cnv_string,
|
||||
((SCRIPTNS,u'event-name'), None): cnv_string,
|
||||
((SCRIPTNS,u'language'), None): cnv_string,
|
||||
((SCRIPTNS,u'event-name'), None): cnv_formula,
|
||||
((SCRIPTNS,u'language'), None): cnv_formula,
|
||||
((SCRIPTNS,u'macro-name'), None): cnv_string,
|
||||
((SMILNS,u'accelerate'), None): cnv_double,
|
||||
((SMILNS,u'accumulate'), None): cnv_string,
|
||||
@ -1087,7 +1120,7 @@ attrconverters = {
|
||||
((SVGNS,u'y2'), None): cnv_lengthorpercent,
|
||||
((TABLENS,u'acceptance-state'), None): cnv_string,
|
||||
((TABLENS,u'add-empty-lines'), None): cnv_boolean,
|
||||
((TABLENS,u'algorithm'), None): cnv_string,
|
||||
((TABLENS,u'algorithm'), None): cnv_formula,
|
||||
((TABLENS,u'align'), None): cnv_string,
|
||||
((TABLENS,u'allow-empty-cell'), None): cnv_boolean,
|
||||
((TABLENS,u'application-data'), None): cnv_string,
|
||||
@ -1106,7 +1139,7 @@ attrconverters = {
|
||||
((TABLENS,u'cell-range'), None): cnv_string,
|
||||
((TABLENS,u'column'), None): cnv_integer,
|
||||
((TABLENS,u'comment'), None): cnv_string,
|
||||
((TABLENS,u'condition'), None): cnv_string,
|
||||
((TABLENS,u'condition'), None): cnv_formula,
|
||||
((TABLENS,u'condition-source'), None): cnv_string,
|
||||
((TABLENS,u'condition-source-range-address'), None): cnv_string,
|
||||
((TABLENS,u'contains-error'), None): cnv_boolean,
|
||||
@ -1144,13 +1177,13 @@ attrconverters = {
|
||||
((TABLENS,u'end-x'), None): cnv_length,
|
||||
((TABLENS,u'end-y'), None): cnv_length,
|
||||
((TABLENS,u'execute'), None): cnv_boolean,
|
||||
((TABLENS,u'expression'), None): cnv_string,
|
||||
((TABLENS,u'expression'), None): cnv_formula,
|
||||
((TABLENS,u'field-name'), None): cnv_string,
|
||||
((TABLENS,u'field-number'), None): cnv_nonNegativeInteger,
|
||||
((TABLENS,u'field-number'), None): cnv_string,
|
||||
((TABLENS,u'filter-name'), None): cnv_string,
|
||||
((TABLENS,u'filter-options'), None): cnv_string,
|
||||
((TABLENS,u'formula'), None): cnv_string,
|
||||
((TABLENS,u'formula'), None): cnv_formula,
|
||||
((TABLENS,u'function'), None): cnv_string,
|
||||
((TABLENS,u'function'), None): cnv_string,
|
||||
((TABLENS,u'grand-total'), None): cnv_string,
|
||||
@ -1290,7 +1323,7 @@ attrconverters = {
|
||||
((TEXTNS,u'combine-entries-with-pp'), None): cnv_boolean,
|
||||
((TEXTNS,u'comma-separated'), None): cnv_boolean,
|
||||
((TEXTNS,u'cond-style-name'), None): cnv_StyleNameRef,
|
||||
((TEXTNS,u'condition'), None): cnv_string,
|
||||
((TEXTNS,u'condition'), None): cnv_formula,
|
||||
((TEXTNS,u'connection-name'), None): cnv_string,
|
||||
((TEXTNS,u'consecutive-numbering'), None): cnv_boolean,
|
||||
((TEXTNS,u'continue-numbering'), None): cnv_boolean,
|
||||
@ -1321,7 +1354,7 @@ attrconverters = {
|
||||
((TEXTNS,u'first-row-start-column'), None): cnv_string,
|
||||
((TEXTNS,u'fixed'), None): cnv_boolean,
|
||||
((TEXTNS,u'footnotes-position'), None): cnv_string,
|
||||
((TEXTNS,u'formula'), None): cnv_string,
|
||||
((TEXTNS,u'formula'), None): cnv_formula,
|
||||
((TEXTNS,u'global'), None): cnv_boolean,
|
||||
((TEXTNS,u'howpublished'), None): cnv_string,
|
||||
((TEXTNS,u'id'), None): cnv_ID,
|
||||
@ -1437,7 +1470,10 @@ attrconverters = {
|
||||
|
||||
class AttrConverters:
|
||||
def convert(self, attribute, value, element):
|
||||
conversion = attrconverters.get((attribute,element), None)
|
||||
""" Based on the element, figures out how to check/convert the attribute value
|
||||
All values are converted to string
|
||||
"""
|
||||
conversion = attrconverters.get((attribute, element.qname), None)
|
||||
if conversion is not None:
|
||||
return conversion(attribute, value, element)
|
||||
else:
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2007-2008 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2007-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -112,6 +112,9 @@ class Node(xml.dom.Node):
|
||||
return self.childNodes[-1]
|
||||
|
||||
def insertBefore(self, newChild, refChild):
|
||||
""" Inserts the node newChild before the existing child node refChild.
|
||||
If refChild is null, insert newChild at the end of the list of children.
|
||||
"""
|
||||
if newChild.nodeType not in self._child_node_types:
|
||||
raise IllegalChild, "%s cannot be child of %s" % (newChild.tagName, self.tagName)
|
||||
if newChild.parentNode is not None:
|
||||
@ -135,21 +138,26 @@ class Node(xml.dom.Node):
|
||||
newChild.parentNode = self
|
||||
return newChild
|
||||
|
||||
def appendChild(self, newChild):
    """ Adds the node newChild to the end of the list of children of this node.
        If the newChild is already in the tree, it is first removed.

        A document fragment is not inserted itself: each of its children is
        appended in turn and the fragment is returned.

        Returns newChild.
        Raises IllegalChild when newChild's node type is not listed in
        self._child_node_types.
    """
    if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
        # Iterate over a snapshot: appending a child may detach it from the
        # fragment's own child list.
        for c in tuple(newChild.childNodes):
            self.appendChild(c)
        ### The DOM does not clearly specify what to return in this case
        return newChild
    if newChild.nodeType not in self._child_node_types:
        # Call-style raise works on both Python 2 and Python 3.
        raise IllegalChild("<%s> is not allowed in %s" % ( newChild.tagName, self.tagName))
    if newChild.parentNode is not None:
        newChild.parentNode.removeChild(newChild)
    _append_child(self, newChild)
    # newChild is now the last child, so nothing follows it.
    newChild.nextSibling = None
    return newChild
|
||||
|
||||
def removeChild(self, oldChild):
|
||||
""" Removes the child node indicated by oldChild from the list of children, and returns it.
|
||||
"""
|
||||
#FIXME: update ownerDocument.element_dict or find other solution
|
||||
try:
|
||||
self.childNodes.remove(oldChild)
|
||||
@ -191,8 +199,8 @@ def _append_child(self, node):
|
||||
node.__dict__["parentNode"] = self
|
||||
|
||||
class Childless:
|
||||
"""Mixin that makes childless-ness easy to implement and avoids
|
||||
the complexity of the Node methods that deal with children.
|
||||
""" Mixin that makes childless-ness easy to implement and avoids
|
||||
the complexity of the Node methods that deal with children.
|
||||
"""
|
||||
|
||||
attributes = None
|
||||
@ -207,6 +215,7 @@ class Childless:
|
||||
return None
|
||||
|
||||
def appendChild(self, node):
|
||||
""" Raises an error """
|
||||
raise xml.dom.HierarchyRequestErr(
|
||||
self.tagName + " nodes cannot have children")
|
||||
|
||||
@ -214,14 +223,17 @@ class Childless:
|
||||
return False
|
||||
|
||||
def insertBefore(self, newChild, refChild):
|
||||
""" Raises an error """
|
||||
raise xml.dom.HierarchyRequestErr(
|
||||
self.tagName + " nodes do not have children")
|
||||
|
||||
def removeChild(self, oldChild):
|
||||
""" Raises an error """
|
||||
raise xml.dom.NotFoundErr(
|
||||
self.tagName + " nodes do not have children")
|
||||
|
||||
def replaceChild(self, newChild, oldChild):
|
||||
""" Raises an error """
|
||||
raise xml.dom.HierarchyRequestErr(
|
||||
self.tagName + " nodes do not have children")
|
||||
|
||||
@ -247,8 +259,12 @@ class CDATASection(Childless, Text):
|
||||
nodeType = Node.CDATA_SECTION_NODE
|
||||
|
||||
def toXml(self,level,f):
    """ Generate XML output of the node.

    The text is written to ``f`` as a CDATA section.  A CDATA section
    cannot contain the sequence "]]>", and that sequence may not appear
    literally in ordinary character content either.  Each occurrence is
    therefore split over two adjacent CDATA sections: the first ends after
    "]]" and the next one starts with ">".

    Nothing is written when self.data is empty.  ``level`` is not used.
    """
    if self.data:
        # "]]>" -> "]]" + end of section + start of section + ">"
        f.write('<![CDATA[%s]]>' % self.data.replace(']]>', ']]]]><![CDATA[>'))
|
||||
|
||||
class Element(Node):
|
||||
""" Creates a arbitrary element and is intended to be subclassed not used on its own.
|
||||
@ -310,7 +326,19 @@ class Element(Node):
|
||||
if self.getAttrNS(r[0],r[1]) is None:
|
||||
raise AttributeError, "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName)
|
||||
|
||||
def get_knownns(self, prefix):
    """ Resolve a prefix to a namespace using the list of known namespaces.

    Odfpy maintains a table of known namespaces (nsdict, keyed by
    namespace with the prefix as value).  The table is searched for an
    entry whose prefix equals ``prefix``.

    Returns the matching namespace, or None when no entry matches.
    """
    matches = (ns for ns, known_prefix in nsdict.items() if known_prefix == prefix)
    return next(matches, None)
|
||||
|
||||
def get_nsprefix(self, namespace):
|
||||
""" Odfpy maintains a list of known namespaces. In some cases we have a namespace URL,
|
||||
and needs to look up or assign the prefix for it.
|
||||
"""
|
||||
if namespace is None: namespace = ""
|
||||
prefix = _nsassign(namespace)
|
||||
if not self.namespaces.has_key(namespace):
|
||||
@ -339,6 +367,9 @@ class Element(Node):
|
||||
self.ownerDocument.rebuild_caches(element)
|
||||
|
||||
def addText(self, text, check_grammar=True):
|
||||
""" Adds text to an element
|
||||
Setting check_grammar=False turns off grammar checking
|
||||
"""
|
||||
if check_grammar and self.qname not in grammar.allows_text:
|
||||
raise IllegalText, "The <%s> element does not allow text" % self.tagName
|
||||
else:
|
||||
@ -346,6 +377,9 @@ class Element(Node):
|
||||
self.appendChild(Text(text))
|
||||
|
||||
def addCDATA(self, cdata, check_grammar=True):
|
||||
""" Adds CDATA to an element
|
||||
Setting check_grammar=False turns off grammar checking
|
||||
"""
|
||||
if check_grammar and self.qname not in grammar.allows_text:
|
||||
raise IllegalText, "The <%s> element does not allow text" % self.tagName
|
||||
else:
|
||||
@ -403,17 +437,18 @@ class Element(Node):
|
||||
# if allowed_attrs and (namespace, localpart) not in allowed_attrs:
|
||||
# raise AttributeError, "Attribute %s:%s is not allowed in element <%s>" % ( prefix, localpart, self.tagName)
|
||||
c = AttrConverters()
|
||||
self.attributes[prefix + ":" + localpart] = c.convert((namespace, localpart), value, self.qname)
|
||||
self.attributes[(namespace, localpart)] = c.convert((namespace, localpart), value, self)
|
||||
|
||||
def getAttrNS(self, namespace, localpart):
|
||||
prefix = self.get_nsprefix(namespace)
|
||||
return self.attributes.get(prefix + ":" + localpart)
|
||||
return self.attributes.get((namespace, localpart))
|
||||
|
||||
def removeAttrNS(self, namespace, localpart):
|
||||
prefix = self.get_nsprefix(namespace)
|
||||
del self.attributes[prefix + ":" + localpart]
|
||||
del self.attributes[(namespace, localpart)]
|
||||
|
||||
def getAttribute(self, attr):
|
||||
""" Get an attribute value. The method knows which namespace the attribute is in
|
||||
"""
|
||||
allowed_attrs = self.allowed_attributes()
|
||||
if allowed_attrs is None:
|
||||
if type(attr) == type(()):
|
||||
@ -432,8 +467,9 @@ class Element(Node):
|
||||
if level == 0:
|
||||
for namespace, prefix in self.namespaces.items():
|
||||
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
||||
for attkey in self.attributes.keys():
|
||||
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
|
||||
for qname in self.attributes.keys():
|
||||
prefix = self.get_nsprefix(qname[0])
|
||||
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
|
||||
f.write('>')
|
||||
|
||||
def write_close_tag(self, level, f):
|
||||
@ -445,8 +481,9 @@ class Element(Node):
|
||||
if level == 0:
|
||||
for namespace, prefix in self.namespaces.items():
|
||||
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
|
||||
for attkey in self.attributes.keys():
|
||||
f.write(' '+_escape(str(attkey))+'='+_quoteattr(unicode(self.attributes[attkey]).encode('utf-8')))
|
||||
for qname in self.attributes.keys():
|
||||
prefix = self.get_nsprefix(qname[0])
|
||||
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(unicode(self.attributes[qname]).encode('utf-8')))
|
||||
if self.childNodes:
|
||||
f.write('>')
|
||||
for element in self.childNodes:
|
||||
@ -464,6 +501,7 @@ class Element(Node):
|
||||
return accumulator
|
||||
|
||||
def getElementsByType(self, element):
    """ Gets elements based on the type.

    ``element`` is a callable -- an element function from text.py, draw.py
    etc. -- that is invoked with check_grammar=False to build a prototype
    object.  The prototype and a fresh empty accumulator list are handed to
    self._getElementsByObj(), whose result is returned.
    """
    prototype = element(check_grammar=False)
    found = self._getElementsByObj(prototype, [])
    return found
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -63,8 +63,8 @@ class LoadParser(handler.ContentHandler):
|
||||
|
||||
self.level = self.level + 1
|
||||
# Add any accumulated text content
|
||||
content = ''.join(self.data).strip()
|
||||
if len(content) > 0:
|
||||
content = ''.join(self.data)
|
||||
if len(content.strip()) > 0:
|
||||
self.parent.addText(content, check_grammar=False)
|
||||
self.data = []
|
||||
# Create the element
|
||||
|
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -17,7 +17,7 @@
|
||||
#
|
||||
# Contributor(s):
|
||||
#
|
||||
TOOLSVERSION = u"ODFPY/0.9.2dev"
|
||||
TOOLSVERSION = u"ODFPY/0.9.4dev"
|
||||
|
||||
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"
|
||||
DBNS = u"urn:oasis:names:tc:opendocument:xmlns:database:1.0"
|
||||
@ -28,19 +28,23 @@ DCNS = u"http://purl.org/dc/elements/1.1/"
|
||||
DOMNS = u"http://www.w3.org/2001/xml-events"
|
||||
DR3DNS = u"urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
|
||||
DRAWNS = u"urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
|
||||
FIELDNS = u"urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0"
|
||||
FONS = u"urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"
|
||||
FORMNS = u"urn:oasis:names:tc:opendocument:xmlns:form:1.0"
|
||||
GRDDLNS = u"http://www.w3.org/2003/g/data-view#"
|
||||
KOFFICENS = u"http://www.koffice.org/2005/"
|
||||
MANIFESTNS = u"urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
|
||||
MATHNS = u"http://www.w3.org/1998/Math/MathML"
|
||||
METANS = u"urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
|
||||
NUMBERNS = u"urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
|
||||
OFFICENS = u"urn:oasis:names:tc:opendocument:xmlns:office:1.0"
|
||||
OFNS = u"urn:oasis:names:tc:opendocument:xmlns:of:1.2"
|
||||
OOONS = u"http://openoffice.org/2004/office"
|
||||
OOOWNS = u"http://openoffice.org/2004/writer"
|
||||
OOOCNS = u"http://openoffice.org/2004/calc"
|
||||
PRESENTATIONNS = u"urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
|
||||
RDFANS = u"http://docs.oasis-open.org/opendocument/meta/rdfa#"
|
||||
RPTNS = u"http://openoffice.org/2005/report"
|
||||
SCRIPTNS = u"urn:oasis:names:tc:opendocument:xmlns:script:1.0"
|
||||
SMILNS = u"urn:oasis:names:tc:opendocument:xmlns:smil-compatible:1.0"
|
||||
STYLENS = u"urn:oasis:names:tc:opendocument:xmlns:style:1.0"
|
||||
@ -50,7 +54,8 @@ TEXTNS = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
|
||||
XFORMSNS = u"http://www.w3.org/2002/xforms"
|
||||
XLINKNS = u"http://www.w3.org/1999/xlink"
|
||||
XMLNS = u"http://www.w3.org/XML/1998/namespace"
|
||||
|
||||
XSDNS = u"http://www.w3.org/2001/XMLSchema"
|
||||
XSINS = u"http://www.w3.org/2001/XMLSchema-instance"
|
||||
|
||||
nsdict = {
|
||||
ANIMNS: u'anim',
|
||||
@ -61,19 +66,23 @@ nsdict = {
|
||||
DOMNS: u'dom',
|
||||
DR3DNS: u'dr3d',
|
||||
DRAWNS: u'draw',
|
||||
FIELDNS: u'field',
|
||||
FONS: u'fo',
|
||||
FORMNS: u'form',
|
||||
GRDDLNS: u'grddl',
|
||||
KOFFICENS: u'koffice',
|
||||
MANIFESTNS: u'manifest',
|
||||
MATHNS: u'math',
|
||||
METANS: u'meta',
|
||||
NUMBERNS: u'number',
|
||||
OFFICENS: u'office',
|
||||
OFNS: u'of',
|
||||
OOONS: u'ooo',
|
||||
OOOWNS: u'ooow',
|
||||
OOOCNS: u'oooc',
|
||||
PRESENTATIONNS: u'presentation',
|
||||
RDFANS: u'rdfa',
|
||||
RPTNS: u'rpt',
|
||||
SCRIPTNS: u'script',
|
||||
SMILNS: u'smil',
|
||||
STYLENS: u'style',
|
||||
@ -83,4 +92,6 @@ nsdict = {
|
||||
XFORMSNS: u'xforms',
|
||||
XLINKNS: u'xlink',
|
||||
XMLNS: u'xml',
|
||||
XSDNS: u'xsd',
|
||||
XSINS: u'xsi',
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2006-2007 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -20,15 +20,18 @@
|
||||
#
|
||||
#import pdb
|
||||
#pdb.set_trace()
|
||||
import zipfile
|
||||
from xml.sax import handler, expatreader
|
||||
from xml.sax.xmlreader import InputSource
|
||||
from xml.sax import handler
|
||||
from xml.sax.saxutils import escape, quoteattr
|
||||
from cStringIO import StringIO
|
||||
from xml.dom import Node
|
||||
|
||||
from namespaces import DCNS, DRAWNS, FONS, \
|
||||
METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, \
|
||||
STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
|
||||
from opendocument import load
|
||||
|
||||
from namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
|
||||
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
|
||||
SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
|
||||
|
||||
if False: # Added by Kovid
|
||||
DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
|
||||
|
||||
# Handling of styles
|
||||
#
|
||||
@ -72,8 +75,8 @@ class StyleToCSS:
|
||||
(FONS,u"border-left"): self.c_fo,
|
||||
(FONS,u"border-right"): self.c_fo,
|
||||
(FONS,u"border-top"): self.c_fo,
|
||||
(FONS,u"break-after"): self.c_break,
|
||||
(FONS,u"break-before"): self.c_break,
|
||||
(FONS,u"break-after"): self.c_break, # Added by Kovid
|
||||
(FONS,u"break-before"): self.c_break,# Added by Kovid
|
||||
(FONS,u"color"): self.c_fo,
|
||||
(FONS,u"font-family"): self.c_fo,
|
||||
(FONS,u"font-size"): self.c_fo,
|
||||
@ -136,7 +139,7 @@ class StyleToCSS:
|
||||
selector = rule[1]
|
||||
sdict[selector] = val
|
||||
|
||||
def c_break(self, ruleset, sdict, rule, val):
|
||||
def c_break(self, ruleset, sdict, rule, val): # Added by Kovid
|
||||
property = 'page-' + rule[1]
|
||||
values = {'auto': 'auto', 'column': 'always', 'page': 'always',
|
||||
'even-page': 'left', 'odd-page': 'right',
|
||||
@ -346,13 +349,16 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.elements = {
|
||||
(DCNS, 'title'): (self.s_processcont, self.e_dc_title),
|
||||
(DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
|
||||
(DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
|
||||
(DCNS, 'creator'): (self.s_processcont, self.e_dc_creator),
|
||||
(DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
|
||||
(DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
|
||||
(DRAWNS, 'custom-shape'): (self.s_custom_shape, self.e_custom_shape),
|
||||
(DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
|
||||
(DRAWNS, 'image'): (self.s_draw_image, None),
|
||||
(DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
|
||||
(DRAWNS, "layer-set"):(self.s_ignorexml, None),
|
||||
(DRAWNS, 'object'): (self.s_draw_object, None),
|
||||
(DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
|
||||
(DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
|
||||
(DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
|
||||
(METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
|
||||
@ -364,7 +370,9 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
(NUMBERNS, "date-style"):(self.s_ignorexml, None),
|
||||
(NUMBERNS, "number-style"):(self.s_ignorexml, None),
|
||||
(NUMBERNS, "text-style"):(self.s_ignorexml, None),
|
||||
(OFFICENS, "annotation"):(self.s_ignorexml, None),
|
||||
(OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
|
||||
(OFFICENS, "document"):(self.s_office_document_content, self.e_office_document_content),
|
||||
(OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
|
||||
(OFFICENS, "forms"):(self.s_ignorexml, None),
|
||||
(OFFICENS, "master-styles"):(self.s_office_master_styles, None),
|
||||
@ -374,6 +382,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
(OFFICENS, "styles"):(self.s_office_styles, None),
|
||||
(OFFICENS, "text"):(self.s_office_text, self.e_office_text),
|
||||
(OFFICENS, "scripts"):(self.s_ignorexml, None),
|
||||
(OFFICENS, "settings"):(self.s_ignorexml, None),
|
||||
(PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
|
||||
# (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
|
||||
(STYLENS, "default-page-layout"):(self.s_ignorexml, None),
|
||||
@ -389,8 +398,8 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
# (STYLENS, "header-style"):(self.s_style_header_style, None),
|
||||
(STYLENS, "master-page"):(self.s_style_master_page, None),
|
||||
(STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
|
||||
# (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
|
||||
(STYLENS, "page-layout"):(self.s_ignorexml, None),
|
||||
(STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
|
||||
# (STYLENS, "page-layout"):(self.s_ignorexml, None),
|
||||
(STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
|
||||
(STYLENS, "style"):(self.s_style_style, self.e_style_style),
|
||||
(STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
|
||||
@ -407,6 +416,10 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
(TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||
(TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
|
||||
(TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||
(TEXTNS, 'bookmark'): (self.s_text_bookmark, None),
|
||||
(TEXTNS, 'bookmark-start'): (self.s_text_bookmark, None),
|
||||
(TEXTNS, 'bookmark-ref'): (self.s_text_bookmark_ref, self.e_text_a),
|
||||
(TEXTNS, 'bookmark-ref-start'): (self.s_text_bookmark_ref, None),
|
||||
(TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
|
||||
(TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||
(TEXTNS, 'line-break'):(self.s_text_line_break, None),
|
||||
@ -430,10 +443,66 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
(TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
|
||||
}
|
||||
if embedable:
|
||||
self.elements[(OFFICENS, u"text")] = (None,None)
|
||||
self.elements[(OFFICENS, u"spreadsheet")] = (None,None)
|
||||
self.elements[(OFFICENS, u"presentation")] = (None,None)
|
||||
self.elements[(OFFICENS, u"document-content")] = (None,None)
|
||||
self.make_embedable()
|
||||
self._resetobject()
|
||||
|
||||
def set_plain(self):
    """ Tell the parser to not generate CSS.

    Only clears the ``generate_css`` flag on this object.
    """
    css_enabled = False
    self.generate_css = css_enabled
|
||||
|
||||
def set_embedable(self):
    """ Tells the converter to only output the parts inside the <body>.

    The handler pairs registered in self.elements for the office:text,
    office:spreadsheet, office:presentation and office:document-content
    elements are replaced by (None, None).
    """
    for localname in (u"text", u"spreadsheet", u"presentation", u"document-content"):
        self.elements[(OFFICENS, localname)] = (None,None)

# The constructor calls self.make_embedable() when it is created with
# embedable set, but no method of that name is visible next to this one.
# NOTE(review): confirm that make_embedable is not defined elsewhere in
# the class before relying on this alias.
make_embedable = set_embedable
|
||||
|
||||
|
||||
def add_style_file(self, stylefilename, media=None):
    """ Add a link to an external style file.
        Also turns off the embedding of styles in the HTML.

        ``stylefilename`` is stored on the object and used as the href of a
        <link rel="stylesheet"> tag appended to self.metatags.  A media
        attribute is included only when ``media`` is truthy.  Neither value
        is escaped before being placed in the tag.
    """
    self.use_internal_css = False
    self.stylefilename = stylefilename
    if not media:
        link = '<link rel="stylesheet" type="text/css" href="%s"/>\n' % (stylefilename)
    else:
        link = '<link rel="stylesheet" type="text/css" href="%s" media="%s"/>\n' % (stylefilename,media)
    self.metatags.append(link)
|
||||
|
||||
def _resetfootnotes(self):
    """ Reset the footnote and endnote state to its initial, empty values. """
    self.notebody = ''
    self.currentnote = 0
    self.notedict = {}
|
||||
|
||||
def _resetobject(self):
    """ Reset the converter's working state to its initial values.

    Output lines, document metadata, the parser stacks and flags, the style
    bookkeeping, the footnote state (through _resetfootnotes) and the
    collected meta tags are all reinitialised.
    """
    # Output buffer and the function used to write to it
    self.lines = []
    self._wfunc = self._wlines

    # Document level text values
    self.xmlfile = self.title = self.language = self.creator = ''

    # Parser state
    self.data = []
    self.tagstack = TagStack()
    self.htmlstack = []
    self.pstack = []
    self.processelem = self.processcont = True
    self.listtypes = {}
    self.headinglevels = [0] * 11 # level 0 to 10
    self.use_internal_css = True
    self.cs = StyleToCSS()
    self.anchors = {}

    # Style declarations
    self.stylestack = []
    self.styledict = {}
    self.currentstyle = None

    self._resetfootnotes()

    # Tags from meta.xml
    self.metatags = []
|
||||
|
||||
|
||||
def writeout(self, s):
|
||||
@ -447,6 +516,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
|
||||
def opentag(self, tag, attrs={}, block=False):
|
||||
""" Create an open HTML tag """
|
||||
self.htmlstack.append((tag,attrs,block))
|
||||
a = []
|
||||
for key,val in attrs.items():
|
||||
a.append('''%s=%s''' % (key, quoteattr(val)))
|
||||
@ -458,6 +528,8 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.writeout("\n")
|
||||
|
||||
def closetag(self, tag, block=True):
    """ Close an open HTML tag: drop the innermost entry of the HTML
        stack and write the end tag. A newline follows only when block
        compares equal to True.
    """
    self.htmlstack.pop()
    self.writeout("</%s>" % tag)
    if not block == True:
        return
    self.writeout("\n")
|
||||
@ -468,17 +540,13 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
a.append('''%s=%s''' % (key, quoteattr(val)))
|
||||
self.writeout("<%s %s/>\n" % (tag, " ".join(a)))
|
||||
|
||||
#--------------------------------------------------
|
||||
# Interface to parser
|
||||
#--------------------------------------------------
|
||||
def characters(self, data):
    """ Buffer character data, unless element or content processing
        is currently switched off.
    """
    if not (self.processelem and self.processcont):
        return
    self.data.append(data)
|
||||
|
||||
def handle_starttag(self, tag, method, attrs):
    """ Invoke the start handler 'method' with the tag and its attributes """
    method(tag, attrs)
|
||||
|
||||
def handle_endtag(self, tag, attrs, method):
    """ Invoke the end handler 'method' with the tag and its attributes.
        Note the parameter order: attrs comes before method here.
    """
    method(tag, attrs)
|
||||
|
||||
def startElementNS(self, tag, qname, attrs):
|
||||
self.pstack.append( (self.processelem, self.processcont) )
|
||||
if self.processelem:
|
||||
@ -499,6 +567,13 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.unknown_endtag(tag, attrs)
|
||||
self.processelem, self.processcont = self.pstack.pop()
|
||||
|
||||
#--------------------------------------------------
|
||||
def handle_starttag(self, tag, method, attrs):
    """ Invoke the start handler 'method' with the tag and its attributes """
    method(tag, attrs)
|
||||
|
||||
def handle_endtag(self, tag, attrs, method):
    """ Invoke the end handler 'method' with the tag and its attributes.
        Note the parameter order: attrs comes before method here.
    """
    method(tag, attrs)
|
||||
|
||||
def unknown_starttag(self, tag, attrs):
    """ Start-tag hook that deliberately does nothing """
    return None
|
||||
|
||||
@ -512,18 +587,21 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.processelem = False
|
||||
|
||||
def s_ignorecont(self, tag, attrs):
    """ Switch off the processing of text nodes """
    self.processcont = False
|
||||
|
||||
def s_processcont(self, tag, attrs):
    """ Switch on the processing of text nodes """
    self.processcont = True
|
||||
|
||||
def classname(self, attrs):
    """ Generate a class name from a style name.
        Returns the text:style-name attribute with every "." replaced by
        "_", or '' when the attribute is absent.
    """
    # A plain attrs[...] lookup would raise KeyError for elements without
    # a style name, so only the defaulting lookup is used.
    c = attrs.get((TEXTNS, 'style-name'), '')
    return c.replace(".", "_")
|
||||
|
||||
def get_anchor(self, name):
|
||||
""" Create a unique anchor id for a href name """
|
||||
if not self.anchors.has_key(name):
|
||||
# Changed by Kovid
|
||||
self.anchors[name] = "anchor%d" % (len(self.anchors) + 1)
|
||||
@ -543,8 +621,8 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
def e_dc_title(self, tag, attrs):
    """ Get the title from the meta data and remember it in self.title.
        The buffered character data is consumed.
    """
    # The <title> element itself is written from self.title when the
    # document head is produced; appending it to self.metatags as well
    # would emit two <title> elements.
    self.title = ''.join(self.data)
    self.data = []
|
||||
|
||||
def e_dc_metatag(self, tag, attrs):
|
||||
@ -556,13 +634,57 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
def e_dc_contentlanguage(self, tag, attrs):
    """ Set the content language. Identifies the targeted audience.
        Stores the value in self.language and queues exactly one
        content-language <meta> tag. The buffered character data is consumed.
    """
    self.language = ''.join(self.data)
    # The value lands inside a double-quoted attribute, so double quotes
    # are escaped in addition to &, < and >.
    self.metatags.append('<meta http-equiv="content-language" content="%s"/>\n'
                         % escape(self.language, {'"': '&quot;'}))
    self.data = []
|
||||
|
||||
def e_dc_creator(self, tag, attrs):
    """ Set the content creator.
        Stores the value in self.creator and queues a creator <meta> tag.
        The buffered character data is consumed.
    """
    self.creator = ''.join(self.data)
    # The value lands inside a double-quoted attribute, so double quotes
    # are escaped in addition to &, < and >.
    self.metatags.append('<meta http-equiv="creator" content="%s"/>\n'
                         % escape(self.creator, {'"': '&quot;'}))
    self.data = []
|
||||
|
||||
def s_custom_shape(self, tag, attrs):
    """ A <draw:custom-shape> is made into a <div> in HTML which is then styled.
        The class name comes from draw:style-name ("G-" prefix) or, failing
        that, presentation:style-name ("PR-" prefix). The inline style is
        built from the anchor type and the svg:width/height/x/y attributes.
        When CSS generation is off a bare <div> is opened.
    """
    anchor_type = attrs.get((TEXTNS, 'anchor-type'), 'notfound')
    htmltag = 'div'
    name = "G-" + attrs.get((DRAWNS, 'style-name'), "")
    if name == 'G-':
        name = "PR-" + attrs.get((PRESENTATIONNS, 'style-name'), "")
    name = name.replace(".", "_")
    if anchor_type in ("paragraph", "char"):
        style = 'position:absolute;'
    elif anchor_type == 'as-char':
        style = ''
    else:
        style = "position: absolute;"
    # Geometry attributes and the CSS property each one maps to, in output order
    for svgname, cssprop in (("width", "width"), ("height", "height"),
                             ("x", "left"), ("y", "top")):
        if (SVGNS, svgname) in attrs:
            style = style + cssprop + ":" + attrs[(SVGNS, svgname)] + ";"
    if self.generate_css:
        self.opentag(htmltag, {'class': name, 'style': style})
    else:
        self.opentag(htmltag)
|
||||
|
||||
def e_custom_shape(self, tag, attrs):
    """ End the <draw:custom-shape>: close the <div> that was opened for it
    """
    self.closetag('div')
|
||||
|
||||
def s_draw_frame(self, tag, attrs):
|
||||
""" A <draw:frame> is made into a <div> in HTML which is then styled
|
||||
"""
|
||||
anchor_type = attrs.get((TEXTNS,'anchor-type'),'char')
|
||||
anchor_type = attrs.get((TEXTNS,'anchor-type'),'notfound')
|
||||
htmltag = 'div'
|
||||
name = "G-" + attrs.get( (DRAWNS,'style-name'), "")
|
||||
if name == 'G-':
|
||||
@ -576,7 +698,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
htmltag = 'div'
|
||||
style = ''
|
||||
else:
|
||||
style = "position: absolute;"
|
||||
style = "position:absolute;"
|
||||
if attrs.has_key( (SVGNS,"width") ):
|
||||
style = style + "width:" + attrs[(SVGNS,"width")] + ";"
|
||||
if attrs.has_key( (SVGNS,"height") ):
|
||||
@ -620,6 +742,30 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
htmlattrs['style'] = "display: block;"
|
||||
self.emptytag('img', htmlattrs)
|
||||
|
||||
def s_draw_object(self, tag, attrs):
    """ A <draw:object> is an embedded object in the document (e.g. spreadsheet in presentation).
        Embedded objects are currently not converted: the handler returns
        immediately and produces no output.
    """
    # NOTE: disabled on purpose (Added by Kovid). The code that used to
    # follow the return looked up the xlink:href attribute and walked the
    # top node of the child document stored in that folder; it could never
    # run and has been dropped.
    return
|
||||
|
||||
def s_draw_object_ole(self, tag, attrs):
    """ A <draw:object-ole> is an embedded OLE object in the document (e.g. MS Graph).
        Only the Microsoft Graph 97 Chart class id is recognised; for it an
        empty <a name="object_ole_graph" class="ole-graph"> element is
        written. Any other object, or a missing class id, produces nothing.
    """
    # .get() so that a missing draw:class-id falls through to the guard
    # below instead of raising KeyError
    class_id = attrs.get((DRAWNS, "class-id"))
    if class_id and class_id.lower() == "00020803-0000-0000-c000-000000000046":  # Microsoft Graph 97 Chart
        tagattrs = {'name': 'object_ole_graph', 'class': 'ole-graph'}
        self.opentag('a', tagattrs)
        # The second parameter of closetag is the block flag, not the
        # attributes. The dict passed here before never compared equal to
        # True, so False keeps the output unchanged.
        self.closetag('a', False)
|
||||
|
||||
def s_draw_page(self, tag, attrs):
|
||||
""" A <draw:page> is a slide in a presentation. We use a <fieldset> element in HTML.
|
||||
Therefore if you convert a ODP file, you get a series of <fieldset>s.
|
||||
@ -655,14 +801,9 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
|
||||
def html_body(self, tag, attrs):
|
||||
self.writedata()
|
||||
if self.generate_css:
|
||||
if self.generate_css and self.use_internal_css:
|
||||
self.opentag('style', {'type':"text/css"}, True)
|
||||
self.writeout('/*<![CDATA[*/\n')
|
||||
self.writeout('\nimg { width: 100%; height: 100%; }\n')
|
||||
# background-color: white removed by Kovid for #9118
|
||||
self.writeout('* { padding: 0; margin: 0; }\n')
|
||||
self.writeout('body { margin: 0 1em; }\n')
|
||||
self.writeout('ol, ul { padding-left: 2em; }\n')
|
||||
self.generate_stylesheet()
|
||||
self.writeout('/*]]>*/\n')
|
||||
self.closetag('style')
|
||||
@ -670,6 +811,16 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.closetag('head')
|
||||
self.opentag('body', block=True)
|
||||
|
||||
# background-color: white removed by Kovid for #9118
|
||||
# Specifying an explicit bg color prevents ebook readers
|
||||
# from successfully inverting colors
|
||||
default_styles = """
|
||||
img { width: 100%; height: 100%; }
|
||||
* { padding: 0; margin: 0; }
|
||||
body { margin: 0 1em; }
|
||||
ol, ul { padding-left: 2em; }
|
||||
"""
|
||||
|
||||
def generate_stylesheet(self):
|
||||
for name in self.stylestack:
|
||||
styles = self.styledict.get(name)
|
||||
@ -689,6 +840,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
styles = parentstyle
|
||||
self.styledict[name] = styles
|
||||
# Write the styles to HTML
|
||||
self.writeout(self.default_styles)
|
||||
for name in self.stylestack:
|
||||
styles = self.styledict.get(name)
|
||||
css2 = self.cs.convert_styles(styles)
|
||||
@ -730,6 +882,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.emptytag('meta', { 'http-equiv':"Content-Type", 'content':"text/html;charset=UTF-8"})
|
||||
for metaline in self.metatags:
|
||||
self.writeout(metaline)
|
||||
self.writeout('<title>%s</title>\n' % escape(self.title))
|
||||
|
||||
def e_office_document_content(self, tag, attrs):
|
||||
""" Last tag """
|
||||
@ -774,7 +927,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
""" Copy all attributes to a struct.
|
||||
We will later convert them to CSS2
|
||||
"""
|
||||
if self.currentstyle is None:
|
||||
if self.currentstyle is None: # Added by Kovid
|
||||
return
|
||||
for key,attr in attrs.items():
|
||||
self.styledict[self.currentstyle][key] = attr
|
||||
@ -800,7 +953,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
def s_style_font_face(self, tag, attrs):
|
||||
""" It is possible that the HTML browser doesn't know how to
|
||||
show a particular font. Luckily ODF provides generic fallbacks
|
||||
Unluckily they are not the same as CSS2.
|
||||
Unfortunately they are not the same as CSS2.
|
||||
CSS2: serif, sans-serif, cursive, fantasy, monospace
|
||||
ODF: roman, swiss, modern, decorative, script, system
|
||||
"""
|
||||
@ -851,7 +1004,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
"""
|
||||
name = attrs[(STYLENS,'name')]
|
||||
name = name.replace(".","_")
|
||||
self.currentstyle = "@page " + name
|
||||
self.currentstyle = ".PL-" + name
|
||||
self.stylestack.append(self.currentstyle)
|
||||
self.styledict[self.currentstyle] = {}
|
||||
|
||||
@ -882,7 +1035,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.s_ignorexml(tag, attrs)
|
||||
|
||||
# Short prefixes for class selectors
|
||||
familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
|
||||
_familyshort = {'drawing-page':'DP', 'paragraph':'P', 'presentation':'PR',
|
||||
'text':'S', 'section':'D',
|
||||
'table':'T', 'table-cell':'TD', 'table-column':'TC',
|
||||
'table-row':'TR', 'graphic':'G' }
|
||||
@ -898,7 +1051,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
name = name.replace(".","_")
|
||||
family = attrs[(STYLENS,'family')]
|
||||
htmlfamily = self.familymap.get(family,'unknown')
|
||||
sfamily = self.familyshort.get(family,'X')
|
||||
sfamily = self._familyshort.get(family,'X')
|
||||
name = "%s%s-%s" % (self.autoprefix, sfamily, name)
|
||||
parent = attrs.get( (STYLENS,'parent-style-name') )
|
||||
self.currentstyle = special_styles.get(name,"."+name)
|
||||
@ -943,6 +1096,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def s_table_table_cell(self, tag, attrs):
|
||||
""" Start a table cell """
|
||||
#FIXME: number-columns-repeated § 8.1.3
|
||||
#repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
|
||||
htmlattrs = {}
|
||||
@ -960,11 +1114,13 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def e_table_table_cell(self, tag, attrs):
    """ End a table cell: write the buffered text, close the <td> and
        purge the buffer.
    """
    self.writedata()
    self.closetag('td')
    self.purgedata()
|
||||
|
||||
def s_table_table_column(self, tag, attrs):
|
||||
""" Start a table column """
|
||||
c = attrs.get( (TABLENS,'style-name'), None)
|
||||
repeated = int(attrs.get( (TABLENS,'number-columns-repeated'), 1))
|
||||
htmlattrs = {}
|
||||
@ -975,6 +1131,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def s_table_table_row(self, tag, attrs):
|
||||
""" Start a table row """
|
||||
#FIXME: table:number-rows-repeated
|
||||
c = attrs.get( (TABLENS,'style-name'), None)
|
||||
htmlattrs = {}
|
||||
@ -984,6 +1141,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def e_table_table_row(self, tag, attrs):
    """ End a table row: write the buffered text, close the <tr> and
        purge the buffer.
    """
    self.writedata()
    self.closetag('tr')
    self.purgedata()
|
||||
@ -998,10 +1156,28 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def e_text_a(self, tag, attrs):
    """ End an anchor or bookmark reference: write the buffered text and
        close the <a> without a trailing newline.
    """
    self.writedata()
    self.closetag('a', False)
    self.purgedata()
|
||||
|
||||
def s_text_bookmark(self, tag, attrs):
    """ Bookmark definition: emit an empty <span> whose id is the anchor
        generated for the bookmark's text:name.
    """
    html_id = self.get_anchor(attrs[(TEXTNS, 'name')])
    self.writedata()
    # Empty inline element; it only carries the id that links point at
    self.opentag('span', {'id': html_id})
    self.closetag('span', False)
    self.purgedata()
|
||||
|
||||
def s_text_bookmark_ref(self, tag, attrs):
    """ Bookmark reference: open an <a> whose href is the anchor
        generated for text:ref-name. The tag is not closed here.
    """
    target = self.get_anchor(attrs[(TEXTNS, 'ref-name')])
    self.writedata()
    self.opentag('a', {'href': "#" + target})
    self.purgedata()
|
||||
|
||||
def s_text_h(self, tag, attrs):
|
||||
""" Headings start """
|
||||
level = int(attrs[(TEXTNS,'outline-level')])
|
||||
@ -1019,13 +1195,19 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def e_text_h(self, tag, attrs):
|
||||
""" Headings end """
|
||||
""" Headings end
|
||||
Side-effect: If there is no title in the metadata, then it is taken
|
||||
from the first heading of any level.
|
||||
"""
|
||||
self.writedata()
|
||||
level = int(attrs[(TEXTNS,'outline-level')])
|
||||
if level > 6: level = 6 # Heading levels go only to 6 in XHTML
|
||||
if level < 1: level = 1
|
||||
lev = self.headinglevels[1:level+1]
|
||||
outline = '.'.join(map(str,lev) )
|
||||
heading = ''.join(self.data)
|
||||
if self.title == '': self.title = heading
|
||||
# Changed by Kovid
|
||||
tail = ''.join(self.data)
|
||||
anchor = self.get_anchor("%s.%s" % ( outline, tail))
|
||||
anchor2 = self.get_anchor(tail) # Added by kovid to fix #7506
|
||||
@ -1037,12 +1219,14 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
self.purgedata()
|
||||
|
||||
def s_text_line_break(self, tag, attrs):
    """ Force a line break: write the buffered text, then an empty <br/> """
    self.writedata()
    self.emptytag('br')
    self.purgedata()
|
||||
|
||||
def s_text_list(self, tag, attrs):
|
||||
""" To know which level we're at, we have to count the number
|
||||
""" Start a list (<ul> or <ol>)
|
||||
To know which level we're at, we have to count the number
|
||||
of <text:list> elements on the tagstack.
|
||||
"""
|
||||
name = attrs.get( (TEXTNS,'style-name') )
|
||||
@ -1056,12 +1240,13 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
|
||||
list_class = "%s_%d" % (name, level)
|
||||
if self.generate_css:
|
||||
self.opentag('%s' % self.listtypes.get(list_class,'UL'), {'class': list_class })
|
||||
self.opentag('%s' % self.listtypes.get(list_class,'ul'), {'class': list_class })
|
||||
else:
|
||||
self.opentag('%s' % self.listtypes.get(list_class,'UL'))
|
||||
self.opentag('%s' % self.listtypes.get(list_class,'ul'))
|
||||
self.purgedata()
|
||||
|
||||
def e_text_list(self, tag, attrs):
|
||||
""" End a list """
|
||||
self.writedata()
|
||||
name = attrs.get( (TEXTNS,'style-name') )
|
||||
level = self.tagstack.count_tags(tag) + 1
|
||||
@ -1073,14 +1258,16 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
# textbox itself may be nested within another list.
|
||||
name = self.tagstack.rfindattr( (TEXTNS,'style-name') )
|
||||
list_class = "%s_%d" % (name, level)
|
||||
self.closetag(self.listtypes.get(list_class,'UL'))
|
||||
self.closetag(self.listtypes.get(list_class,'ul'))
|
||||
self.purgedata()
|
||||
|
||||
def s_text_list_item(self, tag, attrs):
    """ Start list item: open an <li> and purge the text buffer """
    self.opentag('li')
    self.purgedata()
|
||||
|
||||
def e_text_list_item(self, tag, attrs):
    """ End list item: write the buffered text, close the <li> and
        purge the buffer.
    """
    self.writedata()
    self.closetag('li')
    self.purgedata()
|
||||
@ -1192,7 +1379,7 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
if specialtag is None:
|
||||
specialtag = 'p'
|
||||
self.writedata()
|
||||
if not self.data:
|
||||
if not self.data: # Added by Kovid
|
||||
# Give substance to empty paragraphs, as rendered by OOo
|
||||
self.writeout(' ')
|
||||
self.closetag(specialtag)
|
||||
@ -1255,55 +1442,30 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
def load(self, odffile):
    """ Loads a document into the parser and parses it.
        The argument can either be a filename, an object with a read()
        method, or a document in memory. Anything that is neither a string
        nor readable is used as the document as it is.
    """
    # Kept after the docstring so that the docstring really is one
    self._odffile = odffile
    # Start from an empty output buffer with the line-collecting writer
    self.lines = []
    self._wfunc = self._wlines
    if isinstance(odffile, basestring) or hasattr(odffile, 'read'):  # Added by Kovid
        self.document = load(odffile)
    else:
        self.document = odffile
    self._walknode(self.document.topnode)
|
||||
|
||||
def parseodf(self):
|
||||
self.xmlfile = ''
|
||||
self.title = ''
|
||||
self.data = []
|
||||
self.tagstack = TagStack()
|
||||
self.pstack = []
|
||||
self.processelem = True
|
||||
self.processcont = True
|
||||
self.listtypes = {}
|
||||
self.headinglevels = [0, 0,0,0,0,0, 0,0,0,0,0] # level 0 to 10
|
||||
self.cs = StyleToCSS()
|
||||
self.anchors = {}
|
||||
def _walknode(self, node):
    """ Walk a node tree recursively and feed it to the SAX-style
        callbacks: element nodes yield start/end events around their
        children, text and CDATA nodes yield character data.
    """
    kind = node.nodeType
    if kind == Node.ELEMENT_NODE:
        self.startElementNS(node.qname, node.tagName, node.attributes)
        for child in node.childNodes:
            self._walknode(child)
        self.endElementNS(node.qname, node.tagName)
    if kind in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
        self.characters(unicode(node))
|
||||
|
||||
# Style declarations
|
||||
self.stylestack = []
|
||||
self.styledict = {}
|
||||
self.currentstyle = None
|
||||
|
||||
# Footnotes and endnotes
|
||||
self.notedict = {}
|
||||
self.currentnote = 0
|
||||
self.notebody = ''
|
||||
|
||||
# Tags from meta.xml
|
||||
self.metatags = []
|
||||
|
||||
# Extract the interesting files
|
||||
z = zipfile.ZipFile(self._odffile)
|
||||
|
||||
# For some reason Trac has trouble when xml.sax.make_parser() is used.
|
||||
# Could it be because PyXML is installed, and therefore a different parser
|
||||
# might be chosen? By calling expatreader directly we avoid this issue
|
||||
parser = expatreader.create_parser()
|
||||
parser.setFeature(handler.feature_namespaces, 1)
|
||||
parser.setContentHandler(self)
|
||||
parser.setErrorHandler(handler.ErrorHandler())
|
||||
inpsrc = InputSource()
|
||||
|
||||
for xmlfile in ('meta.xml', 'styles.xml', 'content.xml'):
|
||||
self.xmlfile = xmlfile
|
||||
content = z.read(xmlfile)
|
||||
inpsrc.setByteStream(StringIO(content))
|
||||
parser.parse(inpsrc)
|
||||
z.close()
|
||||
|
||||
def odf2xhtml(self, odffile):
    """ Load a file and return the XHTML.
        odffile is handed unchanged to load(); the result is whatever
        xhtml() returns afterwards.
    """
    self.load(odffile)
    return self.xhtml()
|
||||
@ -1312,9 +1474,8 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
if s != '': self.lines.append(s)
|
||||
|
||||
def xhtml(self):
    """ Returns the xhtml: the collected output lines joined into one string.
        The buffer is filled when a document is loaded; it is neither
        reset nor re-parsed here.
    """
    return ''.join(self.lines)
|
||||
|
||||
def _writecss(self, s):
|
||||
@ -1324,11 +1485,127 @@ class ODF2XHTML(handler.ContentHandler):
|
||||
pass
|
||||
|
||||
def css(self):
    """ Returns the CSS content.
        The writer is temporarily pointed at a private buffer while the
        stylesheet is generated. Afterwards the line writer is restored and
        the buffer removed, even if generation raises.
    """
    self._csslines = []
    self._wfunc = self._writecss
    try:
        self.generate_stylesheet()
        res = ''.join(self._csslines)
    finally:
        self._wfunc = self._wlines
        del self._csslines
    return res
|
||||
|
||||
def save(self, outputfile, addsuffix=False):
    """ Save the HTML under the filename.
        If the filename is '-' then save to stdout.
        With addsuffix the extension ".html" is appended to the filename.
        The markup is written as US-ASCII; other characters become numeric
        character references. (self.stylefilename is not consulted here.)
    """
    markup = self.xhtml().encode('us-ascii', 'xmlcharrefreplace')
    if outputfile == '-':
        import sys  # Added by Kovid
        # stdout belongs to the caller: write to it but never close it
        sys.stdout.write(markup)
        return
    if addsuffix:
        outputfile = outputfile + ".html"
    outputfp = open(outputfile, "w")
    try:
        outputfp.write(markup)
    finally:
        # Release the handle even when the write fails
        outputfp.close()
|
||||
|
||||
|
||||
class ODF2XHTMLembedded(ODF2XHTML):
    """ Variant of ODF2XHTML that writes into a caller-supplied list of
        lines and registers a reduced handler table: the metadata, office
        wrapper and style handlers are left out (see the commented entries).
    """

    def __init__(self, lines, generate_css=True, embedable=False):
        # lines        -- list that receives the output; it replaces the
        #                 fresh buffer created by _resetobject()
        # generate_css -- stored as self.generate_css
        # embedable    -- accepted but not used in this constructor
        # NOTE(review): the parent __init__ is not called here — confirm
        # that nothing else it sets up is needed.
        self._resetobject()
        self.lines = lines

        # Tags
        self.generate_css = generate_css
        # Maps (namespace, element name) to a (start handler, end handler)
        # pair; None means no handler for that event.
        self.elements = {
#            (DCNS, 'title'): (self.s_processcont, self.e_dc_title),
#            (DCNS, 'language'): (self.s_processcont, self.e_dc_contentlanguage),
#            (DCNS, 'creator'): (self.s_processcont, self.e_dc_metatag),
#            (DCNS, 'description'): (self.s_processcont, self.e_dc_metatag),
#            (DCNS, 'date'): (self.s_processcont, self.e_dc_metatag),
            (DRAWNS, 'frame'): (self.s_draw_frame, self.e_draw_frame),
            (DRAWNS, 'image'): (self.s_draw_image, None),
            (DRAWNS, 'fill-image'): (self.s_draw_fill_image, None),
            (DRAWNS, "layer-set"):(self.s_ignorexml, None),
            (DRAWNS, 'page'): (self.s_draw_page, self.e_draw_page),
            (DRAWNS, 'object'): (self.s_draw_object, None),
            (DRAWNS, 'object-ole'): (self.s_draw_object_ole, None),
            (DRAWNS, 'text-box'): (self.s_draw_textbox, self.e_draw_textbox),
#            (METANS, 'creation-date'):(self.s_processcont, self.e_dc_metatag),
#            (METANS, 'generator'):(self.s_processcont, self.e_dc_metatag),
#            (METANS, 'initial-creator'): (self.s_processcont, self.e_dc_metatag),
#            (METANS, 'keyword'): (self.s_processcont, self.e_dc_metatag),
            (NUMBERNS, "boolean-style"):(self.s_ignorexml, None),
            (NUMBERNS, "currency-style"):(self.s_ignorexml, None),
            (NUMBERNS, "date-style"):(self.s_ignorexml, None),
            (NUMBERNS, "number-style"):(self.s_ignorexml, None),
            (NUMBERNS, "text-style"):(self.s_ignorexml, None),
#            (OFFICENS, "automatic-styles"):(self.s_office_automatic_styles, None),
#            (OFFICENS, "document-content"):(self.s_office_document_content, self.e_office_document_content),
            (OFFICENS, "forms"):(self.s_ignorexml, None),
#            (OFFICENS, "master-styles"):(self.s_office_master_styles, None),
            (OFFICENS, "meta"):(self.s_ignorecont, None),
#            (OFFICENS, "presentation"):(self.s_office_presentation, self.e_office_presentation),
#            (OFFICENS, "spreadsheet"):(self.s_office_spreadsheet, self.e_office_spreadsheet),
#            (OFFICENS, "styles"):(self.s_office_styles, None),
#            (OFFICENS, "text"):(self.s_office_text, self.e_office_text),
            (OFFICENS, "scripts"):(self.s_ignorexml, None),
            (PRESENTATIONNS, "notes"):(self.s_ignorexml, None),
##            (STYLENS, "default-page-layout"):(self.s_style_default_page_layout, self.e_style_page_layout),
#            (STYLENS, "default-page-layout"):(self.s_ignorexml, None),
#            (STYLENS, "default-style"):(self.s_style_default_style, self.e_style_default_style),
#            (STYLENS, "drawing-page-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "font-face"):(self.s_style_font_face, None),
##            (STYLENS, "footer"):(self.s_style_footer, self.e_style_footer),
##            (STYLENS, "footer-style"):(self.s_style_footer_style, None),
#            (STYLENS, "graphic-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "handout-master"):(self.s_ignorexml, None),
##            (STYLENS, "header"):(self.s_style_header, self.e_style_header),
##            (STYLENS, "header-footer-properties"):(self.s_style_handle_properties, None),
##            (STYLENS, "header-style"):(self.s_style_header_style, None),
#            (STYLENS, "master-page"):(self.s_style_master_page, None),
#            (STYLENS, "page-layout-properties"):(self.s_style_handle_properties, None),
##            (STYLENS, "page-layout"):(self.s_style_page_layout, self.e_style_page_layout),
#            (STYLENS, "page-layout"):(self.s_ignorexml, None),
#            (STYLENS, "paragraph-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "style"):(self.s_style_style, self.e_style_style),
#            (STYLENS, "table-cell-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "table-column-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "table-properties"):(self.s_style_handle_properties, None),
#            (STYLENS, "text-properties"):(self.s_style_handle_properties, None),
            (SVGNS, 'desc'): (self.s_ignorexml, None),
            (TABLENS, 'covered-table-cell'): (self.s_ignorexml, None),
            (TABLENS, 'table-cell'): (self.s_table_table_cell, self.e_table_table_cell),
            (TABLENS, 'table-column'): (self.s_table_table_column, None),
            (TABLENS, 'table-row'): (self.s_table_table_row, self.e_table_table_row),
            (TABLENS, 'table'): (self.s_table_table, self.e_table_table),
            (TEXTNS, 'a'): (self.s_text_a, self.e_text_a),
            (TEXTNS, "alphabetical-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "bibliography-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "bibliography-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'h'): (self.s_text_h, self.e_text_h),
            (TEXTNS, "illustration-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'line-break'):(self.s_text_line_break, None),
            (TEXTNS, "linenumbering-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "list"):(self.s_text_list, self.e_text_list),
            (TEXTNS, "list-item"):(self.s_text_list_item, self.e_text_list_item),
            (TEXTNS, "list-level-style-bullet"):(self.s_text_list_level_style_bullet, self.e_text_list_level_style_bullet),
            (TEXTNS, "list-level-style-number"):(self.s_text_list_level_style_number, self.e_text_list_level_style_number),
            (TEXTNS, "list-style"):(None, None),
            (TEXTNS, "note"):(self.s_text_note, None),
            (TEXTNS, "note-body"):(self.s_text_note_body, self.e_text_note_body),
            (TEXTNS, "note-citation"):(None, self.e_text_note_citation),
            (TEXTNS, "notes-configuration"):(self.s_ignorexml, None),
            (TEXTNS, "object-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, 'p'): (self.s_text_p, self.e_text_p),
            (TEXTNS, 's'): (self.s_text_s, None),
            (TEXTNS, 'span'): (self.s_text_span, self.e_text_span),
            (TEXTNS, 'tab'): (self.s_text_tab, None),
            (TEXTNS, "table-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "table-of-content-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "user-index-source"):(self.s_text_x_source, self.e_text_x_source),
            (TEXTNS, "page-number"):(None, None),
        }
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2006-2009 Søren Roug, European Environment Agency
|
||||
# Copyright (C) 2006-2010 Søren Roug, European Environment Agency
|
||||
#
|
||||
# This library is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Lesser General Public
|
||||
@ -41,7 +41,7 @@ IS_IMAGE = 1
|
||||
# We need at least Python 2.2
# Tuple comparison, so that e.g. 3.0 is accepted; comparing the major and
# minor numbers separately rejected every x.0 and x.1 release.
assert sys.version_info >= (2, 2)

# The recursion limit is left at the interpreter default. An assignment to
# sys.setrecursionlimit would only replace the function with a number and
# change nothing. A conservative limit was once considered so that mistakes
# like
#   s=content() s.addElement(s) won't eat up too much processor time.
|
||||
|
||||
@ -128,12 +128,12 @@ class OpenDocument:
|
||||
self.element_dict[element.qname] = []
|
||||
self.element_dict[element.qname].append(element)
|
||||
if element.qname == (STYLENS, u'style'):
|
||||
self._register_stylename(element) # Add to style dictionary
|
||||
self.__register_stylename(element) # Add to style dictionary
|
||||
styleref = element.getAttrNS(TEXTNS,u'style-name')
|
||||
if styleref is not None and self._styles_ooo_fix.has_key(styleref):
|
||||
element.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])
|
||||
|
||||
def _register_stylename(self, element):
|
||||
def __register_stylename(self, element):
|
||||
''' Register a style. But there are three style dictionaries:
|
||||
office:styles, office:automatic-styles and office:master-styles
|
||||
Chapter 14
|
||||
@ -165,7 +165,7 @@ class OpenDocument:
|
||||
""" Generates the full document as an XML file
|
||||
Always written as a bytestream in UTF-8 encoding
|
||||
"""
|
||||
self._replaceGenerator()
|
||||
self.__replaceGenerator()
|
||||
xml=StringIO()
|
||||
xml.write(_XMLPROLOGUE)
|
||||
self.topnode.toXml(0, xml)
|
||||
@ -197,8 +197,10 @@ class OpenDocument:
|
||||
x.write_close_tag(0, xml)
|
||||
return xml.getvalue()
|
||||
|
||||
def manifestxml(self):
|
||||
""" Generates the manifest.xml file """
|
||||
def __manifestxml(self):
|
||||
""" Generates the manifest.xml file
|
||||
The self.manifest isn't available unless the document is being saved
|
||||
"""
|
||||
xml=StringIO()
|
||||
xml.write(_XMLPROLOGUE)
|
||||
self.manifest.toXml(0,xml)
|
||||
@ -206,7 +208,7 @@ class OpenDocument:
|
||||
|
||||
def metaxml(self):
|
||||
""" Generates the meta.xml file """
|
||||
self._replaceGenerator()
|
||||
self.__replaceGenerator()
|
||||
x = DocumentMeta()
|
||||
x.addElement(self.meta)
|
||||
xml=StringIO()
|
||||
@ -344,7 +346,7 @@ class OpenDocument:
|
||||
self.thumbnail = filecontent
|
||||
|
||||
def addObject(self, document, objectname=None):
|
||||
""" Add an object. The object must be an OpenDocument class
|
||||
""" Adds an object (subdocument). The object must be an OpenDocument class
|
||||
The return value will be the folder in the zipfile the object is stored in
|
||||
"""
|
||||
self.childobjects.append(document)
|
||||
@ -367,15 +369,16 @@ class OpenDocument:
|
||||
zi.compress_type = zipfile.ZIP_STORED
|
||||
zi.external_attr = UNIXPERMS
|
||||
self._z.writestr(zi, fileobj)
|
||||
if hasPictures:
|
||||
self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder,mediatype=""))
|
||||
# According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
|
||||
# if hasPictures:
|
||||
# self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
|
||||
# Look in subobjects
|
||||
subobjectnum = 1
|
||||
for subobject in object.childobjects:
|
||||
self._savePictures(subobject,'%sObject %d/' % (folder, subobjectnum))
|
||||
subobjectnum += 1
|
||||
|
||||
def _replaceGenerator(self):
|
||||
def __replaceGenerator(self):
|
||||
""" Section 3.1.1: The application MUST NOT export the original identifier
|
||||
belonging to the application that created the document.
|
||||
"""
|
||||
@ -385,22 +388,29 @@ class OpenDocument:
|
||||
self.meta.addElement(meta.Generator(text=TOOLSVERSION))
|
||||
|
||||
def save(self, outputfile, addsuffix=False):
    """ Save the document under the filename.

        If the filename is '-' then save to stdout (addsuffix is ignored
        in that case).

        outputfile -- path of the file to create, or '-' for sys.stdout
        addsuffix  -- when true, append the extension that odmimetypes
                      lists for self.mimetype ('.xxx' if it has none)
    """
    if outputfile == '-':
        outputfp = zipfile.ZipFile(sys.stdout, "w")
    else:
        if addsuffix:
            outputfile = outputfile + odmimetypes.get(self.mimetype, '.xxx')
        outputfp = zipfile.ZipFile(outputfile, "w")
    try:
        # Single call to the (renamed) private writer; the archive must be
        # produced exactly once.
        self.__zipwrite(outputfp)
    finally:
        # Always finalise the archive, even if writing a member failed.
        outputfp.close()
|
||||
|
||||
def write(self, outputfp):
    """ User API to write the ODF file to an open file descriptor

        Writes the ZIP format.

        outputfp -- an already opened, writable file object. It is wrapped
                    in a zipfile.ZipFile for the duration of the call.
    """
    zipoutputfp = zipfile.ZipFile(outputfp, "w")
    try:
        self.__zipwrite(zipoutputfp)
    finally:
        # Close the ZipFile explicitly so the central directory is written
        # now rather than whenever the object happens to be collected.
        zipoutputfp.close()
|
||||
|
||||
def _zipwrite(self, outputfp):
|
||||
""" Write the document to an open file pointer """
|
||||
def __zipwrite(self, outputfp):
|
||||
""" Write the document to an open file pointer
|
||||
This is where the real work is done
|
||||
"""
|
||||
self._z = outputfp
|
||||
self._now = time.localtime()[:6]
|
||||
self.manifest = manifest.Manifest()
|
||||
@ -438,7 +448,7 @@ class OpenDocument:
|
||||
zi = zipfile.ZipInfo("META-INF/manifest.xml", self._now)
|
||||
zi.compress_type = zipfile.ZIP_DEFLATED
|
||||
zi.external_attr = UNIXPERMS
|
||||
self._z.writestr(zi, self.manifestxml() )
|
||||
self._z.writestr(zi, self.__manifestxml() )
|
||||
del self._z
|
||||
del self._now
|
||||
del self.manifest
|
||||
@ -464,8 +474,8 @@ class OpenDocument:
|
||||
self._z.writestr(zi, object.contentxml() )
|
||||
|
||||
# Write settings
|
||||
if self == object and self.settings.hasChildNodes():
|
||||
self.manifest.addElement(manifest.FileEntry(fullpath="settings.xml",mediatype="text/xml"))
|
||||
if object.settings.hasChildNodes():
|
||||
self.manifest.addElement(manifest.FileEntry(fullpath="%ssettings.xml" % folder, mediatype="text/xml"))
|
||||
zi = zipfile.ZipInfo("%ssettings.xml" % folder, self._now)
|
||||
zi.compress_type = zipfile.ZIP_DEFLATED
|
||||
zi.external_attr = UNIXPERMS
|
||||
@ -473,7 +483,7 @@ class OpenDocument:
|
||||
|
||||
# Write meta
|
||||
if self == object:
|
||||
self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml",mediatype="text/xml"))
|
||||
self.manifest.addElement(manifest.FileEntry(fullpath="meta.xml", mediatype="text/xml"))
|
||||
zi = zipfile.ZipInfo("meta.xml", self._now)
|
||||
zi.compress_type = zipfile.ZIP_DEFLATED
|
||||
zi.external_attr = UNIXPERMS
|
||||
@ -497,6 +507,7 @@ class OpenDocument:
|
||||
return element.Text(data)
|
||||
|
||||
def createCDATASection(self, data):
    """ Method to create a CDATA section

        data -- the content for the section; it is passed unchanged to
                element.CDATASection

        Returns the object created by element.CDATASection.
    """
    # Use the 'data' parameter; there is no name 'cdata' in this scope.
    return element.CDATASection(data)
|
||||
|
||||
def getMediaType(self):
|
||||
@ -504,12 +515,14 @@ class OpenDocument:
|
||||
return self.mimetype
|
||||
|
||||
def getStyleByName(self, name):
    """ Look up a style object by its name.

        The name is first converted with make_NCName. If the style cache
        is an empty dict, rebuild_caches() is called before the lookup.
        Returns None when the cache has no entry for the converted name.
    """
    key = make_NCName(name)
    cache = self._styles_dict
    if cache == {}:
        self.rebuild_caches()
        # Re-read the attribute in case rebuild_caches() rebound it.
        cache = self._styles_dict
    return cache.get(key)
|
||||
|
||||
def getElementsByType(self, element):
|
||||
""" Gets elements based on the type, which is function from text.py, draw.py etc. """
|
||||
obj = element(check_grammar=False)
|
||||
if self.element_dict == {}:
|
||||
self.rebuild_caches()
|
||||
@ -517,53 +530,59 @@ class OpenDocument:
|
||||
|
||||
# Convenience functions
|
||||
def OpenDocumentChart():
    """ Build a chart document.

        Returns an OpenDocument with the chart mimetype whose body holds a
        new Chart element; the same element is available as doc.chart.
    """
    chart_doc = OpenDocument('application/vnd.oasis.opendocument.chart')
    root = Chart()
    chart_doc.chart = root
    chart_doc.body.addElement(root)
    return chart_doc
|
||||
|
||||
def OpenDocumentDrawing():
    """ Build a drawing document.

        Returns an OpenDocument with the graphics mimetype whose body holds
        a new Drawing element; the same element is available as doc.drawing.
    """
    drawing_doc = OpenDocument('application/vnd.oasis.opendocument.graphics')
    root = Drawing()
    drawing_doc.drawing = root
    drawing_doc.body.addElement(root)
    return drawing_doc
|
||||
|
||||
def OpenDocumentImage():
    """ Build an image document.

        Returns an OpenDocument with the image mimetype whose body holds a
        new Image element; the same element is available as doc.image.
    """
    image_doc = OpenDocument('application/vnd.oasis.opendocument.image')
    root = Image()
    image_doc.image = root
    image_doc.body.addElement(root)
    return image_doc
|
||||
|
||||
def OpenDocumentPresentation():
    """ Build a presentation document.

        Returns an OpenDocument with the presentation mimetype whose body
        holds a new Presentation element; the same element is available as
        doc.presentation.
    """
    presentation_doc = OpenDocument('application/vnd.oasis.opendocument.presentation')
    root = Presentation()
    presentation_doc.presentation = root
    presentation_doc.body.addElement(root)
    return presentation_doc
|
||||
|
||||
def OpenDocumentSpreadsheet():
    """ Build a spreadsheet document.

        Returns an OpenDocument with the spreadsheet mimetype whose body
        holds a new Spreadsheet element; the same element is available as
        doc.spreadsheet.
    """
    sheet_doc = OpenDocument('application/vnd.oasis.opendocument.spreadsheet')
    root = Spreadsheet()
    sheet_doc.spreadsheet = root
    sheet_doc.body.addElement(root)
    return sheet_doc
|
||||
|
||||
def OpenDocumentText():
    """ Build a text document.

        Returns an OpenDocument with the text mimetype whose body holds a
        new Text element; the same element is available as doc.text.
    """
    text_doc = OpenDocument('application/vnd.oasis.opendocument.text')
    root = Text()
    text_doc.text = root
    text_doc.body.addElement(root)
    return text_doc
|
||||
|
||||
def OpenDocumentTextMaster():
    """ Build a text master document.

        Returns an OpenDocument with the text-master mimetype whose body
        holds a new Text element; the same element is available as doc.text.
    """
    master_doc = OpenDocument('application/vnd.oasis.opendocument.text-master')
    root = Text()
    master_doc.text = root
    master_doc.body.addElement(root)
    return master_doc
|
||||
|
||||
def load(odffile):
|
||||
def __loadxmlparts(z, manifest, doc, objectpath):
|
||||
from load import LoadParser
|
||||
from xml.sax import make_parser, handler
|
||||
z = zipfile.ZipFile(odffile)
|
||||
mimetype = z.read('mimetype')
|
||||
doc = OpenDocument(mimetype, add_generator=False)
|
||||
|
||||
# Look in the manifest file to see which of the four files are present
|
||||
manifestpart = z.read('META-INF/manifest.xml')
|
||||
manifest = manifestlist(manifestpart)
|
||||
for xmlfile in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
|
||||
for xmlfile in (objectpath+'settings.xml', objectpath+'meta.xml', objectpath+'content.xml', objectpath+'styles.xml'):
|
||||
if not manifest.has_key(xmlfile):
|
||||
continue
|
||||
try:
|
||||
@ -580,7 +599,19 @@ def load(odffile):
|
||||
parser.parse(inpsrc)
|
||||
del doc._parsing
|
||||
except KeyError, v: pass
|
||||
# FIXME: Add subobjects correctly here
|
||||
|
||||
def load(odffile):
|
||||
""" Load an ODF file into memory
|
||||
Returns a reference to the structure
|
||||
"""
|
||||
z = zipfile.ZipFile(odffile)
|
||||
mimetype = z.read('mimetype')
|
||||
doc = OpenDocument(mimetype, add_generator=False)
|
||||
|
||||
# Look in the manifest file to see which of the four files are present
|
||||
manifestpart = z.read('META-INF/manifest.xml')
|
||||
manifest = manifestlist(manifestpart)
|
||||
__loadxmlparts(z, manifest, doc, '')
|
||||
for mentry,mvalue in manifest.items():
|
||||
if mentry[:9] == "Pictures/" and len(mentry) > 9:
|
||||
doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
|
||||
@ -588,6 +619,13 @@ def load(odffile):
|
||||
doc.addThumbnail(z.read(mentry))
|
||||
elif mentry in ('settings.xml', 'meta.xml', 'content.xml', 'styles.xml'):
|
||||
pass
|
||||
# Load subobjects into structure
|
||||
elif mentry[:7] == "Object " and len(mentry) < 11 and mentry[-1] == "/":
|
||||
subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
|
||||
doc.addObject(subdoc, "/" + mentry[:-1])
|
||||
__loadxmlparts(z, manifest, subdoc, mentry)
|
||||
elif mentry[:7] == "Object ":
|
||||
pass # Don't load subobjects as opaque objects
|
||||
else:
|
||||
if mvalue['full-path'][-1] == '/':
|
||||
doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
|
||||
@ -612,4 +650,5 @@ def load(odffile):
|
||||
elif mimetype[:42] == 'application/vnd.oasis.opendocument.formula':
|
||||
doc.formula = b[0].firstChild
|
||||
return doc
|
||||
|
||||
# vim: set expandtab sw=4 :
|
||||
|
Loading…
x
Reference in New Issue
Block a user