Fix str() and unicode_literals in odf package

This commit is contained in:
Kovid Goyal 2019-05-20 17:17:33 +05:30
parent 43c0b1216e
commit 9d14bebafb
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
35 changed files with 101 additions and 74 deletions

View File

@ -19,7 +19,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
from calibre import CurrentDir, walk
from calibre.ebooks.oeb.base import _css_logger
from polyglot.builtins import unicode_type, string_or_bytes, filter, getcwd
from polyglot.builtins import unicode_type, string_or_bytes, filter, getcwd, as_bytes
class Extract(ODF2XHTML):
@ -292,7 +292,7 @@ class Extract(ODF2XHTML):
except:
log.exception('Failed to filter CSS, conversion may be slow')
with open('index.xhtml', 'wb') as f:
f.write(html.encode('utf-8'))
f.write(as_bytes(html))
zf = ZipFile(stream, 'r')
self.extract_pictures(zf)
opf = OPFCreator(os.path.abspath(getcwd()), mi)

View File

@ -0,0 +1 @@
from __future__ import print_function, unicode_literals, absolute_import, division

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import ANIMNS
from .element import Element

View File

@ -17,14 +17,15 @@
#
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
import re
import types
from .namespaces import (
ANIMNS, CHARTNS, CONFIGNS, DR3DNS, DRAWNS, FONS, FORMNS, MANIFESTNS, METANS,
NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, SMILNS, STYLENS, SVGNS, TABLENS,
TEXTNS, XFORMSNS, XLINKNS
)
from polyglot.builtins import string_or_bytes, unicode_type
pattern_color = re.compile(r'#[0-9a-fA-F]{6}')
pattern_vector3D = re.compile(r'\([ ]*-?([0-9]+(\.[0-9]*)?|\.[0-9]+)([ ]+-?([0-9]+(\.[0-9]*)?|\.[0-9]+)){2}[ ]*\)')
@ -54,20 +55,20 @@ def cnv_color(attribute, arg, element):
""" A RGB color in conformance with §5.9.11 of [XSL], that is a RGB color in notation “#rrggbb”, where
rr, gg and bb are 8-bit hexadecimal digits.
"""
return str(arg)
return unicode_type(arg)
def cnv_configtype(attribute, arg, element):
if str(arg) not in ("boolean", "short", "int", "long",
if unicode_type(arg) not in ("boolean", "short", "int", "long",
"double", "string", "datetime", "base64Binary"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
def cnv_data_source_has_labels(attribute, arg, element):
if str(arg) not in ("none","row","column","both"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
if unicode_type(arg) not in ("none","row","column","both"):
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
# Understand different date formats
@ -76,30 +77,30 @@ def cnv_date(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg)
return unicode_type(arg)
def cnv_dateTime(attribute, arg, element):
""" A dateOrDateTime value is either an [xmlschema-2] date value or an [xmlschema-2] dateTime
value.
"""
return str(arg)
return unicode_type(arg)
def cnv_double(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_duration(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_family(attribute, arg, element):
""" A style family """
if str(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
if unicode_type(arg) not in ("text", "paragraph", "section", "ruby", "table", "table-column", "table-row", "table-cell",
"graphic", "presentation", "drawing-page", "chart"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
def __save_prefix(attribute, arg, element):
@ -108,7 +109,7 @@ def __save_prefix(attribute, arg, element):
return type(u'')(arg)
namespace = element.get_knownns(prefix)
if namespace is None:
# raise ValueError, "'%s' is an unknown prefix" % str(prefix)
# raise ValueError, "'%s' is an unknown prefix" % unicode_type(prefix)
return type(u'')(arg)
return type(u'')(arg)
@ -123,21 +124,21 @@ def cnv_formula(attribute, arg, element):
def cnv_ID(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_IDREF(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_integer(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_legend_position(attribute, arg, element):
if str(arg) not in ("start", "end", "top", "bottom", "top-start", "bottom-start", "top-end", "bottom-end"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
if unicode_type(arg) not in ("start", "end", "top", "bottom", "top-start", "bottom-start", "top-end", "bottom-end"):
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
pattern_length = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)((cm)|(mm)|(in)|(pt)|(pc)|(px))')
@ -169,9 +170,9 @@ def cnv_lengthorpercent(attribute, arg, element):
def cnv_metavaluetype(attribute, arg, element):
if str(arg) not in ("float", "date", "time", "boolean", "string"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
if unicode_type(arg) not in ("float", "date", "time", "boolean", "string"):
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
def cnv_major_minor(attribute, arg, element):
@ -194,7 +195,7 @@ def cnv_NCName(attribute, arg, element):
""" NCName is defined in http://www.w3.org/TR/REC-xml-names/#NT-NCName
Essentially an XML name minus ':'
"""
if type(arg) in types.StringTypes:
if isinstance(arg, string_or_bytes):
return make_NCName(arg)
else:
return arg.getAttrNS(STYLENS, 'name')
@ -231,7 +232,7 @@ def cnv_NCNames(attribute, arg, element):
def cnv_nonNegativeInteger(attribute, arg, element):
return str(arg)
return unicode_type(arg)
pattern_percent = re.compile(r'-?([0-9]+(\.[0-9]*)?|\.[0-9]+)%')
@ -251,7 +252,7 @@ pattern_points = re.compile(r'-?[0-9]+,-?[0-9]+([ ]+-?[0-9]+,-?[0-9]+)*')
def cnv_points(attribute, arg, element):
global pattern_points
if type(arg) in types.StringTypes:
if isinstance(arg, string_or_bytes):
if not pattern_points.match(arg):
raise ValueError("x,y are separated by a comma and the points are separated by white spaces")
return arg
@ -264,7 +265,7 @@ def cnv_points(attribute, arg, element):
def cnv_positiveInteger(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_string(attribute, arg, element):
@ -272,19 +273,19 @@ def cnv_string(attribute, arg, element):
def cnv_textnoteclass(attribute, arg, element):
if str(arg) not in ("footnote", "endnote"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
if unicode_type(arg) not in ("footnote", "endnote"):
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
# Understand different time formats
def cnv_time(attribute, arg, element):
return str(arg)
return unicode_type(arg)
def cnv_token(attribute, arg, element):
return str(arg)
return unicode_type(arg)
pattern_viewbox = re.compile(r'-?[0-9]+([ ]+-?[0-9]+){3}$')
@ -298,9 +299,9 @@ def cnv_viewbox(attribute, arg, element):
def cnv_xlinkshow(attribute, arg, element):
if str(arg) not in ("new", "replace", "embed"):
raise ValueError("'%s' not allowed" % str(arg))
return str(arg)
if unicode_type(arg) not in ("new", "replace", "embed"):
raise ValueError("'%s' not allowed" % unicode_type(arg))
return unicode_type(arg)
attrconverters = {

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import CHARTNS
from .element import Element

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import CONFIGNS
from .element import Element

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import DCNS
from .element import Element

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import DR3DNS
from .draw import StyleRefElement

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import DRAWNS, STYLENS, PRESENTATIONNS
from .element import Element

View File

@ -19,9 +19,11 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
import re
from .style import ListLevelProperties
from .text import ListStyle,ListLevelStyleNumber,ListLevelStyleBullet
from polyglot.builtins import unicode_type
"""
Create a <text:list-style> element from a string or array.
@ -92,8 +94,8 @@ def styleFromList(styleName, specArray, spacing, showAllLevels):
else:
lls = ListLevelStyleBullet(level=(i+1),bulletchar=bullet[0])
llp = ListLevelProperties()
llp.setAttribute('spacebefore', str(cssLengthNum * (i+1)) + cssLengthUnits)
llp.setAttribute('minlabelwidth', str(cssLengthNum) + cssLengthUnits)
llp.setAttribute('spacebefore', unicode_type(cssLengthNum * (i+1)) + cssLengthUnits)
llp.setAttribute('minlabelwidth', unicode_type(cssLengthNum) + cssLengthUnits)
lls.addElement(llp)
listStyle.addElement(lls)
i += 1

View File

@ -22,12 +22,15 @@
# Note: This script has copied a lot of text from xml.dom.minidom.
# Whatever license applies to that file also applies to this file.
#
from __future__ import print_function, unicode_literals, absolute_import, division
import xml.dom
from xml.dom.minicompat import defproperty, EmptyNodeList
from .namespaces import nsdict
from . import grammar
from .attrconverters import AttrConverters
from polyglot.builtins import unicode_type
# The following code is pasted from xml.sax.saxutils
# It makes it possible to run the code without the xml sax package installed
# To make it possible to have <rubbish> in your text elements, it is necessary to escape the texts
@ -82,7 +85,7 @@ def _nssplit(qualifiedName):
def _nsassign(namespace):
return nsdict.setdefault(namespace,"ns" + str(len(nsdict)))
return nsdict.setdefault(namespace,"ns" + unicode_type(len(nsdict)))
# Exceptions
@ -183,17 +186,12 @@ class Node(xml.dom.Node):
oldChild.parentNode = None
return oldChild
def __str__(self):
val = []
for c in self.childNodes:
val.append(str(c))
return ''.join(val)
def __unicode__(self):
val = []
for c in self.childNodes:
val.append(type(u'')(c))
return u''.join(val)
__str__ = __unicode__
defproperty(Node, "firstChild", doc="First child node, or None.")
@ -259,10 +257,8 @@ class Text(Childless, Node):
self.data = data
def __str__(self):
return self.data.encode()
def __unicode__(self):
return self.data
__unicode__ = __str__
def toXml(self,level,f):
""" Write XML in UTF-8 """
@ -477,10 +473,10 @@ class Element(Node):
f.write('<'+self.tagName)
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
f.write('>')
def write_close_tag(self, level, f):
@ -491,10 +487,10 @@ class Element(Node):
f.write('<'+self.tagName)
if level == 0:
for namespace, prefix in self.namespaces.items():
f.write(' xmlns:' + prefix + '="'+ _escape(str(namespace))+'"')
f.write(' xmlns:' + prefix + '="'+ _escape(unicode_type(namespace))+'"')
for qname in self.attributes.keys():
prefix = self.get_nsprefix(qname[0])
f.write(' '+_escape(str(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
f.write(' '+_escape(unicode_type(prefix+':'+qname[1]))+'='+_quoteattr(type(u'')(self.attributes[qname]).encode('utf-8')))
if self.childNodes:
f.write('>')
for element in self.childNodes:

View File

@ -19,6 +19,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import (
ANIMNS, CHARTNS, DR3DNS, DRAWNS, FORMNS, MANIFESTNS, METANS, NUMBERNS, OFFICENS,
PRESENTATIONNS, SCRIPTNS, STYLENS, SVGNS, TABLENS, TEXTNS

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import FORMNS
from .element import Element
@ -141,4 +142,3 @@ def Time(**args):
def ValueRange(**args):
return Element(qname=(FORMNS,'value-range'), **args)

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
__doc__=""" In principle the OpenDocument schema converted to python structures.
Currently it contains the legal child elements of a given element.
To be used for validation check in the API

View File

@ -24,7 +24,7 @@
# in memory. The user should then be able to make operations and then save
# the structure again.
from __future__ import print_function
from __future__ import print_function, unicode_literals, absolute_import, division
from xml.sax import handler
from .element import Element
from .namespaces import OFFICENS

View File

@ -20,6 +20,7 @@
#
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import MANIFESTNS
from .element import Element

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import MATHNS
from .element import Element
@ -29,4 +30,3 @@ from .element import Element
def Math(**args):
return Element(qname=(MATHNS,'math'), **args)

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import METANS
from .element import Element
@ -78,4 +79,3 @@ def Template(**args):
def UserDefined(**args):
return Element(qname=(METANS,'user-defined'), **args)

View File

@ -17,6 +17,7 @@
#
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
TOOLSVERSION = u"ODFPY/0.9.4dev"
ANIMNS = u"urn:oasis:names:tc:opendocument:xmlns:animation:1.0"

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import NUMBERNS
from .element import Element
from .style import StyleElement

View File

@ -20,9 +20,11 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
import zipfile, xml.dom.minidom
from .namespaces import nsdict
from .elementtypes import empty_elements, inline_elements
from polyglot.builtins import unicode_type
IGNORED_TAGS = [
'draw:a'
@ -94,11 +96,12 @@ class TextProps:
self.superscript = True
self.subscript = False
def __str__(self):
def __unicode__(self):
return "[italic=%s, bold=i%s, fixed=%s]" % (str(self.italic),
str(self.bold),
str(self.fixed))
return "[italic=%s, bold=i%s, fixed=%s]" % (unicode_type(self.italic),
unicode_type(self.bold),
unicode_type(self.fixed))
__str__ = __unicode__
class ParagraphProps:
@ -124,11 +127,12 @@ class ParagraphProps:
def setCode(self, value):
self.code = value
def __str__(self):
def __unicode__(self):
return "[bq=%s, h=%d, code=%s]" % (str(self.blockquote),
return "[bq=%s, h=%d, code=%s]" % (unicode_type(self.blockquote),
self.headingLevel,
str(self.code))
unicode_type(self.code))
__str__ = __unicode__
class ListProperties:
@ -398,7 +402,7 @@ class ODF2MoinMoin(object):
buffer.append(" "*indent)
i += 1
if props.ordered:
number = str(i)
number = unicode_type(i)
number = " " + number + ". "
buffer.append(" 1. ")
else:

View File

@ -20,6 +20,7 @@
#
# import pdb
# pdb.set_trace()
from __future__ import print_function, unicode_literals, absolute_import, division
from collections import defaultdict
from xml.sax import handler
from xml.sax.saxutils import escape, quoteattr
@ -30,6 +31,7 @@ from .opendocument import load
from .namespaces import ANIMNS, CHARTNS, CONFIGNS, DCNS, DR3DNS, DRAWNS, FONS, \
FORMNS, MATHNS, METANS, NUMBERNS, OFFICENS, PRESENTATIONNS, SCRIPTNS, \
SMILNS, STYLENS, SVGNS, TABLENS, TEXTNS, XLINKNS
from polyglot.builtins import unicode_type
if False: # Added by Kovid
DR3DNS, MATHNS, CHARTNS, CONFIGNS, ANIMNS, FORMNS, SMILNS, SCRIPTNS
@ -1331,7 +1333,7 @@ dl.notes dd:last-of-type { page-break-after: avoid }
self.list_class_stack.append(number_class)
attrs = {}
if tag_name == 'ol' and self.list_number_map[number_class] != 1:
attrs = {'start': str(self.list_number_map[number_class])}
attrs = {'start': unicode_type(self.list_number_map[number_class])}
if self.generate_css:
attrs['class'] = list_class
self.opentag('%s' % tag_name, attrs)

View File

@ -20,7 +20,7 @@
#
# This script lists the content of the manifest.xml file
from __future__ import print_function
from __future__ import print_function, unicode_literals, absolute_import, division
import zipfile
from xml.sax import make_parser,handler
from xml.sax.xmlreader import InputSource

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import OFFICENS
from .element import Element
from .draw import StyleRefElement

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
__doc__="""Use OpenDocument to generate your documents."""
import mimetypes
@ -28,6 +29,7 @@ from io import BytesIO
from xml.sax.xmlreader import InputSource
from polyglot.io import PolyglotBytesIO, PolyglotStringIO
from polyglot.builtins import unicode_type
from . import element, manifest, meta
from .attrconverters import make_NCName
@ -383,7 +385,7 @@ class OpenDocument:
if what_it_is == IS_FILENAME:
self._z.write(fileobj, arcname, zipfile.ZIP_STORED)
else:
zi = zipfile.ZipInfo(str(arcname), self._now)
zi = zipfile.ZipInfo(unicode_type(arcname), self._now)
zi.compress_type = zipfile.ZIP_STORED
zi.external_attr = UNIXPERMS
self._z.writestr(zi, fileobj)

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import PRESENTATIONNS
from .element import Element
@ -103,4 +104,3 @@ def ShowText(**args):
def Sound(**args):
return Element(qname=(PRESENTATIONNS,'sound'), **args)

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import SCRIPTNS
from .element import Element
@ -29,4 +30,3 @@ from .element import Element
def EventListener(**args):
return Element(qname=(SCRIPTNS,'event-listener'), **args)

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import STYLENS
from .element import Element

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import SVGNS
from .element import Element
from .draw import DrawElement

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import TABLENS
from .element import Element
@ -397,4 +398,3 @@ def TargetRangeAddress(**args):
def TrackedChanges(**args):
return Element(qname=(TABLENS,'tracked-changes'), **args)

View File

@ -27,6 +27,7 @@ the appropriate <text:s>, <text:tab>, or <text:line-break>
elements. This module takes care of that problem.
"""
from __future__ import print_function, unicode_literals, absolute_import, division
from .element import Node
from .text import S,LineBreak,Tab

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import TEXTNS
from .element import Element
from .style import StyleElement

View File

@ -4,9 +4,10 @@
# Taken from http://www.zwahlendesign.ch/en/node/20
# openoffice_icons/openoffice_icons_linux/openoffice11.png
# License: Freeware
from __future__ import print_function, unicode_literals, absolute_import, division
import base64
iconstr = """\
iconstr = b"""\
iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAYAAAG0OVFdAAAABGdBTUEAANbY1E9YMgAAABl0RVh0
U29mdHdhcmUAQWRvYmUgSW1hZ2VSZWFkeXHJZTwAAFoHSURBVHjaYvz//z8DJQAggFhu3LiBU1JI
SOiPmJgYM7IYUD0jMh8ggFhAhKamJuOHDx/+8fPz4zQsMTGRYf78+RjiAAHEBCJOnTr1HZvmN2/e

View File

@ -21,6 +21,7 @@
"""Class to show and manipulate user fields in odf documents."""
from __future__ import print_function, unicode_literals, absolute_import, division
import sys
import zipfile

View File

@ -18,6 +18,7 @@
# Contributor(s):
#
from __future__ import print_function, unicode_literals, absolute_import, division
from .namespaces import XFORMSNS
from .element import Element