This commit is contained in:
Marshall T. Vandegrift 2008-10-07 17:18:36 -04:00
commit b77dda40b4

View File

@@ -110,6 +110,9 @@ def consume_sized_utf8_string(bytes, zpad=False):
class UnBinary(object): class UnBinary(object):
AMPERSAND_RE = re.compile( AMPERSAND_RE = re.compile(
r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)') r'&(?!(?:#[0-9]+|#x[0-9a-fA-F]+|[a-zA-Z_:][a-zA-Z0-9.-_:]+);)')
OPEN_ANGLE_RE = re.compile(r'<<(?![!]--)')
CLOSE_ANGLE_RE = re.compile(r'(?<!--)>>')
DOUBLE_ANGLE_RE = re.compile(r'([<>])\1')
def __init__(self, bin, path, manifest, map=OPF_MAP): def __init__(self, bin, path, manifest, map=OPF_MAP):
self.manifest = manifest self.manifest = manifest
@@ -120,10 +123,15 @@ class UnBinary(object):
self.buf = cStringIO.StringIO() self.buf = cStringIO.StringIO()
self.binary_to_text() self.binary_to_text()
self.raw = self.buf.getvalue().lstrip().decode('utf-8') self.raw = self.buf.getvalue().lstrip().decode('utf-8')
self.escape_ampersands() self.escape_reserved()
def escape_ampersands(self): def escape_reserved(self):
self.raw = self.AMPERSAND_RE.sub('&amp;', self.raw) raw = self.raw
raw = self.AMPERSAND_RE.sub(r'&amp;', raw)
raw = self.OPEN_ANGLE_RE.sub(r'&lt;', raw)
raw = self.CLOSE_ANGLE_RE.sub(r'&gt;', raw)
raw = self.DOUBLE_ANGLE_RE.sub(r'\1', raw)
self.raw = raw
def item_path(self, internal_id): def item_path(self, internal_id):
try: try:
@@ -162,6 +170,10 @@ class UnBinary(object):
continue continue
elif c == '\v': elif c == '\v':
c = '\n' c = '\n'
elif c == '>':
c = '>>'
elif c == '<':
c = '<<'
self.buf.write(c.encode('ascii', 'xmlcharrefreplace')) self.buf.write(c.encode('ascii', 'xmlcharrefreplace'))
elif state == 'get flags': elif state == 'get flags':