Fix #5662 (< hexa entity problem)

This commit is contained in:
Kovid Goyal 2010-06-01 18:21:39 -06:00
parent 721b48038e
commit a529cb0303
2 changed files with 8 additions and 3 deletions

View File

@@ -14,8 +14,14 @@ XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
SVG_NS = 'http://www.w3.org/2000/svg' SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink' XLINK_NS = 'http://www.w3.org/1999/xlink'
convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', convert_entities = functools.partial(entity_to_unicode,
'apos', 'lt', 'gt', 'amp', '#60', '#62']) result_exceptions = {
u'<' : '&lt;',
u'>' : '&gt;',
u"'" : '&apos;',
u'"' : '&quot;',
u'&' : '&amp;',
})
_span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE) _span_pat = re.compile('<span.*?</span>', re.DOTALL|re.IGNORECASE)
LIGATURES = { LIGATURES = {

View File

@@ -787,7 +787,6 @@ class Manifest(object):
data = self.oeb.decode(data) data = self.oeb.decode(data)
data = self.oeb.html_preprocessor(data) data = self.oeb.html_preprocessor(data)
# Remove DOCTYPE declaration as it messes up parsing # Remove DOCTYPE declaration as it messes up parsing
# In particular, it causes tostring to insert xmlns # In particular, it causes tostring to insert xmlns
# declarations, which messes up the coercing logic # declarations, which messes up the coercing logic