This commit is contained in:
Kovid Goyal 2008-01-23 08:16:45 +00:00
parent 4e49c9807c
commit 7a3cd084a5

View File

@ -308,16 +308,26 @@ def Book(options, logger, font_delta=0, header=None,
raise ConversionError, 'Could not find the normal version of the ' + family + ' font'
return book, fonts
def entity_to_unicode(match, exceptions=[]):
def entity_to_unicode(match, exceptions=[], encoding='cp1252'):
'''
@param encoding: The encoding to use to decode numeric entities between 128 and 256.
If None, the Unicode UCS encoding is used. A common encoding is cp1252.
'''
ent = match.group(1)
if ent in exceptions:
return '&'+ent+';'
if ent == 'apos':
return "'"
if ent.startswith(u'#x'):
return unichr(int(ent[2:], 16))
num = int(ent[2:], 16)
if encoding is None or num > 255:
return unichr(num)
return chr(num).decode(encoding)
if ent.startswith(u'#'):
return unichr(int(ent[1:]))
num = int(ent[1:])
if encoding is None or num > 255:
return unichr(num)
return chr(num).decode(encoding)
try:
return unichr(name2codepoint[ent])
except KeyError: