From 7a3cd084a547f7ae841e34d206ca4ae94430ebb3 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 23 Jan 2008 08:16:45 +0000 Subject: [PATCH] Fix #479 --- src/libprs500/ebooks/lrf/__init__.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index 5d1cee0ea7..8c5078c921 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -308,16 +308,26 @@ def Book(options, logger, font_delta=0, header=None, raise ConversionError, 'Could not find the normal version of the ' + family + ' font' return book, fonts -def entity_to_unicode(match, exceptions=[]): +def entity_to_unicode(match, exceptions=[], encoding='cp1252'): + ''' + @param encoding: The encoding to use to decode numeric entities between 128 and 256. + If None, the Unicode UCS encoding is used. A common encoding is cp1252. + ''' ent = match.group(1) if ent in exceptions: return '&'+ent+';' if ent == 'apos': return "'" if ent.startswith(u'#x'): - return unichr(int(ent[2:], 16)) + num = int(ent[2:], 16) + if encoding is None or num > 255: + return unichr(num) + return chr(num).decode(encoding) if ent.startswith(u'#'): - return unichr(int(ent[1:])) + num = int(ent[1:]) + if encoding is None or num > 255: + return unichr(num) + return chr(num).decode(encoding) try: return unichr(name2codepoint[ent]) except KeyError: