From a529cb0303f22329214012e280d1ff026a8942a7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 1 Jun 2010 18:21:39 -0600 Subject: [PATCH] Fix #5662 (< hexa entity problem) --- src/calibre/ebooks/conversion/preprocess.py | 10 ++++++++-- src/calibre/ebooks/oeb/base.py | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 40c67453b2..7a7f362169 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -14,8 +14,14 @@ XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>') SVG_NS = 'http://www.w3.org/2000/svg' XLINK_NS = 'http://www.w3.org/1999/xlink' -convert_entities = functools.partial(entity_to_unicode, exceptions=['quot', - 'apos', 'lt', 'gt', 'amp', '#60', '#62']) +convert_entities = functools.partial(entity_to_unicode, + result_exceptions = { + u'<' : '&lt;', + u'>' : '&gt;', + u"'" : '&apos;', + u'"' : '&quot;', + u'&' : '&amp;', + }) _span_pat = re.compile('', re.DOTALL|re.IGNORECASE) LIGATURES = { diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 79f9f15248..76e2cef3bb 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -787,7 +787,6 @@ class Manifest(object): data = self.oeb.decode(data) data = self.oeb.html_preprocessor(data) - # Remove DOCTYPE declaration as it messes up parsing # In particular, it causes tostring to insert xmlns # declarations, which messes up the coercing logic