From b36ac2f96c9167e7ef388febe03d1f6f836f3873 Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Mon, 5 Jan 2009 00:18:21 -0500 Subject: [PATCH] Fix excessive HTML entity substitution. --- src/calibre/ebooks/oeb/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index c736c4ad98..847aa412ec 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -15,7 +15,7 @@ from urlparse import urldefrag, urlparse, urlunparse from urllib import unquote as urlunquote import logging import re -from htmlentitydefs import entitydefs +import htmlentitydefs import uuid from lxml import etree from calibre import LoggingInterface @@ -64,6 +64,12 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME]) MS_COVER_TYPE = 'other.ms-coverimage-standard' +ENTITYDEFS = dict(htmlentitydefs.entitydefs) +del ENTITYDEFS['lt'] +del ENTITYDEFS['gt'] +del ENTITYDEFS['quot'] +del ENTITYDEFS['amp'] + def element(parent, *args, **kwargs): if parent is not None: @@ -301,7 +307,7 @@ class Manifest(object): % (self.id, self.href, self.media_type) def _force_xhtml(self, data): - repl = lambda m: entitydefs.get(m.group(1), m.group(0)) + repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0)) data = self.ENTITY_RE.sub(repl, data) data = etree.fromstring(data, parser=XML_PARSER) if namespace(data.tag) != XHTML_NS: