Fix excessive HTML entity substitution.

This commit is contained in:
Marshall T. Vandegrift 2009-01-05 00:18:21 -05:00
parent d3f12fcf36
commit b36ac2f96c

View File

@@ -15,7 +15,7 @@ from urlparse import urldefrag, urlparse, urlunparse
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
import logging import logging
import re import re
from htmlentitydefs import entitydefs import htmlentitydefs
import uuid import uuid
from lxml import etree from lxml import etree
from calibre import LoggingInterface from calibre import LoggingInterface
@@ -64,6 +64,12 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
MS_COVER_TYPE = 'other.ms-coverimage-standard' MS_COVER_TYPE = 'other.ms-coverimage-standard'
ENTITYDEFS = dict(htmlentitydefs.entitydefs)
del ENTITYDEFS['lt']
del ENTITYDEFS['gt']
del ENTITYDEFS['quot']
del ENTITYDEFS['amp']
def element(parent, *args, **kwargs): def element(parent, *args, **kwargs):
if parent is not None: if parent is not None:
@@ -301,7 +307,7 @@ class Manifest(object):
% (self.id, self.href, self.media_type) % (self.id, self.href, self.media_type)
def _force_xhtml(self, data): def _force_xhtml(self, data):
repl = lambda m: entitydefs.get(m.group(1), m.group(0)) repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
data = self.ENTITY_RE.sub(repl, data) data = self.ENTITY_RE.sub(repl, data)
data = etree.fromstring(data, parser=XML_PARSER) data = etree.fromstring(data, parser=XML_PARSER)
if namespace(data.tag) != XHTML_NS: if namespace(data.tag) != XHTML_NS: