mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix excessive HTML entity substitution.
This commit is contained in:
parent
d3f12fcf36
commit
b36ac2f96c
@@ -15,7 +15,7 @@ from urlparse import urldefrag, urlparse, urlunparse
|
|||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from htmlentitydefs import entitydefs
|
import htmlentitydefs
|
||||||
import uuid
|
import uuid
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from calibre import LoggingInterface
|
from calibre import LoggingInterface
|
||||||
@@ -64,6 +64,12 @@ OEB_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME, SVG_MIME])
|
|||||||
|
|
||||||
MS_COVER_TYPE = 'other.ms-coverimage-standard'
|
MS_COVER_TYPE = 'other.ms-coverimage-standard'
|
||||||
|
|
||||||
|
ENTITYDEFS = dict(htmlentitydefs.entitydefs)
|
||||||
|
del ENTITYDEFS['lt']
|
||||||
|
del ENTITYDEFS['gt']
|
||||||
|
del ENTITYDEFS['quot']
|
||||||
|
del ENTITYDEFS['amp']
|
||||||
|
|
||||||
|
|
||||||
def element(parent, *args, **kwargs):
|
def element(parent, *args, **kwargs):
|
||||||
if parent is not None:
|
if parent is not None:
|
||||||
@@ -301,7 +307,7 @@ class Manifest(object):
|
|||||||
% (self.id, self.href, self.media_type)
|
% (self.id, self.href, self.media_type)
|
||||||
|
|
||||||
def _force_xhtml(self, data):
|
def _force_xhtml(self, data):
|
||||||
repl = lambda m: entitydefs.get(m.group(1), m.group(0))
|
repl = lambda m: ENTITYDEFS.get(m.group(1), m.group(0))
|
||||||
data = self.ENTITY_RE.sub(repl, data)
|
data = self.ENTITY_RE.sub(repl, data)
|
||||||
data = etree.fromstring(data, parser=XML_PARSER)
|
data = etree.fromstring(data, parser=XML_PARSER)
|
||||||
if namespace(data.tag) != XHTML_NS:
|
if namespace(data.tag) != XHTML_NS:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user