mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
Fix #6136 (epub-fix fails on ePub check)
This commit is contained in:
parent
3bcf57e814
commit
3bf6888b34
@@ -5,7 +5,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, posixpath, urllib, sys
|
import os, posixpath, urllib, sys, re
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@@ -160,8 +160,26 @@ class Container(object):
|
|||||||
mt = mimetype.lower()
|
mt = mimetype.lower()
|
||||||
if mt.endswith('+xml'):
|
if mt.endswith('+xml'):
|
||||||
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
|
parser = etree.XMLParser(no_network=True, huge_tree=not iswindows)
|
||||||
return etree.fromstring(xml_to_unicode(raw,
|
raw = xml_to_unicode(raw,
|
||||||
strip_encoding_pats=True, assume_utf8=True)[0], parser=parser)
|
strip_encoding_pats=True, assume_utf8=True,
|
||||||
|
resolve_entities=True)[0].strip()
|
||||||
|
idx = raw.find('<html')
|
||||||
|
if idx == -1:
|
||||||
|
idx = raw.find('<HTML')
|
||||||
|
if idx > -1:
|
||||||
|
pre = raw[:idx]
|
||||||
|
raw = raw[idx:]
|
||||||
|
if '<!DOCTYPE' in pre:
|
||||||
|
user_entities = {}
|
||||||
|
for match in re.finditer(r'<!ENTITY\s+(\S+)\s+([^>]+)', pre):
|
||||||
|
val = match.group(2)
|
||||||
|
if val.startswith('"') and val.endswith('"'):
|
||||||
|
val = val[1:-1]
|
||||||
|
user_entities[match.group(1)] = val
|
||||||
|
if user_entities:
|
||||||
|
pat = re.compile(r'&(%s);'%('|'.join(user_entities.keys())))
|
||||||
|
raw = pat.sub(lambda m:user_entities[m.group(1)], raw)
|
||||||
|
return etree.fromstring(raw, parser=parser)
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
def write(self, path):
|
def write(self, path):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user