mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Workaround for an error when parsing malformed documents containing annotation-xml tags
This commit is contained in:
parent
fe84fd3519
commit
a82ff8b749
@ -155,10 +155,17 @@ class HTMLParser(object):
|
|||||||
def isHTMLIntegrationPoint(self, element):
|
def isHTMLIntegrationPoint(self, element):
|
||||||
if (element.name == "annotation-xml" and
|
if (element.name == "annotation-xml" and
|
||||||
element.namespace == namespaces["mathml"]):
|
element.namespace == namespaces["mathml"]):
|
||||||
return ("encoding" in element.attributes and
|
try:
|
||||||
element.attributes["encoding"].translate(
|
return ("encoding" in element.attributes and
|
||||||
asciiUpper2Lower) in
|
element.attributes["encoding"].translate(
|
||||||
("text/html", "application/xhtml+xml"))
|
asciiUpper2Lower) in
|
||||||
|
("text/html", "application/xhtml+xml"))
|
||||||
|
except TypeError:
|
||||||
|
# This happens for some documents; for some reason
|
||||||
|
# lxml refuses to store a unicode representation of the
|
||||||
|
# encoding attribute.
|
||||||
|
return element.attributes["encoding"].lower().decode('utf-8', 'replace') in (
|
||||||
|
"text/html", "application/xhtml+xml")
|
||||||
else:
|
else:
|
||||||
return (element.namespace, element.name) in htmlIntegrationPointElements
|
return (element.namespace, element.name) in htmlIntegrationPointElements
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user