mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Workaround for an error when parsing malformed documents containing annotation-xml tags
This commit is contained in:
parent
fe84fd3519
commit
a82ff8b749
@ -155,10 +155,17 @@ class HTMLParser(object):
|
||||
def isHTMLIntegrationPoint(self, element):
    """Report whether ``element`` counts as an HTML integration point.

    A MathML ``annotation-xml`` element qualifies only when its
    ``encoding`` attribute, once lower-cased, is ``text/html`` or
    ``application/xhtml+xml``. Any other element qualifies when its
    ``(namespace, name)`` pair is listed in
    ``htmlIntegrationPointElements``.
    """
    is_mathml_annotation = (element.name == "annotation-xml" and
                            element.namespace == namespaces["mathml"])
    if not is_mathml_annotation:
        return (element.namespace, element.name) in htmlIntegrationPointElements

    html_media_types = ("text/html", "application/xhtml+xml")
    attrs = element.attributes
    try:
        # Normal path: lower-case the value through the asciiUpper2Lower
        # translation table (presumably ASCII-only case folding).
        return ("encoding" in attrs and
                attrs["encoding"].translate(asciiUpper2Lower) in
                html_media_types)
    except TypeError:
        # Some malformed documents reach this point with an encoding
        # value that translate() rejects; according to the original
        # note, lxml refuses to store a unicode representation of the
        # attribute in those cases. Lower-case the raw value and decode
        # it leniently before comparing.
        raw_value = attrs["encoding"]
        return raw_value.lower().decode('utf-8', 'replace') in html_media_types
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user