mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Fix a couple of things I forgot to merge from upstream html5lib
This commit is contained in:
parent
424a430d15
commit
dbb4092b35
@@ -212,7 +212,7 @@ class HTMLSanitizerMixin(object):
|
|||||||
# remove replacement characters from unescaped characters
|
# remove replacement characters from unescaped characters
|
||||||
val_unescaped = val_unescaped.replace("\ufffd", "")
|
val_unescaped = val_unescaped.replace("\ufffd", "")
|
||||||
try:
|
try:
|
||||||
uri = urlparse.urlparse(val_unescaped)
|
uri = urlparse(val_unescaped)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
uri = None
|
uri = None
|
||||||
del attrs[attr]
|
del attrs[attr]
|
||||||
@@ -291,11 +291,11 @@ class HTMLSanitizerMixin(object):
|
|||||||
|
|
||||||
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
|
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
|
||||||
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
|
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
|
||||||
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
|
lowercaseElementName=False, lowercaseAttrName=False, parser=None, track_positions=False):
|
||||||
# Change case matching defaults as we only output lowercase html anyway
|
# Change case matching defaults as we only output lowercase html anyway
|
||||||
# This solution doesn't seem ideal...
|
# This solution doesn't seem ideal...
|
||||||
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
|
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
|
||||||
lowercaseElementName, lowercaseAttrName, parser=parser)
|
lowercaseElementName, lowercaseAttrName, parser=parser, track_positions=track_positions)
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
for token in HTMLTokenizer.__iter__(self):
|
for token in HTMLTokenizer.__iter__(self):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user