From dbb4092b35ca2d0eb44b1768b6beac8233b0c9ee Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 4 Nov 2015 20:24:57 +0530 Subject: [PATCH] Fix a couple of things I forgot to merge from upstream html5lib --- src/html5lib/sanitizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/html5lib/sanitizer.py b/src/html5lib/sanitizer.py index 5a05eb1213..ecccdc7dcd 100644 --- a/src/html5lib/sanitizer.py +++ b/src/html5lib/sanitizer.py @@ -212,7 +212,7 @@ class HTMLSanitizerMixin(object): # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: - uri = urlparse.urlparse(val_unescaped) + uri = urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] @@ -291,11 +291,11 @@ class HTMLSanitizerMixin(object): class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin): def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, - lowercaseElementName=False, lowercaseAttrName=False, parser=None): + lowercaseElementName=False, lowercaseAttrName=False, parser=None, track_positions=False): # Change case matching defaults as we only output lowercase html anyway # This solution doesn't seem ideal... HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, - lowercaseElementName, lowercaseAttrName, parser=parser) + lowercaseElementName, lowercaseAttrName, parser=parser, track_positions=track_positions) def __iter__(self): for token in HTMLTokenizer.__iter__(self):