diff --git a/src/html5lib/html5parser.py b/src/html5lib/html5parser.py index 9503152952..602f5ff66b 100644 --- a/src/html5lib/html5parser.py +++ b/src/html5lib/html5parser.py @@ -19,6 +19,7 @@ from .constants import ( try: unicode + def with_metaclass(meta, *bases): """Create a base class with a metaclass.""" return meta(b"NewBase", bases, {}) @@ -29,11 +30,11 @@ except NameError: def parse(doc, treebuilder="etree", encoding=None, - namespaceHTMLElements=True): + namespaceHTMLElements=True, transport_encoding=None): """Parse a string or file-like object into a tree""" tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, encoding=encoding) + return p.parse(doc, encoding=transport_encoding or encoding) def parseFragment(doc, container="div", treebuilder="etree", encoding=None, @@ -368,6 +369,7 @@ class HTMLParser(object): ans['position'] = (self.tokenizer.stream.position(), True) return ans + def getPhases(debug): def log(function): """Logger that records which phase processes each token""" @@ -2668,11 +2670,13 @@ def getPhases(debug): # XXX after after frameset } + def adjust_attributes(token, replacements): if token['data'].viewkeys() & replacements.viewkeys(): token['data'] = OrderedDict( (replacements.get(k, k), v) for k, v in token['data'].iteritems()) + class ParseError(Exception): """Error in parsed document"""