mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Accept transport_encoding in html5lib.parse
This commit is contained in:
parent
4da5526228
commit
0f7b73d7c4
@ -19,6 +19,7 @@ from .constants import (
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
unicode
|
unicode
|
||||||
|
|
||||||
def with_metaclass(meta, *bases):
|
def with_metaclass(meta, *bases):
|
||||||
"""Create a base class with a metaclass."""
|
"""Create a base class with a metaclass."""
|
||||||
return meta(b"NewBase", bases, {})
|
return meta(b"NewBase", bases, {})
|
||||||
@ -29,11 +30,11 @@ except NameError:
|
|||||||
|
|
||||||
|
|
||||||
def parse(doc, treebuilder="etree", encoding=None,
|
def parse(doc, treebuilder="etree", encoding=None,
|
||||||
namespaceHTMLElements=True):
|
namespaceHTMLElements=True, transport_encoding=None):
|
||||||
"""Parse a string or file-like object into a tree"""
|
"""Parse a string or file-like object into a tree"""
|
||||||
tb = treebuilders.getTreeBuilder(treebuilder)
|
tb = treebuilders.getTreeBuilder(treebuilder)
|
||||||
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
|
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
|
||||||
return p.parse(doc, encoding=encoding)
|
return p.parse(doc, encoding=transport_encoding or encoding)
|
||||||
|
|
||||||
|
|
||||||
def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
|
def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
|
||||||
@ -368,6 +369,7 @@ class HTMLParser(object):
|
|||||||
ans['position'] = (self.tokenizer.stream.position(), True)
|
ans['position'] = (self.tokenizer.stream.position(), True)
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
|
|
||||||
def getPhases(debug):
|
def getPhases(debug):
|
||||||
def log(function):
|
def log(function):
|
||||||
"""Logger that records which phase processes each token"""
|
"""Logger that records which phase processes each token"""
|
||||||
@ -2668,11 +2670,13 @@ def getPhases(debug):
|
|||||||
# XXX after after frameset
|
# XXX after after frameset
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def adjust_attributes(token, replacements):
|
def adjust_attributes(token, replacements):
|
||||||
if token['data'].viewkeys() & replacements.viewkeys():
|
if token['data'].viewkeys() & replacements.viewkeys():
|
||||||
token['data'] = OrderedDict(
|
token['data'] = OrderedDict(
|
||||||
(replacements.get(k, k), v) for k, v in token['data'].iteritems())
|
(replacements.get(k, k), v) for k, v in token['data'].iteritems())
|
||||||
|
|
||||||
|
|
||||||
class ParseError(Exception):
|
class ParseError(Exception):
|
||||||
|
|
||||||
"""Error in parsed document"""
|
"""Error in parsed document"""
|
||||||
|
Loading…
x
Reference in New Issue
Block a user