Remove html5lib from HTML sanitizer, since the html5lib sanitizer no longer exists

This commit is contained in:
Kovid Goyal 2017-07-08 18:34:09 +05:30
parent 5e67ba1369
commit 46de368b4b
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 4 additions and 21 deletions

View File

@@ -145,29 +145,12 @@ def merge_comments(one, two):
return comments_to_html(one) + '\n\n' + comments_to_html(two) return comments_to_html(one) + '\n\n' + comments_to_html(two)
def sanitize_html(html):
if not html:
return u''
if isinstance(html, bytes):
html = html.decode('utf-8', 'replace')
import html5lib
from html5lib.sanitizer import HTMLSanitizer
from html5lib.serializer.htmlserializer import HTMLSerializer
from html5lib.treebuilders.etree_lxml import TreeBuilder
from html5lib.treewalkers.lxmletree import TreeWalker
parser = html5lib.HTMLParser(tokenizer=HTMLSanitizer, tree=TreeBuilder)
tree = parser.parseFragment(html)
serializer = HTMLSerializer(quote_attr_values=True, alphabetical_attributes=False, omit_optional_tags=False)
stream = TreeWalker(tree)
return serializer.render(stream)
def sanitize_comments_html(html): def sanitize_comments_html(html):
from calibre.ebooks.markdown import Markdown from calibre.ebooks.markdown import Markdown
text = html2text(html) text = html2text(html)
md = Markdown() md = Markdown()
html = md.convert(text) html = md.convert(text)
return sanitize_html(html) return html
def test(): def test():
@@ -185,6 +168,6 @@ def test():
print 'FAILED' print 'FAILED'
break break
if __name__ == '__main__': if __name__ == '__main__':
test() test()

View File

@@ -244,8 +244,8 @@ class BuildTest(unittest.TestCase):
def test_markdown(self): def test_markdown(self):
from calibre.ebooks.markdown import Markdown from calibre.ebooks.markdown import Markdown
Markdown(extensions=['extra']) Markdown(extensions=['extra'])
from calibre.library.comments import sanitize_html from calibre.library.comments import sanitize_comments_html
sanitize_html(b'''<script>moo</script>xxx<img src="http://moo.com/x.jpg">''') sanitize_comments_html(b'''<script>moo</script>xxx<img src="http://moo.com/x.jpg">''')
def test_openssl(self): def test_openssl(self):
import ssl import ssl