From 7b284b949ffc9e4adb48b4232017757675ff086a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 9 Jun 2014 15:03:49 +0530 Subject: [PATCH] Handle invalid bytes in index_to_soup() for JavascriptRecipe --- src/calibre/web/feeds/jsnews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/web/feeds/jsnews.py b/src/calibre/web/feeds/jsnews.py index bb8d891d06..4993598fa8 100644 --- a/src/calibre/web/feeds/jsnews.py +++ b/src/calibre/web/feeds/jsnews.py @@ -16,6 +16,7 @@ from calibre.web.feeds import feeds_from_index from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations +from calibre.utils.cleantext import clean_xml_chars def image_data_to_url(data, base='cover'): from calibre.utils.imghdr import what @@ -221,7 +222,7 @@ class JavascriptRecipe(BasicNewsRecipe): if raw: return html import html5lib - root = html5lib.parse(html, treebuilder='lxml', namespaceHTMLElements=False).getroot() + root = html5lib.parse(clean_xml_chars(html), treebuilder='lxml', namespaceHTMLElements=False).getroot() return root # ***************************** Internal API *****************************