mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle invalid bytes in index_to_soup() for JavascriptRecipe
This commit is contained in:
parent
e58cd115e1
commit
7b284b949f
@@ -16,6 +16,7 @@ from calibre.web.feeds import feeds_from_index
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors
|
||||
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||
+from calibre.utils.cleantext import clean_xml_chars
|
||||
|
||||
def image_data_to_url(data, base='cover'):
|
||||
from calibre.utils.imghdr import what
|
||||
@@ -221,7 +222,7 @@ class JavascriptRecipe(BasicNewsRecipe):
|
||||
if raw:
|
||||
return html
|
||||
import html5lib
|
||||
-root = html5lib.parse(html, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||
+root = html5lib.parse(clean_xml_chars(html), treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||
return root
|
||||
|
||||
# ***************************** Internal API *****************************
|
||||
|
Loading…
x
Reference in New Issue
Block a user