mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle invalid bytes in index_to_soup() for JavascriptRecipe
This commit is contained in:
parent
e58cd115e1
commit
7b284b949f
@ -16,6 +16,7 @@ from calibre.web.feeds import feeds_from_index
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors
|
from calibre.web.fetch.javascript import fetch_page, AbortFetch, links_from_selectors
|
||||||
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||||
|
from calibre.utils.cleantext import clean_xml_chars
|
||||||
|
|
||||||
def image_data_to_url(data, base='cover'):
|
def image_data_to_url(data, base='cover'):
|
||||||
from calibre.utils.imghdr import what
|
from calibre.utils.imghdr import what
|
||||||
@ -221,7 +222,7 @@ class JavascriptRecipe(BasicNewsRecipe):
|
|||||||
if raw:
|
if raw:
|
||||||
return html
|
return html
|
||||||
import html5lib
|
import html5lib
|
||||||
root = html5lib.parse(html, treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
root = html5lib.parse(clean_xml_chars(html), treebuilder='lxml', namespaceHTMLElements=False).getroot()
|
||||||
return root
|
return root
|
||||||
|
|
||||||
# ***************************** Internal API *****************************
|
# ***************************** Internal API *****************************
|
||||||
|
Loading…
x
Reference in New Issue
Block a user