mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Don't use a recovering XML parser in a few places where we are expected to fail on invalid XML
This commit is contained in:
parent
3eb28b395e
commit
fbfebda03f
@ -282,7 +282,7 @@ class RTFInput(InputFormatPlugin):
|
||||
|
||||
self.log('Converting XML to HTML...')
|
||||
inline_class = InlineClass(self.log)
|
||||
styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True))
|
||||
styledoc = safe_xml_fromstring(P('templates/rtf.xsl', data=True), recover=False)
|
||||
extensions = {('calibre', 'inline-class') : inline_class}
|
||||
transform = etree.XSLT(styledoc, extensions=extensions)
|
||||
result = transform(doc)
|
||||
|
@ -208,14 +208,14 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
|
||||
# Try with more & more drastic measures to parse
|
||||
try:
|
||||
data = safe_xml_fromstring(data)
|
||||
data = safe_xml_fromstring(data, recover=False)
|
||||
check_for_html5(pre, data)
|
||||
except (HTML5Doc, etree.XMLSyntaxError):
|
||||
log.debug('Initial parse failed, using more'
|
||||
' forgiving parsers')
|
||||
raw = data = xml_replace_entities(raw)
|
||||
try:
|
||||
data = safe_xml_fromstring(data)
|
||||
data = safe_xml_fromstring(data, recover=False)
|
||||
check_for_html5(pre, data)
|
||||
except (HTML5Doc, etree.XMLSyntaxError):
|
||||
log.debug('Parsing %s as HTML' % filename)
|
||||
@ -269,12 +269,12 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
data = etree.tostring(data, encoding='unicode')
|
||||
|
||||
try:
|
||||
data = safe_xml_fromstring(data)
|
||||
data = safe_xml_fromstring(data, recover=False)
|
||||
except:
|
||||
data = data.replace(':=', '=').replace(':>', '>')
|
||||
data = data.replace('<http:/>', '')
|
||||
try:
|
||||
data = safe_xml_fromstring(data)
|
||||
data = safe_xml_fromstring(data, recover=False)
|
||||
except etree.XMLSyntaxError:
|
||||
log.warn('Stripping comments from %s'%
|
||||
filename)
|
||||
|
@ -78,7 +78,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
|
||||
if force_html5_parse:
|
||||
return parse_html5(raw, log=log, line_numbers=line_numbers, linenumber_attribute=linenumber_attribute, replace_entities=False, fix_newlines=False)
|
||||
try:
|
||||
ans = safe_xml_fromstring(raw)
|
||||
ans = safe_xml_fromstring(raw, recover=False)
|
||||
if ans.tag != '{%s}html' % XHTML_NS:
|
||||
raise ValueError('Root tag is not <html> in the XHTML namespace')
|
||||
if linenumber_attribute:
|
||||
|
@ -124,7 +124,7 @@ def html_to_lxml(raw):
|
||||
root.set('xmlns', "http://www.w3.org/1999/xhtml")
|
||||
raw = etree.tostring(root, encoding=None)
|
||||
try:
|
||||
return safe_xml_fromstring(raw)
|
||||
return safe_xml_fromstring(raw, recover=False)
|
||||
except:
|
||||
for x in root.iterdescendants():
|
||||
remove = []
|
||||
@ -135,7 +135,7 @@ def html_to_lxml(raw):
|
||||
del x.attrib[a]
|
||||
raw = etree.tostring(root, encoding=None)
|
||||
try:
|
||||
return safe_xml_fromstring(raw)
|
||||
return safe_xml_fromstring(raw, recover=False)
|
||||
except:
|
||||
from calibre.ebooks.oeb.parse_utils import _html4_parse
|
||||
return _html4_parse(raw)
|
||||
|
@ -125,7 +125,7 @@ def get_custom_recipe_collection(*args):
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
continue
|
||||
return safe_xml_fromstring(serialize_collection(rmap))
|
||||
return safe_xml_fromstring(serialize_collection(rmap), recover=False)
|
||||
|
||||
|
||||
def update_custom_recipe(id_, title, script):
|
||||
@ -288,7 +288,7 @@ class SchedulerConfig(object):
|
||||
if os.access(self.conf_path, os.R_OK):
|
||||
with ExclusiveFile(self.conf_path) as f:
|
||||
try:
|
||||
self.root = safe_xml_fromstring(f.read())
|
||||
self.root = safe_xml_fromstring(f.read(), recover=False)
|
||||
except:
|
||||
print('Failed to read recipe scheduler config')
|
||||
import traceback
|
||||
|
Loading…
x
Reference in New Issue
Block a user