mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Better workaround for XXE in lxml
Instead of disabling resolution of all entities, only disable resolution of entities from files
This commit is contained in:
parent
f686647286
commit
f0e6ead4c7
@ -135,6 +135,8 @@ def find_tests(which_tests=None):
|
|||||||
a(find_tests())
|
a(find_tests())
|
||||||
from calibre.utils.hyphenation.test_hyphenation import find_tests
|
from calibre.utils.hyphenation.test_hyphenation import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
|
from calibre.utils.xml_parse import find_tests
|
||||||
|
a(find_tests())
|
||||||
if iswindows:
|
if iswindows:
|
||||||
from calibre.utils.windows.wintest import find_tests
|
from calibre.utils.windows.wintest import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
|
@ -5,27 +5,61 @@
|
|||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
import threading
|
|
||||||
|
|
||||||
# resolve_entities is turned off as entities can cause
|
# resolving of SYSTEM entitties is turned off as entities can cause
|
||||||
# reads of local files, for example:
|
# reads of local files, for example:
|
||||||
# <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>
|
# <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>
|
||||||
|
|
||||||
_global_tls = threading.local()
|
|
||||||
fs = etree.fromstring
|
fs = etree.fromstring
|
||||||
|
|
||||||
|
|
||||||
def parser(recover):
|
class Resolver(etree.Resolver):
|
||||||
parsers = getattr(_global_tls, 'parsers', None)
|
|
||||||
if parsers is None:
|
def resolve(self, url, id, context):
|
||||||
_global_tls.parsers = parsers = {
|
return self.resolve_string('', context)
|
||||||
True:
|
|
||||||
etree.XMLParser(recover=True, no_network=True, resolve_entities=False),
|
|
||||||
False:
|
def create_parser(recover):
|
||||||
etree.XMLParser(recover=False, no_network=True, resolve_entities=False)
|
parser = etree.XMLParser(recover=recover, no_network=True)
|
||||||
}
|
parser.resolvers.add(Resolver())
|
||||||
return parsers[recover]
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def safe_xml_fromstring(string_or_bytes, recover=True):
|
def safe_xml_fromstring(string_or_bytes, recover=True):
|
||||||
return fs(string_or_bytes, parser(recover))
|
return fs(string_or_bytes, parser=create_parser(recover))
|
||||||
|
|
||||||
|
|
||||||
|
def find_tests():
|
||||||
|
import unittest, tempfile, os
|
||||||
|
|
||||||
|
class TestXMLParse(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
with tempfile.NamedTemporaryFile(delete=False) as tf:
|
||||||
|
tf.write(b'external')
|
||||||
|
self.temp_file = tf.name
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
os.remove(self.temp_file)
|
||||||
|
|
||||||
|
def test_safe_xml_fromstring(self):
|
||||||
|
templ = '''<!DOCTYPE foo [ <!ENTITY e {id} "{val}" > ]><r>&e;</r>'''
|
||||||
|
external = 'file:///' + self.temp_file
|
||||||
|
self.assertEqual(etree.fromstring(templ.format(id='SYSTEM', val=external)).text, 'external')
|
||||||
|
for eid, val, expected in (
|
||||||
|
('', 'normal entity', 'normal entity'),
|
||||||
|
('SYSTEM', external, None),
|
||||||
|
('SYSTEM', 'http://example.com', None),
|
||||||
|
('', external, external),
|
||||||
|
('PUBLIC', external, None),
|
||||||
|
('PUBLIC', 'http://example.com', None),
|
||||||
|
):
|
||||||
|
got = safe_xml_fromstring(templ.format(id=eid, val=val)).text
|
||||||
|
self.assertEqual(got, expected)
|
||||||
|
|
||||||
|
return unittest.defaultTestLoader.loadTestsFromTestCase(TestXMLParse)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from calibre.utils.run_tests import run_tests
|
||||||
|
run_tests(find_tests)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user