From 746381d179184c02ec71722021f7fceb388f7270 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 29 Dec 2019 22:02:16 +0530 Subject: [PATCH] A few more etree.fromstring --- imgsrc/srv/generate.py | 21 +++++++++++++++++---- recipes/ekathemerini.recipe | 2 +- recipes/thecodelesscode.recipe | 2 +- setup/hyphenation.py | 2 +- src/calibre/test_build.py | 2 +- src/css_selectors/select.py | 4 +++- src/css_selectors/tests.py | 4 ++-- 7 files changed, 26 insertions(+), 11 deletions(-) diff --git a/imgsrc/srv/generate.py b/imgsrc/srv/generate.py index c875063efd..5aab1a3772 100644 --- a/imgsrc/srv/generate.py +++ b/imgsrc/srv/generate.py @@ -7,8 +7,9 @@ import os, re, sys from lxml import etree -SVG_NS = 'http://www.w3.org/2000/svg' -XLINK_NS = 'http://www.w3.org/1999/xlink' +SVG_NS = 'http://www.w3.org/2000/svg' +XLINK_NS = 'http://www.w3.org/1999/xlink' + def clone_node(node, parent): ans = parent.makeelement(node.tag) @@ -20,15 +21,26 @@ def clone_node(node, parent): parent.append(ans) return ans + def merge(): base = os.path.dirname(os.path.abspath(__file__)) - ans = etree.fromstring('' % (SVG_NS, XLINK_NS)) + ans = etree.fromstring( + '' % (SVG_NS, XLINK_NS), + parser=etree.XMLParser( + recover=True, no_network=True, resolve_entities=False + ) + ) for f in os.listdir(base): if not f.endswith('.svg'): continue with open(os.path.join(base, f), 'rb') as ff: raw = ff.read() - svg = etree.fromstring(raw) + svg = etree.fromstring( + raw, + parser=etree.XMLParser( + recover=True, no_network=True, resolve_entities=False + ) + ) symbol = ans.makeelement('{%s}symbol' % SVG_NS) symbol.set('viewBox', svg.get('viewBox')) symbol.set('id', 'icon-' + f.rpartition('.')[0]) @@ -39,5 +51,6 @@ def merge(): ans = re.sub(']+>', '', ans, count=1) return ans + if __name__ == '__main__': sys.stdout.write(merge().encode('utf-8')) diff --git a/recipes/ekathemerini.recipe b/recipes/ekathemerini.recipe index 094b1953dc..1770607c12 100644 --- a/recipes/ekathemerini.recipe +++ b/recipes/ekathemerini.recipe @@ -42,7 +42,7 @@ class Ekathimerini(BasicNewsRecipe): def parse_index(self): idx_contents = self.browser.open(self.rss_url).read() - idx = etree.fromstring(idx_contents) + idx = etree.fromstring(idx_contents, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) cats = sorted({self.tag_to_string(subcat) for subcat in idx.xpath('//*[local-name()="subcat"]')}) diff --git a/recipes/thecodelesscode.recipe b/recipes/thecodelesscode.recipe index f47cb349d0..261189bda3 100644 --- a/recipes/thecodelesscode.recipe +++ b/recipes/thecodelesscode.recipe @@ -170,7 +170,7 @@ class CodelessCode(BasicNewsRecipe): for item in oeb.manifest.hrefs['index.html'].data.xpath('//*[local-name()="div"]'): for credit in self.credits[::-1]: - item.insert(0, etree.fromstring(credit)) + item.insert(0, etree.fromstring(credit, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))) # Change the creator from "calibre" to the actual author # Also, we don't need the date in the ebook's title diff --git a/setup/hyphenation.py b/setup/hyphenation.py index 1d622bf5b7..2f92320731 100644 --- a/setup/hyphenation.py +++ b/setup/hyphenation.py @@ -26,7 +26,7 @@ def locales_from_dicts(dicts): def locales_from_xcu(xcu, dicts): from lxml import etree with open(xcu, 'rb') as f: - root = etree.fromstring(f.read()) + root = etree.fromstring(f.read(), parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) ans = {} dicts = {os.path.basename(x) for x in dicts} for value in root.xpath('//*[contains(text(),"DICT_HYPH")]'): diff --git a/src/calibre/test_build.py b/src/calibre/test_build.py index e1bb453c23..c6b1600f20 100644 --- a/src/calibre/test_build.py +++ b/src/calibre/test_build.py @@ -108,7 +108,7 @@ class BuildTest(unittest.TestCase): test_clean_xml_chars() from lxml import etree raw = b'' - root = etree.fromstring(raw) + root = etree.fromstring(raw, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) self.assertEqual(etree.tostring(root), raw) def test_certgen(self): diff --git a/src/css_selectors/select.py b/src/css_selectors/select.py index ce6b2df178..b6e2a1884a 100644 --- a/src/css_selectors/select.py +++ b/src/css_selectors/select.py @@ -687,6 +687,8 @@ default_dispatch_map = {name.partition('_')[2]:obj for name, obj in globals().it if __name__ == '__main__': from pprint import pprint - root = etree.fromstring('

') + root = etree.fromstring( + '

', + parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) select = Select(root, ignore_inappropriate_pseudo_classes=True, trace=True) pprint(list(select('p:disabled'))) diff --git a/src/css_selectors/tests.py b/src/css_selectors/tests.py index d514204488..0176f1db93 100644 --- a/src/css_selectors/tests.py +++ b/src/css_selectors/tests.py @@ -646,7 +646,7 @@ by William Shakespeare # }}} def test_select(self): # {{{ - document = etree.fromstring(self.HTML_IDS) + document = etree.fromstring(self.HTML_IDS, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False)) select = Select(document) def select_ids(selector): @@ -823,7 +823,7 @@ def run_tests(find_tests=find_tests, for_build=False): except StopIteration: pass if ans is None: - print ('No test named %s found' % args.name) + print('No test named %s found' % args.name) raise SystemExit(1) tests = ans else: